import { getFirstWordInText } from "./getFirstWordInText";
import { getNextRangeBoundary } from "./getNextRangeBoundary";
import { getTextFromRange } from "./getTextFromRange";
import { isValidWord } from "./isValidWord";

/**
 * Matches a single whitespace character. Used to check whether the trimmed
 * text extracted from the narrowed range still contains whitespace.
 */
const WHITESPACE_REGEX = /\s/;

/**
 * Specifies how many valid words (as judged by isValidWord) the raw text
 * range is expanded to contain before the real text extraction is performed.
 * Collecting this many words up front is meant to avoid unnecessary lookups
 * when the range consists of irrelevant nodes.
 */
const RAW_TEXT_RANGE_WORD_COUNT = 25;

/**
 * Utility to extract text from only the first word inside the range.
 *
 * This method exists as a way to improve performance of the karaoke highlight
 * method, as it can become very expensive to perform a "getTextFromRange" on
 * large chunks of content.
 *
 * The method works by narrowing down the size of the range by performing
 * guesses of how much content is needed to correctly find the first word using
 * the simple browser-builtin range.toString method, and then extracting text
 * from only that part of the range.
 *
 * If we fail to actually find the first word, then we fall back to extracting
 * the content from the entire range. When the narrowed range already spans
 * the entire original range, the text that was just extracted is reused
 * instead of extracting the very same content a second time.
 *
 * @param range - The range to search for the first word. It is cloned up
 * front; this function itself only reads from the original.
 * @returns The result of getFirstWordInText for the extracted text.
 */
export function getTextFromFirstWordInRange(range: Range): string {
  // create a clone of the range, which we can expand from the beginning and
  // forwards until we assume it contains the first word
  const rawRange = range.cloneRange();
  rawRange.collapse(true);

  // hoisted so the pattern isn't re-created on every loop iteration
  const whitespaceRun = /\s+/;

  // becomes true once the end of the raw range lands exactly on the end of
  // the original range, i.e. both ranges span the same content
  let coversEntireRange = false;

  while (
    rawRange
      .toString()
      .split(whitespaceRun)
      .filter((word) => isValidWord(word)).length < RAW_TEXT_RANGE_WORD_COUNT
  ) {
    const nextBoundary = getNextRangeBoundary(
      rawRange.endContainer,
      rawRange.endOffset
    );

    // bail if we couldn't find a next logical place for the next range
    // boundary
    if (nextBoundary === undefined) {
      break;
    }

    rawRange.setEnd(nextBoundary.container, nextBoundary.offset);

    // if we've reached the end of the original range, then prevent searching
    // any further ahead
    //
    // NOTE(review): this is an exact boundary match. It assumes
    // getNextRangeBoundary never steps past the end of the original range
    // (e.g. for a collapsed range, or one ending in the middle of a node);
    // otherwise the raw range keeps growing beyond it. Confirm against
    // getNextRangeBoundary.
    if (
      nextBoundary.container === range.endContainer &&
      nextBoundary.offset === range.endOffset
    ) {
      coversEntireRange = true;
      break;
    }
  }

  const rawRangeText = getTextFromRange(rawRange);

  // whitespace inside the trimmed text means the first word is followed by
  // more content, so the raw text is enough to determine it. The raw text is
  // also sufficient when the raw range covers the entire original range,
  // since the fallback below would extract from identical boundaries.
  if (coversEntireRange || WHITESPACE_REGEX.test(rawRangeText.trim())) {
    return getFirstWordInText(rawRangeText);
  }

  // the first word couldn't be isolated from the narrowed range, so fall back
  // to extracting from the entire original range
  return getFirstWordInText(getTextFromRange(range));
}
