/**
 * A candidate fragment paired with the score used to rank it against
 * other candidates.
 */
interface TextFragment {
  /** Fragment content as a single string. */
  text: string;
  /** Ranking score; when two fragments are compared, the higher score wins. */
  score: number;
}

/**
 * Picks the window of `resultText` that scores highest against `query`.
 *
 * Both inputs are tokenised with `normalizeText`, the result tokens are cut
 * into sliding windows, and every window is scored with
 * `calculateFragmentScore` against all n-grams of the query.
 *
 * Note that the returned string is rebuilt from the normalised tokens
 * (joined with single spaces), so it is lower-cased and its spacing may
 * differ from the original `resultText`.
 *
 * @param query Search query
 * @param resultText Large text to search through
 * @param windowSize Number of tokens per window (defaults to 100)
 * @param step Number of tokens the window advances each time (defaults to 50)
 * @returns The best-scoring window as text, or an empty string when
 *   `resultText` yields no windows. On equal scores the earliest window wins.
 */
export function findBestMatchingFragment(
  query: string,
  resultText: string,
  windowSize: number = 100,
  step: number = 50
): string {
  const terms = extractQueryTerms(query);
  const candidateWindows = createTextWindows(
    normalizeText(resultText),
    windowSize,
    step
  );

  // Start below any reachable score so that the first window is always
  // accepted, even when nothing in it matches the query.
  const initial: TextFragment = { text: '', score: -1 };

  const winner = candidateWindows.reduce<TextFragment>((leader, tokens) => {
    const score = calculateFragmentScore(tokens, terms);
    // Strict comparison keeps the earliest window on ties.
    return score > leader.score ? { text: tokens.join(' '), score } : leader;
  }, initial);

  return winner.text;
}

// Helper functions

/**
 * Builds the list of distinct search terms for a query.
 *
 * The query is tokenised with `normalizeText`, then every run of
 * consecutive tokens (from single tokens up to the whole query) becomes one
 * term. Terms are ordered by length first and by position second; a phrase
 * that occurs more than once is kept only at its first occurrence.
 *
 * @param query Search query
 * @returns Each term as an array of tokens; empty when the query has no tokens
 */
export function extractQueryTerms(query: string): string[][] {
  const tokens = normalizeText(query);
  const seenPhrases = new Set<string>();
  const uniqueTerms: string[][] = [];

  for (let size = 1; size <= tokens.length; size++) {
    const lastStart = tokens.length - size;
    for (let start = 0; start <= lastStart; start++) {
      // The space-joined phrase doubles as the de-duplication key.
      const phrase = tokens.slice(start, start + size).join(' ');
      if (seenPhrases.has(phrase)) {
        continue;
      }
      seenPhrases.add(phrase);
      uniqueTerms.push(phrase.split(' '));
    }
  }

  return uniqueTerms;
}

/**
 * Lower-cases `text` and splits it into tokens.
 *
 * A token is either a word (a Unicode letter followed by any mix of
 * letters, apostrophes and hyphens) or a run of characters that are neither
 * letters nor whitespace (digits, punctuation, symbols). Whitespace only
 * separates tokens and is never part of one.
 *
 * @param text Raw input text
 * @returns Tokens in their original order; empty when nothing matches
 */
export function normalizeText(text: string): string[] {
  const tokenPattern = /(\p{L}[\p{L}'\-]*)|([^\p{L}\s]+)/gu;
  const found = text.toLowerCase().match(tokenPattern);

  if (found === null) {
    return [];
  }

  // Defensive: discard anything that is blank once trimmed.
  return found.filter((token) => token.trim() !== '');
}

/**
 * Cuts a token list into sliding windows.
 *
 * Windows start at index 0 and advance by `step` tokens. The final window
 * may be shorter than `windowSize`; iteration stops as soon as a window
 * reaches the end of `words`.
 *
 * @param words Tokens to split (not modified)
 * @param windowSize Maximum number of tokens per window. A value that is
 *   zero, negative or NaN yields no windows.
 * @param step Number of tokens to advance between windows. A value that is
 *   zero, negative or NaN is treated as 1 so that the cursor always moves
 *   forward.
 * @returns The windows in order of their start position; empty when `words`
 *   is empty or `windowSize` cannot hold a token
 */
export function createTextWindows(
  words: string[],
  windowSize: number,
  step: number
): string[][] {
  // Written as a negated `>` so that NaN is rejected together with values <= 0.
  // A negative size would otherwise make `slice` count from the end of the
  // array and produce meaningless windows.
  if (!(windowSize > 0)) {
    return [];
  }

  // A step that does not move the cursor forward would loop forever on any
  // input longer than one window; fall back to the smallest useful stride.
  const stride = step > 0 ? step : 1;

  const windows: string[][] = [];

  for (let start = 0; start < words.length; start += stride) {
    const end = start + windowSize;
    const chunk = words.slice(start, end);
    // Fractional sizes below 1 can still produce an empty slice.
    if (chunk.length === 0) break;
    windows.push(chunk);
    // The window already covers the tail; further windows would only be
    // shorter suffixes of it.
    if (end >= words.length) break;
  }

  return windows;
}

/**
 * Scores a fragment by counting how often each query term occurs in it.
 *
 * - A single-token term adds 1 for every equal token in the fragment.
 * - A multi-token term adds its own length for every position at which the
 *   whole phrase appears consecutively, so longer phrases weigh more.
 *
 * @param fragment Tokens of the fragment being scored
 * @param queryTerms Terms to look for, each given as an array of tokens
 * @returns Sum of the contributions of all terms (0 when nothing matches)
 */
export function calculateFragmentScore(
  fragment: string[],
  queryTerms: string[][]
): number {
  let total = 0;

  for (const term of queryTerms) {
    const size = term.length;

    if (size <= 1) {
      // Single token: count its occurrences.
      const needle = term[0];
      total += fragment.reduce(
        (hits, token) => (token === needle ? hits + 1 : hits),
        0
      );
      continue;
    }

    // Phrase: try every start position that leaves room for the whole term.
    const lastStart = fragment.length - size;
    for (let start = 0; start <= lastStart; start++) {
      const mismatch = term.some(
        (token, offset) => token !== fragment[start + offset]
      );
      if (!mismatch) {
        total += size; // phrases are weighted by their length
      }
    }
  }

  return total;
}
