// normalizeText.ts

/**
 * Normalizes the given text by converting it to lowercase and removing punctuation and excess spaces.
 * @param text The text to be normalized.
 * @returns Normalized text.
 */
import { diffWords } from 'diff';
import chalk from 'chalk';
import { forEach } from 'lodash';
/**
 * Normalizes text for loose comparison: lower-cases it, strips periods and a
 * fixed set of punctuation characters, collapses runs of two or more
 * whitespace characters into one space, and trims both ends.
 *
 * @param text The text to normalize. A missing value (`undefined` or `null`)
 *   is tolerated even though the declared type is `string`.
 * @returns The normalized text, or `''` when `text` is missing.
 */
export const normalizeText = (text: string): string => {
  // `== null` matches both undefined and null, so neither reaches toLowerCase().
  if (text == null) {
    return '';
  }

  return (
    text
      .toLowerCase()
      // Strip periods and the other punctuation characters in a single pass
      .replace(/[.,#!$%^&*;:{}=_`~“”"]/g, '')
      // Collapse runs of two or more whitespace characters into one space
      // (a lone tab or newline is intentionally left as-is)
      .replace(/\s{2,}/g, ' ')
      // Remove whitespace from the start and end
      .trim()
  );
};

/**
 * Counts the whitespace-separated words in a string.
 *
 * @param str The string to inspect.
 * @returns The number of non-empty tokens; `0` for an empty or all-whitespace string.
 */
export function countWords(str: String) {
  // Every maximal run of non-whitespace characters is one word.
  const runs = str.match(/\S+/g);

  // match() yields null when there is no run at all.
  return (runs || []).length;
}

/**
 * Classifies a rule text by its word count.
 *
 * @param str The rule text, or `undefined` when there is none.
 * @returns `null` for `undefined` input, `'small'` when the text has fewer
 *   than 14 words, otherwise `'large'`.
 */
export function largeOrSmallRule(str: String | undefined) {
  if (str === undefined) return null;

  return countWords(str) < 14 ? 'small' : 'large';
}

/**
 * Counts the individual whitespace characters in a string.
 *
 * @param str The string to scan.
 * @returns How many characters match `\s`; `0` when there are none.
 */
export function countWhiteSpaces(str: string): number {
  const hits = str.match(/\s/g);
  // A global match returns null, not an empty array, when nothing matches.
  return hits === null ? 0 : hits.length;
}

/**
 * Shuffles `array` in place with a Fisher–Yates pass driven by `Math.random`.
 *
 * @param array The array to reorder; it is mutated and nothing is returned.
 */
function shuffleArray<T>(array: T[]): void {
  for (let i = array.length - 1; i > 0; i--) {
    // Pick a random slot in [0, i] and swap it into position i.
    const j = Math.floor(Math.random() * (i + 1));
    [array[i], array[j]] = [array[j], array[i]];
  }
}

/**
 * Lower-cased tokens that `processPlaceholderText` never masks in partial
 * mode: short function words plus a bare straight double quote.
 */
const PLACEHOLDER_SKIP_WORDS: ReadonlySet<string> = new Set([
  'for',
  'and',
  'nor',
  'but',
  'or',
  'yet',
  'so',
  'a',
  'an',
  'the',
  'if',
  'in',
  'of',
  'on',
  'to',
  'with',
  'as',
  'at',
  'by',
  'from',
  'into',
  'like',
  'near',
  'not',
  'off',
  'does',
  'be',
  '"'
]);

/**
 * Builds a placeholder version of `ruleText` in which some words are replaced
 * by runs of `▁` of the same length.
 *
 * Two modes are supported:
 * - `percentage === '101'`: the text is split on single spaces and every
 *   token is masked.
 * - any other value: the text is split on whitespace and curly quotes
 *   (`“` / `”`), the given percentage of maskable words is chosen at random
 *   and masked, and the tokens are re-joined with single spaces. The curly
 *   quotes act as separators and therefore do not appear in the output.
 *   Words listed in `PLACEHOLDER_SKIP_WORDS` and empty tokens are never
 *   masked.
 *
 * @param ruleText The text to mask.
 * @param percentage Percentage of maskable words to hide, as a numeric
 *   string, or the sentinel `'101'` to mask everything. Values that are not
 *   numeric mask nothing; values outside 0–100 are clamped.
 * @returns `placeholderText`, the masked text, and `wordIndexRemoved`, the
 *   indices (into the token list of the mode used) of the masked words. In
 *   partial mode the indices are in random selection order.
 */
export function processPlaceholderText(
  ruleText: string,
  percentage: string
): { placeholderText: string; wordIndexRemoved: number[] } {
  const mask = (word: string): string => '▁'.repeat(word.length);
  const wordIndexRemoved: number[] = [];

  if (percentage === '101') {
    const allWords = ruleText.split(' ');
    // Record one index per word (not per character), skipping the empty
    // tokens that consecutive spaces produce.
    allWords.forEach((word, index) => {
      if (word.length > 0) {
        wordIndexRemoved.push(index);
      }
    });
    const placeholderText = allWords.map((word) => mask(word)).join(' ');
    return { placeholderText, wordIndexRemoved };
  }

  const words = ruleText.split(/\s+|“|”/);

  // Track candidates by position rather than by value so that repeated words
  // are each eligible on their own and empty tokens never use up the budget.
  const candidateIndices: number[] = [];
  words.forEach((word, index) => {
    if (word.length > 0 && !PLACEHOLDER_SKIP_WORDS.has(word.toLowerCase())) {
      candidateIndices.push(index);
    }
  });

  const requested = Math.round(
    candidateIndices.length * (Number(percentage) / 100)
  );
  // A non-numeric percentage yields NaN: mask nothing. Otherwise clamp to the
  // number of candidates actually available.
  const numToReplace = Number.isNaN(requested)
    ? 0
    : Math.min(Math.max(requested, 0), candidateIndices.length);

  shuffleArray(candidateIndices);

  for (const index of candidateIndices.slice(0, numToReplace)) {
    words[index] = mask(words[index]);
    wordIndexRemoved.push(index);
  }

  return { placeholderText: words.join(' '), wordIndexRemoved };
}

/**
 * Tells whether two words are equal once both have been passed through
 * `normalizeText`.
 *
 * @param word1 First word to compare.
 * @param word2 Second word to compare.
 * @returns `null` when either argument is `undefined`; otherwise `true` if the
 *   normalized forms are identical and `false` if they are not.
 */
export function checkMatchingWords(
  word1: string,
  word2: string
): boolean | null {
  const bothProvided = word1 !== undefined && word2 !== undefined;
  if (!bothProvided) {
    return null;
  }

  return normalizeText(word1) === normalizeText(word2);
}

// use the diff library to compare two strings and return the differences

/**
 * Compares two strings by delegating to `diffWords` from the `diff` package.
 *
 * Both inputs are handed over exactly as received; no case folding or
 * punctuation stripping is applied here.
 *
 * @param str1 The first (old) string.
 * @param str2 The second (new) string.
 * @returns Whatever `diffWords(str1, str2)` returns.
 */
export function diffStrings(str1: string, str2: string) {
  return diffWords(str1, str2);
}
