import _ from 'lodash';
import stringSimilarity from 'string-similarity';
import { RGX_CURR_EURO_STYLE,
  RGX_CURR_US_STYLE,
  RGX_DATES,
  RGX_DUE_DATE_DAYS,
  RGX_FV_PREFIX,
  RGX_HTTPS_LINK,
  RGX_LINE_BRAKES,
  RGX_SPACES_MORE } from './regexExtraction';

/**
 * Pulls a single value (e.g. a date, an amount, a document number) out of free
 * text by locating candidate words and scoring the words in front of each
 * candidate against a list of expected key phrases.
 */
export class DocParser {
  /** Input text after the constructor's clean-up replacements. */
  text: string;

  /** Tokens produced by the most recent `splitToWords()` call. */
  words: string[] = [];

  /**
   * Candidates from the most recent `extractRelevantWord()` call whose rating
   * is at least 0.8, ordered from the highest to the lowest rating.
   */
  relevantWordsRatings: {
    relevantWord: string;
    wordProbability: number;
  }[] = [];

  /**
   * @param text Raw document text; it is normalised once and stored in `text`.
   */
  constructor(text: string) {
    // NOTE(review): the RGX_* patterns live in ./regexExtraction and are not
    // visible here. RGX_SPACES_MORE is replaced with an empty string - confirm
    // that it leaves one separating space behind, otherwise neighbouring words
    // would be glued together.
    this.text = text
      .replace(RGX_LINE_BRAKES, ' ')
      .replace(RGX_HTTPS_LINK, '')
      .replace(/[:]/g, ' ')
      .replace(RGX_SPACES_MORE, '')
      .trim();
  }

  /**
   * Splits `text` on single spaces and stores the result in `words`.
   *
   * Amounts written with space-separated thousands and a decimal comma
   * ("1 234 567,89") are collapsed first so that they stay in one token.
   *
   * @returns The token list (the same array that is stored in `words`).
   */
  splitToWords(): string[] {
    this.words = this.text
      .replace(/((\d{1,3}[ ](\d{3}[ ])*\d{3}|\d+)([,]\d{1,2}))/g, (amount) =>
        // A string pattern would strip only the first space; the regex strips
        // every thousands separator inside the matched amount.
        amount.replace(/ /g, ''))
      .split(' ');
    return this.words;
  }

  /**
   * Finds the word that is most likely described by one of `keyphrases`.
   *
   * Steps:
   *  1. tokenise the text,
   *  2. collect every word accepted by `relevantWordPredicate` (dates and
   *     numbers make good anchors),
   *  3. rate the words preceding each candidate against every key phrase
   *     ("faktura vat" still rates high against a misspelt "faktra vat"),
   *  4. keep candidates rated 0.8 or higher and return the best one.
   *
   * @param relevantWordPredicate Decides whether a token is a candidate value.
   * @param keyphrases Expected descriptions of the value; case is ignored.
   * @param backCheckLevel Forwarded to `createPhrasesForChecking`; selects an
   *   additional word window to compare.
   * @returns The best-rated candidate, or `null` when there is no candidate
   *   or none reaches the 0.8 threshold.
   */
  extractRelevantWord(
    relevantWordPredicate: (word: string) => boolean,
    keyphrases: string[],
    backCheckLevel: 0 | 1 = 0
  ): string | null {
    this.splitToWords();

    // Drop ratings of a previous call so an early `null` return does not leave
    // stale results behind.
    this.relevantWordsRatings = [];

    const relevantWordsIndxs: number[] = [];
    this.words.forEach((word, index) => {
      if (relevantWordPredicate(word)) relevantWordsIndxs.push(index);
    });

    if (_.isEmpty(relevantWordsIndxs)) return null;

    const ratedCandidates = relevantWordsIndxs.map((relevantIndex) => {
      const relevantWord = this.words[relevantIndex];

      let wordProbability = 0;
      for (const keyphrase of keyphrases) {
        // 1 is the maximum rating, further key phrases cannot improve it.
        if (wordProbability === 1) break;

        const phrasesToCheck = createPhrasesForChecking({
          backCheckLevel,
          index: relevantIndex,
          numWordsKeyphrase: keyphrase.split(' ').length,
          words: this.words,
        });

        if (_.isEmpty(phrasesToCheck)) continue;

        // The phrases are lower-cased, so lower-case the key phrase as well;
        // string-similarity compares case-sensitively.
        const { bestMatch } = stringSimilarity.findBestMatch(
          keyphrase.toLowerCase(),
          phrasesToCheck
        );

        if (bestMatch.rating > wordProbability) wordProbability = bestMatch.rating;
      }

      return {
        relevantWord,
        wordProbability,
      };
    });

    // 0.8 tolerates minor spelling mistakes. The sort is stable, so equally
    // rated candidates keep their order of appearance in the text.
    this.relevantWordsRatings = ratedCandidates
      .filter(({ wordProbability }) =>
        wordProbability >= 0.8)
      .sort((a, b) =>
        b.wordProbability - a.wordProbability);

    const best = this.relevantWordsRatings[0];
    return best === undefined ? null : best.relevantWord;
  }
}

/**
 * Builds the lower-cased word windows that are compared with a key phrase.
 *
 * Every window is `numWordsKeyphrase` words long. The first one ends right
 * before `words[index]`; when `backCheckLevel` is 1 a second one ending at
 * `index + 1` is added. A window that would start before the beginning of
 * `words` is skipped.
 *
 * NOTE(review): with `backCheckLevel` 1 the extra window moves forward and
 * therefore contains `words[index]` itself. The name suggests that looking
 * further back may have been intended - confirm before relying on it.
 *
 * @returns Zero, one or two phrases. With `backCheckLevel` 0 both windows
 *   would be identical, so only one phrase is returned.
 */
const createPhrasesForChecking = ({
  index,
  numWordsKeyphrase,
  words,
  backCheckLevel,
}: {
  backCheckLevel: 0 | 1;
  numWordsKeyphrase: number;
  words: string[];
  index: number;
}): string[] => {
  // Exclusive end positions of the windows to build.
  const windowEnds = backCheckLevel === 0 ? [index] : [index, index + backCheckLevel];

  return windowEnds
    .filter((end) => end >= numWordsKeyphrase && end !== 0)
    .map((end) =>
      words
        .slice(end - numWordsKeyphrase, end)
        .join(' ')
        .toLowerCase());
};


/* ------------------------ relevant word predicates ------------------------ */

/**
 * Word predicate: true when `word` contains a match of `RGX_DATES`.
 *
 * `String.prototype.search` is used instead of `RegExp.prototype.test`
 * because `test` continues from `lastIndex` when the regex carries the `g`
 * flag, which makes repeated calls on different words skip matches. `search`
 * always starts at index 0 and leaves `lastIndex` untouched. `RGX_DATES` is
 * defined in ./regexExtraction, so its flags are not visible here.
 */
export const findDate = (word: string): boolean =>
  word.search(RGX_DATES) !== -1;

/**
 * Word predicate: true when `word` contains a match of `RGX_CURR_EURO_STYLE`
 * or of `RGX_CURR_US_STYLE`.
 *
 * `String.prototype.search` is used instead of `RegExp.prototype.test`
 * because `test` continues from `lastIndex` when a regex carries the `g`
 * flag, which makes repeated calls on different words skip matches. `search`
 * always starts at index 0 and leaves `lastIndex` untouched. Both patterns
 * are defined in ./regexExtraction, so their flags are not visible here.
 */
export const findCurrency = (word: string): boolean =>
  word.search(RGX_CURR_EURO_STYLE) !== -1 || word.search(RGX_CURR_US_STYLE) !== -1;

/**
 * Word predicate: true when `word` contains a match of `RGX_DUE_DATE_DAYS`.
 *
 * `String.prototype.search` is used instead of `RegExp.prototype.test`
 * because `test` continues from `lastIndex` when the regex carries the `g`
 * flag, which makes repeated calls on different words skip matches. `search`
 * always starts at index 0 and leaves `lastIndex` untouched.
 * `RGX_DUE_DATE_DAYS` is defined in ./regexExtraction, so its flags are not
 * visible here.
 */
export const findDueDateDays = (word: string): boolean =>
  word.search(RGX_DUE_DATE_DAYS) !== -1;

/**
 * Word predicate for accounting-document numbers. True when `word`
 *  - consists only of digit groups separated by slashes ("12/2020"), or
 *  - contains a match of `RGX_FV_PREFIX`, or
 *  - contains an ASCII letter and a digit, in either order, with no
 *    whitespace between them ("A1", "1A", "x-9").
 *
 * `RGX_FV_PREFIX` is defined in ./regexExtraction, so its flags are not
 * visible here. It is checked with `String.prototype.search` because
 * `RegExp.prototype.test` would continue from `lastIndex` if the regex
 * carries the `g` flag and skip matches on repeated calls.
 */
export const findAccDocWord = (word: string): boolean => {
  const slashSeparatedDigits = /^\d+(\/\d+)*$/;
  // Same matches as the nested-quantifier original, without the backtracking:
  // a letter followed later by a digit, or a digit followed later by a letter.
  const lettersAndDigits = /[a-z]\S*\d|\d\S*[a-z]/i;

  return (
    slashSeparatedDigits.test(word) ||
    word.search(RGX_FV_PREFIX) !== -1 ||
    lettersAndDigits.test(word)
  );
};
