(function() {
    "use strict";

    // When true, search() logs the first 20 match/filtered results to the console
    // and a `metadata` object is attached to every match result.
    const DEBUG = false;

    // Floor K of the normalized term frequency: tf = K + (1 - K) * count / maxCount.
    const TF_NORMALIZATION = 0.5;
    // Decay rate of the edit-distance factor: exp(-decay * distance / (1 + maxEditDistance)).
    const EDIT_DISTANCE_DECAY = 2;

    // `mnemonist` and `natural` are not declared in this file; they are presumably
    // globals provided by the page (TODO confirm how they are loaded).
    const { MultiMap, SymSpell } = mnemonist;
    const { PorterStemmer, WordTokenizer, stopwords } = natural;
    // Shared tokenizer instance used by SearchFactory.tokenizeAndStem.
    const tokenizer = new WordTokenizer();
    // Set form of the stopword list for constant-time membership checks.
    const stopwordSet = new Set(stopwords);

    /**
     * Transpose an array of objects into a single object of arrays.
     * For every key found on any of the objects the result holds an array with one entry per input object
     * (in input order); objects that lack the key, or that are null/undefined, contribute `undefined`.
     * Values are looked up by key rather than by position, so objects with differing key order or
     * differing key sets are still transposed correctly.
     * @param arr {Object[]} array (or other iterable/array-like) of objects; null/undefined yields `{}`
     * @return {Object} object mapping each key to the array of its values
     */
    function _transpose(arr) {
        const rows = arr == null ? [] : Array.from(arr);

        // collect the keys in order of first appearance
        const keys = new Set();
        rows.forEach(row => {
            if (row != null) {
                Object.keys(row).forEach(key => keys.add(key));
            }
        });

        // one column per key, one entry per row
        const transposed = {};
        keys.forEach(key => {
            transposed[key] = rows.map(row =>
                row == null ? undefined : row[key]
            );
        });
        return transposed;
    }

    /**
     * Tf-idf index over documents that have already been tokenized.
     * Postings are stored in a MultiMap keyed by (stemmed) token.
     */
    class _TfIdf {
        constructor() {
            this.index = new MultiMap();
        }

        /**
         * Add one document to the index.
         * The normalized term frequency of every token is computed here, once, and stored with the posting.
         * One posting is stored per distinct raw token of the document.
         * @param documentTerms array of `{ token, rawToken }` terms of the document
         * @param key identifier of the document
         */
        addDocument(documentTerms, key) {
            // occurrences of every token in this document, and the highest of them
            const occurrencesByToken = _.countBy(documentTerms, "token");
            const highestOccurrence = _.max(_.values(occurrencesByToken));

            // Normalizing by the most frequent token dampens the effect of document length:
            // long documents would otherwise match more queries with higher term frequencies.
            const floor = TF_NORMALIZATION;

            // only the first term seen for a given raw token produces a posting
            const indexedRawTokens = new Set();
            _.forEach(documentTerms, term => {
                const { token, rawToken } = term;
                if (indexedRawTokens.has(rawToken)) {
                    return;
                }
                indexedRawTokens.add(rawToken);

                const occurrences = occurrencesByToken[token];
                const posting = {
                    value: { key, rawToken },
                    tf: floor + ((1 - floor) * occurrences) / highestOccurrence
                };
                this.index.set(token, posting);
            });
        }

        /**
         * Tf-idf of every posting stored for the term.
         * The idf is computed at query time because the index may still grow after a document is added.
         * NOTE(review): the numerator is `this.index.size`, i.e. whatever the MultiMap reports as its size;
         * confirm whether that is a document count or a count of stored postings.
         * @param term token to look up
         * @return {{tfidf: number, value: {key: *, rawToken: *}}[]} empty when the term is not indexed
         */
        tfidfs(term) {
            const postings = this.index.get(term);
            if (!postings) {
                return [];
            }

            // the 1 in the denominator is the smoothing term
            const inverseDocumentFrequency = Math.log(
                this.index.size / (1 + postings.length)
            );

            return postings.map(posting => ({
                tfidf: posting.tf * inverseDocumentFrequency,
                value: posting.value
            }));
        }
    }

    /**
     * Approximate term lookup over a fixed corpus of terms, backed by a SymSpell index.
     * Candidates are returned together with their edit distance to the searched term.
     */
    class _FuzzyMatcher {
        /**
         * @param terms terms to index
         * @param maxDistance largest edit distance the index is built for (defaults to 2)
         */
        constructor(terms, maxDistance = 2) {
            // verbosity 2 is passed through to SymSpell unchanged
            // (presumably "return every candidate within maxDistance" - confirm against the mnemonist docs)
            const symSpellOptions = { maxDistance, verbosity: 2 };

            this.maxDistance = maxDistance;
            this.index = SymSpell.from(terms, symSpellOptions);
        }

        /**
         * @param term term to look up
         * @return {*} whatever the SymSpell index returns for the term
         */
        search(term) {
            const candidates = this.index.search(term);
            return candidates;
        }
    }

    /**
     * Remembers a tokenization configuration and applies it to documents.
     */
    class _Tokenizer {
        /**
         * @param options
         * @param options.removeStopwords {boolean} forwarded to SearchFactory.tokenizeAndStem
         * @param options.stem {boolean} forwarded to SearchFactory.tokenizeAndStem
         * @param options.lowerCase {boolean} forwarded to SearchFactory.tokenizeAndStem
         */
        constructor({ removeStopwords, stem, lowerCase }) {
            this.removeStopwords = removeStopwords;
            this.stem = stem;
            this.lowerCase = lowerCase;
        }

        /**
         * Tokenize the text of one document and resolve the document's key.
         * @param document document to tokenize
         * @param keyResolver {String | Function} function computing the key, or a lodash path to it
         * @param termResolver {Function} returns the text of the document that should be tokenized
         * @return {{key: *, terms: Array, termsWithRaw: Array}} the key, the processed tokens,
         *         and the full token list as returned by SearchFactory.tokenizeAndStem
         */
        tokenize(document, keyResolver, termResolver) {
            const { removeStopwords, stem, lowerCase } = this;

            const text = termResolver(document);
            const termsWithRaw = SearchFactory.tokenizeAndStem(
                text,
                removeStopwords,
                stem,
                lowerCase
            );

            let key;
            if (_.isFunction(keyResolver)) {
                key = keyResolver(document);
            } else {
                key = _.get(document, keyResolver);
            }

            return { key, terms: termsWithRaw.tokens(), termsWithRaw };
        }
    }

    /**
     * Creates a matcher (really an inverted index) for a field in a given document.
     * The matcher uses tf-idf as a basis for scoring.
     * The matcher will also attempt fuzzy matching of search terms.
     * The index itself is created by build(); match() throws until build() has been called.
     */
    class Matcher {
        /**
         * @param options
         * @param options.termResolver {Function} returns the text of a document that this matcher indexes
         * @param [options.removeStopwords=true] {boolean} drop stopwords from documents and queries
         * @param [options.stem=true] {boolean} stem tokens of documents and queries
         * @param [options.lowerCase=true] {boolean} lower-case tokens of documents and queries
         * @param [options.maxEditDistance=2] {number} largest edit distance for fuzzy matches; 0 disables fuzzy matching
         * @param [options.boost=1] {number} multiplier applied to every score of this matcher
         * @param [options.label] label reported in the debug metadata
         */
        constructor({
            termResolver,
            removeStopwords = true,
            stem = true,
            lowerCase = true,
            maxEditDistance = 2,
            boost = 1,
            label
        }) {
            this.termResolver = termResolver;
            this.maxEditDistance = maxEditDistance;
            this.boost = boost;
            this.label = label;
            this.removeStopwords = removeStopwords;
            this.stem = stem;
            this.lowerCase = lowerCase;

            this.tokenizer = new _Tokenizer({
                removeStopwords,
                stem,
                lowerCase
            });
        }

        /**
         * Arbitrary scoring function factoring in tf-idf, edit distance, and boost
         * @param editDistance edit distance between the query term and the indexed term
         * @param tfidf tf-idf of the indexed term for the document
         * @param isExactMatch whether the raw query term equals the raw document term
         * @return {number}
         */
        _score(editDistance, tfidf, isExactMatch) {
            // the edit distance factor decreases exponentially as edit distance increases
            // this factor should range between 0 and 1 to not interfere with boosting
            // anything that is not an exact raw-token match is penalized as one extra edit
            const modifiedEditDistance = editDistance + (isExactMatch ? 0 : 1);
            const editDistanceScore = Math.exp(
                (-EDIT_DISTANCE_DECAY * modifiedEditDistance) /
                    (1 + this.maxEditDistance)
            );
            return editDistanceScore * tfidf * this.boost;
        }

        /**
         * For a given fuzzy term, find all document matches and score them
         * @param term the processed (e.g. stemmed) query term
         * @param originalTerm the raw query term, compared against the raw document token
         * @param fuzzyTerm the indexed term proposed by the fuzzy matcher
         * @param editDistance distance between `term` and `fuzzyTerm`
         * @param count reported by the fuzzy matcher; currently unused
         * @return {{key: string, fuzzyTerm: *, score: number, editDistance: *, term: *, tfidf: *}[]}
         * @private
         */
        _getScoresForTerm(term, originalTerm, fuzzyTerm, editDistance, count) {
            const tfidfs = this.index.tfidfs(fuzzyTerm);
            return tfidfs.map(({ tfidf, value: { key, rawToken } }) => {
                const isExactMatch = originalTerm === rawToken;
                const score = this._score(editDistance, tfidf, isExactMatch);
                return {
                    term,
                    fuzzyTerm,
                    editDistance,
                    tfidf,
                    score,
                    key
                };
            });
        }

        /**
         * Given a user inputted term, find all fuzzy matches and keep the best scoring match per document
         * @param term {{token: *, rawToken: *}} one term of the tokenized query
         * @return {Array} at most one match per document
         * @private
         */
        _getMatchesForTerm(term) {
            const { token: stemmedTerm, rawToken: originalTerm } = term;

            // get the fuzzy matches for the term
            const fuzzyMatches = this.fuzzyMatcher.search(stemmedTerm);

            // transform into scores
            const matchResults = _.flatMap(
                fuzzyMatches,
                ({ term: fuzzyTerm, distance: editDistance, count }) => {
                    return this._getScoresForTerm(
                        stemmedTerm,
                        originalTerm,
                        fuzzyTerm,
                        editDistance,
                        count
                    );
                }
            );

            // group scores by their key
            const groupedMatchResults = _.groupBy(matchResults, "key");

            // return the best scoring result for the term for each document
            return _.map(_.values(groupedMatchResults), documentMatches =>
                _.maxBy(documentMatches, m => m.score)
            );
        }

        /**
         * Helper function to build the matcher. The index creation is deferred until a keyResolver is set.
         * Since matchers can be defined outside a search factory the keyResolver should not be in the Matcher constructor.
         * A null/undefined `documents` collection results in an empty index.
         * @param documents documents to index
         * @param keyResolver {String | Function} a lodash compatible resolver for the document key
         */
        build(documents, keyResolver) {
            this.keys = _.map(documents, keyResolver);

            // tokenize terms in the given field
            // (lodash map, like the line above, so that a missing collection does not throw)
            const tokens = _.map(documents, d => {
                return this.tokenizer.tokenize(
                    d,
                    keyResolver,
                    this.termResolver
                );
            });

            // set up the fuzzy matcher
            if (this.maxEditDistance > 0) {
                this.fuzzyMatcher = new _FuzzyMatcher(
                    _.flatten(_.map(tokens, t => t.terms)),
                    this.maxEditDistance
                );
            } else {
                // if edit distance is set to 0, mock this feature, count is not currently used
                this.fuzzyMatcher = {
                    search: term => [{ term, distance: 0, count: null }]
                };
            }

            // set up the tf-idf index
            this.index = new _TfIdf();
            _.forEach(tokens, ({ key, termsWithRaw }) => {
                this.index.addDocument(termsWithRaw, key);
            });
        }

        /**
         * Tokenize and stem the search query.
         * Find matches for each term
         * Aggregate scores by document for each term
         * When the query yields no terms, every indexed key is returned with `skip: true` and a score of 0.
         * @param queryString
         * @return {{key: *, score: number, skip?: boolean, metadata?: Object}[]}
         * @throws {Error} when build() has not been called yet
         */
        match(queryString) {
            if (!this.index) {
                throw new Error("The matcher's index has not been initialized");
            }

            const searchTerms = SearchFactory.tokenizeAndStem(
                queryString,
                this.removeStopwords,
                this.stem,
                this.lowerCase
            );

            // if there are no search terms, just return the documents list in full
            // filters will be applied after matching
            if (_.isEmpty(searchTerms.tokens())) {
                return _.map(this.keys, key => ({
                    key,
                    skip: true,
                    score: 0
                }));
            }

            // get matches for each term
            const queryMatches = _.flatMap(searchTerms, term =>
                this._getMatchesForTerm(term)
            );

            // group by document key
            const groupedMatches = _.groupBy(queryMatches, m => m.key);

            // for the document matches, sum the score
            return _.map(groupedMatches, (documentMatches, key) => {
                const score = _.sumBy(documentMatches, m => m.score);
                const result = { key, score };

                if (DEBUG) {
                    result.metadata = {
                        label: this.label,
                        ..._.omit(_transpose(documentMatches), "key")
                    };
                }

                return result;
            });
        }
    }

    /**
     * Creates a filter to filter search results after matchers generate results.
     * Currently only exact match filters are supported: a result is kept when the string form of the
     * configured field equals one of the query terms selected by `termRegexp`.
     */
    class Filter {
        /**
         * @param options
         * @param options.path {String} lodash path of the field to compare, relative to the document
         * @param options.termRegexp {RegExp} selects the query terms this filter applies to
         */
        constructor({ path, termRegexp }) {
            this.path = path;
            this.termRegexp = termRegexp;
        }

        /**
         * Applies the filter to the re-hydrated match results of a given query
         * Note that the full document is expected in the match results - not just the key.
         * @param results {{document: *, score: number}[]} match results
         * @param query {String} the original query; it is tokenized without stopword removal, stemming or lower-casing
         * @return {*} the filtered results, or `results` itself when no query term is selected by `termRegexp`
         */
        filter(results, query) {
            const terms = SearchFactory.tokenizeAndStem(
                query,
                false,
                false,
                false
            ).tokens();

            // use regex to filter out terms to apply to field
            const termsToApply = _.filter(terms, term => {
                // a regex with the g or y flag resumes from lastIndex on the next test() call;
                // without the reset, every other matching term would be missed
                this.termRegexp.lastIndex = 0;
                return this.termRegexp.test(term);
            });

            // nothing in the query concerns this filter
            if (_.isEmpty(termsToApply)) {
                return results;
            }

            return _.filter(results, result => {
                // get the field to match; `document` is hard-coded in the match results
                const fieldToMatch = _.toString(
                    _.get(result, `document.${this.path}`)
                );

                // keep the result if any of the terms equals the field
                return _.includes(termsToApply, fieldToMatch);
            });
        }
    }

    /**
     * Search over a collection of documents: runs every matcher on the query, merges their scores per
     * document, and finally applies the filters to the merged results.
     */
    class SearchFactory {
        /**
         * Creates a search instance for documents using provided matchers and filters.
         * @param documents documents to index
         * @param matchers {Matcher[]} how to match documents
         * @param filters {Filter[]} any filters to be applied
         * @param keyResolver {String | Function} a lodash compatible resolver for the document key
         * @param [searchRemoveRegexp] {RegExp[]} Any Regex to remove from the query before matching
         */
        constructor({
            documents,
            matchers,
            filters,
            keyResolver,
            searchRemoveRegexp
        }) {
            this.matchers = matchers;
            this.filters = filters;
            this.keyResolver = keyResolver;
            this.searchRemoveRegexp = searchRemoveRegexp;
            this.documentMap = _.keyBy(documents, keyResolver);

            // build the indices
            // (lodash forEach tolerates a missing matcher list, like the rest of this class)
            _.forEach(this.matchers, m => {
                m.build(documents, keyResolver);
            });
        }

        /**
         * Custom function for tokenizing and stemming
         * The purpose is to give greater control over choice of tokenizer, stemmer,
         * stopword removal, or any other token pre-processing.
         * Non-string input (e.g. a numeric field) is converted to a string before tokenizing;
         * null/undefined input yields no tokens.
         * @param string {String}
         * @param removeStopwords {boolean}
         * @param stem {boolean}
         * @param lowerCase {boolean}
         * @return {*} array of `{ rawToken, token }` with two extra helper methods:
         *         `rawTokens()` and `tokens()` return the respective values as plain arrays
         */
        static tokenizeAndStem(string, removeStopwords, stem, lowerCase) {
            const result = [];
            if (!_.isNil(string)) {
                // break into tokens
                _.forEach(tokenizer.tokenize(_.toString(string)), token => {
                    const preparedToken = _.trim(
                        lowerCase ? _.toLower(token) : token
                    );
                    // optionally skip stopwords
                    if (removeStopwords && stopwordSet.has(preparedToken)) {
                        return;
                    }
                    // keep the token together with its stemmed form
                    result.push({
                        rawToken: preparedToken,
                        token: stem
                            ? PorterStemmer.stem(preparedToken)
                            : preparedToken
                    });
                });
            }
            result.rawTokens = () => _.map(result, r => r.rawToken);
            result.tokens = () => _.map(result, r => r.token);
            return result;
        }

        /**
         * Helper function for removing occurrences of matches for regex expressions in the regexpArray from the string
         * @param string {String}
         * @param regexpArray {RegExp[]}
         * @return {String}
         */
        static removeRegexp(string, regexpArray) {
            return _.reduce(
                regexpArray,
                (result, regexp) => {
                    return _.replace(result, regexp, "");
                },
                string
            );
        }

        /**
         * For a given query string apply the matchers and filters.
         * The `searchRemoveRegexp` expressions given to the constructor are removed from the query before
         * the matchers run; the filters receive the unmodified query.
         * @param query {String}
         * @return {{document: *,  score: number}[]} results sorted by descending score
         */
        search(query) {
            // search using matchers and merge the results
            const matchResults = this._applyMatchers(
                query,
                this.matchers,
                this.searchRemoveRegexp
            );

            if (DEBUG) {
                console.log("match results");
                console.log(matchResults.slice(0, 20));
            }

            // run the filtering
            const filteredResults = this._applyFilters(
                matchResults,
                this.filters,
                query
            );

            if (DEBUG) {
                console.log("filtered results");
                console.log(filteredResults.slice(0, 20));
            }

            return filteredResults;
        }

        /**
         * aggregates scores for matchers
         * @param matcherOutputs one array of match results per matcher
         * @private
         * @return [] one entry per document, sorted by descending score
         */
        _mergeResults(matcherOutputs) {
            const flattenedResults = _.flatten(matcherOutputs);

            // check for results which weren't skipped
            // use those results if available
            const validMatches = _.filter(flattenedResults, m => !m.skip);

            // group match results by the key
            const groupedResults = _.groupBy(
                _.isEmpty(validMatches) ? flattenedResults : validMatches,
                "key"
            );

            // aggregate
            return _.map(groupedResults, (matchResults, key) => {
                const result = {
                    document: this.documentMap[key],
                    score: _.sumBy(matchResults, r => r.score)
                };

                if (DEBUG) {
                    result.metadata = _transpose(
                        _.map(matchResults, r => r.metadata)
                    );
                }

                return result;
            }).sort(({ score: a }, { score: b }) => b - a);
        }

        /**
         * Helper function to apply the matchers and merge the results
         * @param query
         * @param matchers matchers to run
         * @param [regexpArray] regexes for terms to remove from the query
         * @private
         * @return {*[]}
         */
        _applyMatchers(query, matchers, regexpArray) {
            const queryWithoutFilterTerms = SearchFactory.removeRegexp(
                query,
                regexpArray
            );
            return this._mergeResults(
                _.map(matchers, m => m.match(queryWithoutFilterTerms))
            );
        }

        /**
         * Helper function to apply filters, one after the other
         * @param results result documents
         * @param filters
         * @param query
         * @return {*[]}
         * @private
         */
        _applyFilters(results, filters, query) {
            return _.reduce(
                filters,
                (resultsToFilter, nextFilter) => {
                    return nextFilter.filter(resultsToFilter, query);
                },
                results
            );
        }
    }

    // Register the search utilities on the existing "cpir" AngularJS module.
    // The provider's $get returns the three public classes, so consumers that inject
    // SearchUtilsService receive { Matcher, Filter, SearchFactory }.
    angular.module("cpir").provider(
        "SearchUtilsService",
        class {
            $get() {
                return {
                    Matcher,
                    Filter,
                    SearchFactory
                };
            }
        }
    );
})();
