/*
 * Decompiled with CFR 0.152.
 */
package eus.ixa.ixa.pipe.pos.dict;

import com.google.common.base.Joiner;
import eus.ixa.ixa.pipe.pos.StringUtils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.util.Span;

/**
 * Finds multiword expressions in a token sequence by looking token windows
 * up in a dictionary loaded from a classpath resource.
 *
 * <p>The dictionary is held in a static field and is loaded at most once, by
 * the first constructor call that finds it unset. Keys are lower-cased
 * multiword forms with {@code #} replaced by a single space; values are the
 * third tab-separated column of the corresponding dictionary line.
 *
 * <p>NOTE(review): the lazy initialization of the static dictionary is not
 * synchronized; confirm that instances are not constructed concurrently.
 */
public class MultiWordMatcher {
    /** Splits a dictionary line into its tab-separated columns. */
    private static final Pattern tabPattern = Pattern.compile("\t");
    /** Matches the {@code #} that joins the words of a dictionary entry. */
    private static final Pattern linePattern = Pattern.compile("#");
    /** Shared lookup table; {@code null} until a load completes successfully. */
    private static Map<String, String> dictionary;

    /**
     * Creates a matcher, loading the shared dictionary if it is not loaded yet.
     *
     * @param props configuration; the {@code language} property selects the
     *              dictionary resource
     * @throws IOException if the dictionary resource cannot be read
     */
    public MultiWordMatcher(Properties props) throws IOException {
        if (dictionary == null) {
            this.loadDictionary(props);
        }
    }

    /**
     * Reads the dictionary for the configured language and publishes it in the
     * static field.
     *
     * <p>Only lines with exactly four tab-separated columns are used; any other
     * line is reported on standard error and skipped.
     *
     * @param props configuration holding the {@code language} property
     * @throws IOException if reading or closing the resource fails
     */
    private void loadDictionary(Properties props) throws IOException {
        String lang = props.getProperty("language");
        InputStream dictInputStream = this.getMultiWordDict(lang);
        if (dictInputStream == null) {
            System.err.println("ERROR: Not multiword dictionary for language " + lang + " in src/main/resources!!");
            // NOTE(review): terminating the JVM from library code is harsh; kept
            // because existing callers may depend on the exit code.
            System.exit(1);
        }
        // Fill a local map first so that a failure half-way through never leaves
        // a partially loaded dictionary visible to later instances.
        Map<String, String> loaded = new HashMap<String, String>();
        BufferedReader breader = new BufferedReader(new InputStreamReader(dictInputStream, Charset.forName("UTF-8")));
        try {
            String line;
            while ((line = breader.readLine()) != null) {
                String[] lineArray = tabPattern.split(line);
                if (lineArray.length != 4) {
                    System.err.println("WARNING: line starting with " + lineArray[0] + " is not well-formed; skipping!!");
                    continue;
                }
                // Locale.ROOT keeps the keys identical regardless of the JVM's
                // default locale; the lookup side lower-cases the same way.
                Matcher lineMatcher = linePattern.matcher(lineArray[0].toLowerCase(Locale.ROOT));
                loaded.put(lineMatcher.replaceAll(" "), lineArray[2]);
            }
        } finally {
            // Closing the reader also closes the underlying resource stream.
            breader.close();
        }
        dictionary = loaded;
    }

    /**
     * Opens the bundled multiword dictionary for a language code.
     *
     * @param lang language code, compared case-insensitively; may be {@code null}
     * @return the resource stream, or {@code null} when the language is
     *         {@code null} or unsupported, or the resource is not on the classpath
     */
    private final InputStream getMultiWordDict(String lang) {
        // Constant-first comparisons make a missing "language" property yield
        // null here instead of a NullPointerException.
        String resource = null;
        if ("en".equalsIgnoreCase(lang)) {
            resource = "/lemmatizer-dicts/freeling/en-locutions-extended.txt";
        } else if ("es".equalsIgnoreCase(lang)) {
            resource = "/lemmatizer-dicts/freeling/es-locutions.txt";
        } else if ("gl".equalsIgnoreCase(lang)) {
            resource = "/lemmatizer-dicts/ctag/gl-locutions.txt";
        }
        return resource == null ? null : this.getClass().getResourceAsStream(resource);
    }

    /**
     * Returns a copy of the tokens in which every detected multiword is
     * collapsed into a single token whose parts are joined with {@code #}.
     *
     * @param tokens the input tokens; not modified
     * @return a new array with the multiwords merged
     */
    public final String[] getTokensWithMultiWords(String[] tokens) {
        Span[] multiWordSpans = this.multiWordsToSpans(tokens);
        List<String> tokenList = new ArrayList<String>(Arrays.asList(tokens));
        // Number of list positions removed so far by earlier merges; span
        // offsets refer to the original array and must be shifted by it.
        int counter = 0;
        for (Span mwSpan : multiWordSpans) {
            int fromIndex = mwSpan.getStart() - counter;
            int toIndex = mwSpan.getEnd() - counter;
            List<String> window = tokenList.subList(fromIndex, toIndex);
            String multiWord = Joiner.on("#").join(window);
            counter += window.size() - 1;
            // Clearing the view removes the covered tokens from tokenList itself.
            window.clear();
            tokenList.add(fromIndex, multiWord);
        }
        return tokenList.toArray(new String[tokenList.size()]);
    }

    /**
     * Scans the tokens from left to right and, at each position, keeps the
     * longest token window whose lower-cased string form is a dictionary key.
     * Scanning resumes right after a match, so the returned spans do not overlap.
     *
     * @param tokens the input tokens
     * @return the matched spans in order of appearance; each is created with
     *         start (inclusive), end (exclusive) and the dictionary value as
     *         the third constructor argument
     */
    public final Span[] multiWordsToSpans(String[] tokens) {
        List<Span> multiWordsFound = new ArrayList<Span>();
        // Invariant for the whole scan, so evaluate it once.
        int maxTokenCount = this.getMaxTokenCount();
        for (int offsetFrom = 0; offsetFrom < tokens.length; ++offsetFrom) {
            Span multiwordFound = null;
            for (int offsetTo = offsetFrom; offsetTo < tokens.length; ++offsetTo) {
                int lengthSearching = offsetTo - offsetFrom + 1;
                if (lengthSearching > maxTokenCount) {
                    break;
                }
                String[] tokensSearching = Arrays.copyOfRange(tokens, offsetFrom, offsetTo + 1);
                String entryForSearch = StringUtils.getStringFromTokens(tokensSearching);
                String entryValue = dictionary.get(entryForSearch.toLowerCase(Locale.ROOT));
                if (entryValue != null) {
                    // A later, longer match replaces a shorter one found earlier.
                    multiwordFound = new Span(offsetFrom, offsetTo + 1, entryValue);
                }
            }
            if (multiwordFound == null) {
                continue;
            }
            multiWordsFound.add(multiwordFound);
            // Skip the tokens covered by the match; the loop's own increment
            // then moves to the first token after it.
            offsetFrom += multiwordFound.length() - 1;
        }
        return multiWordsFound.toArray(new Span[multiWordsFound.size()]);
    }

    /**
     * Returns the limit on the window length tried by
     * {@link #multiWordsToSpans(String[])}.
     *
     * <p>NOTE(review): this is the number of dictionary entries, not the token
     * count of the longest entry, so it is only a loose upper bound.
     *
     * @return the number of entries in the dictionary
     */
    public int getMaxTokenCount() {
        return dictionary.size();
    }
}

