/*
 * Decompiled with CFR 0.152.
 */
package eus.ixa.ixa.pipe.pos;

import eus.ixa.ixa.pipe.lemma.DictionaryLemmatizer;
import eus.ixa.ixa.pipe.lemma.MorfologikLemmatizer;
import eus.ixa.ixa.pipe.lemma.MultiWordMatcher;
import eus.ixa.ixa.pipe.pos.Morpheme;
import eus.ixa.ixa.pipe.pos.MorphoFactory;
import eus.ixa.ixa.pipe.pos.MorphoTagger;
import eus.ixa.ixa.pipe.pos.Resources;
import eus.ixa.ixa.pipe.pos.dict.DictionaryTagger;
import eus.ixa.ixa.pipe.pos.dict.MorfologikMorphoTagger;
import ixa.kaflib.KAFDocument;
import ixa.kaflib.Span;
import ixa.kaflib.Term;
import ixa.kaflib.WF;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

/**
 * Runs POS tagging and lemmatization over the sentences of a {@link KAFDocument}
 * and writes the result either back into the document as terms
 * ({@link #annotatePOSToKAF(KAFDocument)}) or into a tab-separated,
 * CoNLL-style string ({@link #annotatePOSToCoNLL(KAFDocument)}).
 *
 * <p>Behaviour is driven by the {@link Properties} passed to the constructor:
 * <ul>
 *   <li>{@code language} - language code used to pick dictionaries and the
 *       tag-set mapping ({@code en}, {@code es} and {@code gl} have a mapping);</li>
 *   <li>{@code multiwords} - when {@code "true"}, multiword expressions are
 *       detected and merged before tagging;</li>
 *   <li>{@code dictag} - when {@code "true"}, the statistical tag is
 *       post-processed by a dictionary-based tagger.</li>
 * </ul>
 */
public class Annotate {
    /** KAF POS value used when no mapping rule (or no language mapping) applies. */
    private static final String KAF_OTHER = "O";

    private final MorphoTagger posTagger;
    private final String lang;
    private final MorphoFactory morphoFactory;
    private final DictionaryLemmatizer dictLemmatizer;
    private final boolean multiwords;
    /** Only non-null when {@link #multiwords} is {@code true}. */
    private final MultiWordMatcher multiWordMatcher;
    private final boolean dictag;
    /** Only non-null when {@link #dictag} is {@code true}. */
    private final DictionaryTagger dictMorphoTagger;

    /**
     * Builds the annotator and loads every resource it needs.
     *
     * @param properties configuration; the keys {@code language},
     *                   {@code multiwords} and {@code dictag} are read here and
     *                   the whole object is forwarded to the multiword matcher
     *                   and the POS tagger
     * @throws IOException if a dictionary cannot be read
     */
    public Annotate(Properties properties) throws IOException {
        this.lang = properties.getProperty("language");
        // parseBoolean(null) is false, so a missing key simply disables the feature.
        this.multiwords = Boolean.parseBoolean(properties.getProperty("multiwords"));
        this.dictag = Boolean.parseBoolean(properties.getProperty("dictag"));
        this.multiWordMatcher = this.multiwords ? new MultiWordMatcher(properties) : null;
        this.dictMorphoTagger = this.dictag ? this.loadMorphoTaggerDict() : null;
        this.dictLemmatizer = this.loadLemmatizerDict();
        this.morphoFactory = new MorphoFactory();
        this.posTagger = new MorphoTagger(properties, this.morphoFactory);
    }

    /**
     * Loads the binary lemmatizer dictionary for {@link #lang}.
     *
     * <p>If no dictionary is bundled for the language an error is printed and
     * the JVM is terminated with status 1.
     *
     * @return the dictionary-backed lemmatizer
     * @throws IOException if the dictionary exists but cannot be read; it is
     *                     propagated rather than swallowed so that a broken
     *                     dictionary does not turn into a later
     *                     {@code NullPointerException}
     */
    private DictionaryLemmatizer loadLemmatizerDict() throws IOException {
        URL binLemmatizerURL = new Resources().getBinaryDict(this.lang);
        if (binLemmatizerURL == null) {
            System.err.println("ERROR: No binary lemmatizer dictionary available for language " + this.lang + " in src/main/resources!!");
            System.exit(1);
        }
        return new MorfologikLemmatizer(binLemmatizerURL, this.lang);
    }

    /**
     * Loads the binary POS tagger dictionary for {@link #lang}.
     *
     * <p>If no dictionary is bundled for the language an error is printed and
     * the JVM is terminated with status 1.
     *
     * @return the dictionary-backed tagger
     * @throws IOException if the dictionary exists but cannot be read
     */
    private DictionaryTagger loadMorphoTaggerDict() throws IOException {
        URL binDictMorphoTaggerURL = new Resources().getBinaryTaggerDict(this.lang);
        if (binDictMorphoTaggerURL == null) {
            System.err.println("ERROR: No binary POS tagger dictionary available for language " + this.lang + " in src/main/resources!!");
            System.exit(1);
        }
        return new MorfologikMorphoTagger(binDictMorphoTaggerURL, this.lang);
    }

    /**
     * Maps a tag of the English tag set to its one-letter KAF POS value.
     *
     * @param postag the tag produced by the tagger
     * @return the KAF POS letter, {@code "O"} when no rule matches
     */
    private String mapEnglishTagSetToKaf(String postag) {
        if (postag.startsWith("RB")) {
            return "A";
        }
        if (postag.equalsIgnoreCase("CC")) {
            return "C";
        }
        if (postag.startsWith("D") || postag.equalsIgnoreCase("PDT")) {
            return "D";
        }
        if (postag.startsWith("J")) {
            return "G";
        }
        // Exact match first: "NNP"/"NNPS" must fall through to the next rule.
        if (postag.equalsIgnoreCase("NN") || postag.equalsIgnoreCase("NNS")) {
            return "N";
        }
        if (postag.startsWith("NNP")) {
            return "R";
        }
        if (postag.equalsIgnoreCase("TO") || postag.equalsIgnoreCase("IN")) {
            return "P";
        }
        if (postag.startsWith("PRP") || postag.startsWith("WP")) {
            return "Q";
        }
        if (postag.startsWith("V")) {
            return "V";
        }
        return KAF_OTHER;
    }

    /**
     * Maps a tag of the Spanish tag set to its one-letter KAF POS value.
     *
     * @param postag the tag produced by the tagger
     * @return the KAF POS letter, {@code "O"} when no rule matches
     */
    private String mapSpanishTagSetToKaf(String postag) {
        if (postag.equalsIgnoreCase("RG") || postag.equalsIgnoreCase("RN")) {
            return "A";
        }
        if (postag.equalsIgnoreCase("CC") || postag.equalsIgnoreCase("CS")) {
            return "C";
        }
        if (postag.startsWith("D")) {
            return "D";
        }
        if (postag.startsWith("A")) {
            return "G";
        }
        if (postag.startsWith("NC")) {
            return "N";
        }
        if (postag.startsWith("NP")) {
            return "R";
        }
        // "SP" has to be tested before the generic "P" prefix below.
        if (postag.startsWith("SP")) {
            return "P";
        }
        if (postag.startsWith("P")) {
            return "Q";
        }
        if (postag.startsWith("V")) {
            return "V";
        }
        return KAF_OTHER;
    }

    /**
     * Maps a tag of the Galician tag set to its one-letter KAF POS value.
     *
     * @param postag the tag produced by the tagger
     * @return the KAF POS letter, {@code "O"} when no rule matches
     */
    private String mapGalicianTagSetToKaf(String postag) {
        if (postag.startsWith("R")) {
            return "A";
        }
        if (postag.equalsIgnoreCase("CC") || postag.equalsIgnoreCase("CS")) {
            return "C";
        }
        if (postag.startsWith("D") || postag.startsWith("G") || postag.startsWith("X") || postag.startsWith("Q") || postag.startsWith("T") || postag.startsWith("I") || postag.startsWith("M")) {
            return "D";
        }
        if (postag.startsWith("A")) {
            return "G";
        }
        if (postag.startsWith("NC")) {
            return "N";
        }
        if (postag.startsWith("NP")) {
            return "R";
        }
        if (postag.startsWith("S")) {
            return "P";
        }
        if (postag.startsWith("P")) {
            return "Q";
        }
        if (postag.startsWith("V")) {
            return "V";
        }
        return KAF_OTHER;
    }

    /**
     * Converts a language-specific tag into the KAF POS value using the
     * mapping that belongs to {@link #lang}.
     *
     * @param postag the tag produced by the tagger
     * @return the KAF POS letter; {@code "O"} when the language has no mapping,
     *         so callers never receive {@code null}
     */
    private String getKafTagSet(String postag) {
        if (this.lang.equalsIgnoreCase("en")) {
            return this.mapEnglishTagSetToKaf(postag);
        }
        if (this.lang.equalsIgnoreCase("es")) {
            return this.mapSpanishTagSetToKaf(postag);
        }
        if (this.lang.equalsIgnoreCase("gl")) {
            return this.mapGalicianTagSetToKaf(postag);
        }
        return KAF_OTHER;
    }

    /**
     * Derives the term type from a KAF POS value.
     *
     * @param postag a KAF POS letter
     * @return {@code "open"} when the value starts with N, V, G or A,
     *         {@code "close"} otherwise
     */
    private String setTermType(String postag) {
        if (postag.startsWith("N") || postag.startsWith("V") || postag.startsWith("G") || postag.startsWith("A")) {
            return "open";
        }
        return "close";
    }

    /**
     * Tags every sentence of the document and adds one term per morpheme,
     * carrying its type, lemma, KAF POS value and original tag (as morphofeat).
     *
     * @param kaf the document to read word forms from and add terms to
     */
    public final void annotatePOSToKAF(KAFDocument kaf) {
        for (List<WF> wfs : kaf.getSentences()) {
            String[] tokens = this.extractForms(wfs);
            List<Span<WF>> tokenSpans = this.singleTokenSpans(wfs);
            List<Morpheme> morphemes = this.tagTokens(tokens);
            if (this.multiwords) {
                // Collapse the per-token spans so they line up with the
                // multiword-merged morphemes.
                this.getMultiWordSpans(tokens, wfs, tokenSpans);
            }
            for (int i = 0; i < morphemes.size(); ++i) {
                Morpheme morpheme = morphemes.get(i);
                Span<WF> span = tokenSpans.get(i);
                Term term = kaf.newTerm(span);
                this.applyDictionaryTag(morpheme);
                String posId = this.getKafTagSet(morpheme.getTag());
                String type = this.setTermType(posId);
                String lemma = this.dictLemmatizer.lemmatize(morpheme.getWord(), morpheme.getTag());
                morpheme.setLemma(lemma);
                term.setType(type);
                term.setLemma(morpheme.getLemma());
                term.setPos(posId);
                term.setMorphofeat(morpheme.getTag());
            }
        }
    }

    /**
     * Rewrites {@code tokenSpans} in place so that every multiword expression
     * found in {@code tokens} is represented by a single span covering all of
     * its word forms.
     *
     * @param tokens     the original (unmerged) tokens of the sentence
     * @param wfs        the word forms of the sentence, parallel to {@code tokens}
     * @param tokenSpans one span per token on entry; one span per token or
     *                   multiword on exit
     */
    private void getMultiWordSpans(String[] tokens, List<WF> wfs, List<Span<WF>> tokenSpans) {
        opennlp.tools.util.Span[] multiWordSpans = this.multiWordMatcher.multiWordsToSpans(tokens);
        // Number of entries already removed from tokenSpans by earlier merges;
        // span offsets refer to the original token positions.
        int removed = 0;
        for (opennlp.tools.util.Span mwSpan : multiWordSpans) {
            int fromIndex = mwSpan.getStart() - removed;
            int toIndex = mwSpan.getEnd() - removed;
            removed += toIndex - fromIndex - 1;
            List<WF> wfTargets = wfs.subList(mwSpan.getStart(), mwSpan.getEnd());
            Span<WF> multiWordSpan = KAFDocument.newWFSpan(wfTargets);
            tokenSpans.subList(fromIndex, toIndex).clear();
            tokenSpans.add(fromIndex, multiWordSpan);
        }
    }

    /**
     * Tags every sentence of the document and renders the result as text: one
     * {@code word<TAB>lemma<TAB>tag} line per morpheme and an empty line after
     * each sentence. The document itself is not modified.
     *
     * @param kaf the document to read word forms from
     * @return the tab-separated annotation
     * @throws IOException declared for interface compatibility
     */
    public final String annotatePOSToCoNLL(KAFDocument kaf) throws IOException {
        StringBuilder sb = new StringBuilder();
        for (List<WF> wfs : kaf.getSentences()) {
            List<Morpheme> morphemes = this.tagTokens(this.extractForms(wfs));
            for (Morpheme morpheme : morphemes) {
                // The word is read before re-tagging, as the dictionary tagger
                // is looked up with the surface form.
                String word = morpheme.getWord();
                this.applyDictionaryTag(morpheme);
                String tag = morpheme.getTag();
                String lemma = this.dictLemmatizer.lemmatize(word, tag);
                sb.append(word).append("\t").append(lemma).append("\t").append(tag).append("\n");
            }
            sb.append("\n");
        }
        return sb.toString();
    }

    /** Returns the surface forms of the given word forms, in order. */
    private String[] extractForms(List<WF> wfs) {
        String[] tokens = new String[wfs.size()];
        for (int i = 0; i < tokens.length; ++i) {
            tokens[i] = wfs.get(i).getForm();
        }
        return tokens;
    }

    /** Builds a mutable list holding one single-element span per word form. */
    private List<Span<WF>> singleTokenSpans(List<WF> wfs) {
        List<Span<WF>> spans = new ArrayList<Span<WF>>(wfs.size());
        for (WF wf : wfs) {
            List<WF> target = new ArrayList<WF>(1);
            target.add(wf);
            spans.add(KAFDocument.newWFSpan(target));
        }
        return spans;
    }

    /** Runs the POS tagger on the tokens, merging multiwords first when enabled. */
    private List<Morpheme> tagTokens(String[] tokens) {
        if (this.multiwords) {
            String[] multiWordTokens = this.multiWordMatcher.getTokensWithMultiWords(tokens);
            return this.posTagger.getMorphemes(multiWordTokens);
        }
        return this.posTagger.getMorphemes(tokens);
    }

    /** Replaces the morpheme's tag with the dictionary tagger's answer when {@code dictag} is on. */
    private void applyDictionaryTag(Morpheme morpheme) {
        if (this.dictag) {
            String dictPosTag = this.dictMorphoTagger.tag(morpheme.getWord(), morpheme.getTag());
            morpheme.setTag(dictPosTag);
        }
    }
}

