/*
 * Decompiled with CFR 0.152.
 */
package eus.ixa.ixa.pipe.nerc;

import com.google.common.collect.Lists;
import eus.ixa.ixa.pipe.nerc.DictionariesNameFinder;
import eus.ixa.ixa.pipe.nerc.Name;
import eus.ixa.ixa.pipe.nerc.NameFactory;
import eus.ixa.ixa.pipe.nerc.NumericNameFinder;
import eus.ixa.ixa.pipe.nerc.SpanUtils;
import eus.ixa.ixa.pipe.nerc.StatisticalNameFinder;
import eus.ixa.ixa.pipe.nerc.StringUtils;
import eus.ixa.ixa.pipe.nerc.dict.Dictionaries;
import eus.ixa.ixa.pipe.nerc.train.Flags;
import ixa.kaflib.Entity;
import ixa.kaflib.KAFDocument;
import ixa.kaflib.Span;
import ixa.kaflib.Term;
import ixa.kaflib.WF;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.NameSample;

/**
 * Runs named entity recognition over a {@link KAFDocument} and serializes the
 * result in several formats (KAF, OpenNLP, CoNLL 2002, CoNLL 2003).
 *
 * <p>Depending on the properties passed to the constructor, spans are produced
 * by a statistical name finder, a dictionary name finder, a numeric lexer
 * finder, or a combination of them. See {@link #annotateOptions(Properties)}
 * for how the properties map to the active finders.
 */
public class Annotate {
    /** Property value that marks an option as disabled. */
    private static final String OFF = "off";
    /** First-token prefix that triggers an adaptive data reset when clearFeatures is "docstart". */
    private static final String DOCSTART_MARKER = "-DOCSTART-";

    private final NameFactory nameFactory;
    /** Only created when {@link #statistical} is true; null in dictionary tag-only mode. */
    private StatisticalNameFinder nameFinder;
    private Dictionaries dictionaries;
    private DictionariesNameFinder dictFinder;
    private NumericNameFinder numericLexerFinder;
    private boolean statistical;
    private boolean postProcess;
    private boolean dictTag;
    private boolean lexerFind;
    /** Value of the "clearFeatures" property; may be null when the property is absent. */
    private final String clearFeatures;

    /**
     * Builds an annotator configured from the given properties.
     *
     * @param properties reads "clearFeatures", "ruleBasedOption", "dictTag" and
     *                   "dictPath"; the same object is handed to the statistical
     *                   name finder when one is created
     * @throws IOException propagated from the construction of the finders
     */
    public Annotate(Properties properties) throws IOException {
        this.clearFeatures = properties.getProperty("clearFeatures");
        this.nameFactory = new NameFactory();
        this.annotateOptions(properties);
    }

    /**
     * Decides which finders are active.
     *
     * <ul>
     *   <li>"ruleBasedOption" different from "off": the numeric lexer finder is used.</li>
     *   <li>"dictTag" equal to "off": only the statistical finder is used.</li>
     *   <li>"dictTag" equal to "tag": only the dictionary finder is used.</li>
     *   <li>"dictTag" equal to "post": statistical finder, post-processed with dictionary spans.</li>
     *   <li>any other "dictTag" value: dictionaries are loaded and the statistical finder is used.</li>
     * </ul>
     *
     * Missing properties are treated as "off" instead of failing with a
     * NullPointerException.
     *
     * @param properties the configuration properties
     * @throws IOException propagated from the construction of the finders
     */
    private void annotateOptions(Properties properties) throws IOException {
        String ruleBasedOption = properties.getProperty("ruleBasedOption", OFF);
        String dictOption = properties.getProperty("dictTag", OFF);
        String dictPath = properties.getProperty("dictPath", OFF);

        // The lexer flag depends only on ruleBasedOption, whatever the dictionary mode is.
        this.lexerFind = !OFF.equals(ruleBasedOption);
        this.statistical = false;
        this.postProcess = false;
        this.dictTag = false;

        if (OFF.equals(dictOption)) {
            this.useStatisticalFinder(properties);
            return;
        }
        if (OFF.equals(dictPath)) {
            // A dictionary mode without a dictionary path is a configuration error.
            // NOTE(review): presumably dictionaryException() aborts; if it returns,
            // no span finder other than the optional lexer is configured.
            Flags.dictionaryException();
            return;
        }
        if (this.dictionaries == null) {
            this.dictionaries = new Dictionaries(dictPath);
            this.dictFinder = new DictionariesNameFinder(this.dictionaries, this.nameFactory);
        }
        if (dictOption.equalsIgnoreCase("tag")) {
            this.dictTag = true;
        } else {
            this.useStatisticalFinder(properties);
            this.postProcess = dictOption.equalsIgnoreCase("post");
        }
    }

    /** Creates the statistical name finder and marks statistical tagging as active. */
    private void useStatisticalFinder(Properties properties) throws IOException {
        this.nameFinder = new StatisticalNameFinder(properties, this.nameFactory);
        this.statistical = true;
    }

    /**
     * @return the statistical name finder, or null when the annotator was
     *         configured for dictionary-only tagging
     */
    public StatisticalNameFinder getStatisticalNameFinder() {
        return this.nameFinder;
    }

    /**
     * Finds the named entities of every sentence and adds them to the document
     * as entities whose type is the type of the detected name.
     *
     * @param kaf the document to annotate; it is modified in place
     * @throws IOException declared for compatibility with existing callers
     */
    public final void annotateNEs(KAFDocument kaf) throws IOException {
        for (List<WF> sentence : kaf.getSentences()) {
            String[] tokens = new String[sentence.size()];
            String[] tokenIds = new String[sentence.size()];
            for (int i = 0; i < sentence.size(); ++i) {
                WF wordForm = sentence.get(i);
                tokens[i] = wordForm.getForm();
                tokenIds[i] = wordForm.getId();
            }
            opennlp.tools.util.Span[] spans = this.findNonOverlappingSpans(tokens);
            List<Name> names = this.statistical
                    ? this.nameFinder.getNamesFromSpans(spans, tokens)
                    : this.dictFinder.getNamesFromSpans(spans, tokens);
            for (Name name : names) {
                int startIndex = name.getSpan().getStart();
                int endIndex = name.getSpan().getEnd();
                // Span indexes are token positions, so they select the matching word form ids.
                List<Term> nameTerms = kaf.getTermsFromWFs(Arrays.asList(Arrays.copyOfRange(tokenIds, startIndex, endIndex)));
                Span<Term> neSpan = KAFDocument.newTermSpan(nameTerms);
                ArrayList<Span<Term>> references = new ArrayList<Span<Term>>();
                references.add(neSpan);
                Entity neEntity = kaf.newEntity(references);
                neEntity.setType(name.getType());
            }
            if ("yes".equalsIgnoreCase(this.clearFeatures)) {
                this.resetAdaptiveData();
            }
        }
        this.resetAdaptiveData();
    }

    /**
     * @param kaf the document to serialize
     * @return the string form of the document as produced by its toString()
     */
    public final String annotateNEsToKAF(KAFDocument kaf) {
        return kaf.toString();
    }

    /**
     * Finds the named entities of every sentence and serializes each sentence
     * as an OpenNLP {@link NameSample}, one per line.
     *
     * @param kaf the document providing the sentences; it is not modified here
     * @return the concatenated name samples, each followed by a newline
     */
    public final String annotateNEsToOpenNLP(KAFDocument kaf) {
        StringBuilder sb = new StringBuilder();
        boolean isClearAdaptiveData = "yes".equalsIgnoreCase(this.clearFeatures);
        for (List<WF> sentence : kaf.getSentences()) {
            String[] tokens = new String[sentence.size()];
            for (int i = 0; i < sentence.size(); ++i) {
                tokens[i] = sentence.get(i).getForm();
            }
            opennlp.tools.util.Span[] spans = this.findNonOverlappingSpans(tokens);
            NameSample nameSample = new NameSample(tokens, spans, isClearAdaptiveData);
            sb.append(nameSample.toString()).append("\n");
        }
        this.resetAdaptiveData();
        return sb.toString();
    }

    /**
     * Runs every active finder over one sentence and merges the results.
     *
     * @param tokens the token forms of the sentence
     * @return the merged spans after dropping overlapping ones
     */
    private opennlp.tools.util.Span[] findNonOverlappingSpans(String[] tokens) {
        // A fresh list per sentence: nothing leaks between sentences and the list is
        // never null, even when neither the statistical nor the dictionary tagger is active.
        ArrayList<opennlp.tools.util.Span> allSpans = new ArrayList<opennlp.tools.util.Span>();
        if (this.statistical) {
            boolean startsNewDocument = tokens.length > 0 && tokens[0].startsWith(DOCSTART_MARKER);
            if ("docstart".equalsIgnoreCase(this.clearFeatures) && startsNewDocument) {
                this.nameFinder.clearAdaptiveData();
            }
            allSpans = Lists.newArrayList(this.nameFinder.nercToSpans(tokens));
        }
        if (this.postProcess) {
            opennlp.tools.util.Span[] dictSpans = this.dictFinder.nercToSpansExact(tokens);
            SpanUtils.postProcessDuplicatedSpans(allSpans, dictSpans);
            SpanUtils.concatenateSpans(allSpans, dictSpans);
        }
        if (this.dictTag) {
            // Dictionary tag-only mode replaces whatever was collected so far.
            allSpans = Lists.newArrayList(this.dictFinder.nercToSpansExact(tokens));
        }
        if (this.lexerFind) {
            String sentenceText = StringUtils.getStringFromTokens(tokens);
            BufferedReader sentenceReader = new BufferedReader(new StringReader(sentenceText));
            this.numericLexerFinder = new NumericNameFinder(sentenceReader, this.nameFactory);
            SpanUtils.concatenateSpans(allSpans, this.numericLexerFinder.nercToSpans(tokens));
        }
        return NameFinderME.dropOverlappingSpans(allSpans.toArray(new opennlp.tools.util.Span[allSpans.size()]));
    }

    /** Clears the adaptive data of the statistical finder, if one was configured. */
    private void resetAdaptiveData() {
        if (this.nameFinder != null) {
            this.nameFinder.clearAdaptiveData();
        }
    }

    /**
     * Serializes the entities already present in the document in CoNLL 2003
     * style: one term per line with form, lemma, morphofeat and tag separated
     * by tabs. Entity terms are tagged "I-TYPE", except that the first term of
     * an entity is tagged "B-TYPE" when the previous term of the sentence
     * belongs to an entity of the same type. Other terms are tagged "O".
     *
     * @param kaf the document holding terms and entities
     * @return the serialized document, with an empty line after each sentence
     */
    public String annotateNEsToCoNLL2003(KAFDocument kaf) {
        return this.toCoNLL(kaf, true);
    }

    /**
     * Serializes the entities already present in the document in CoNLL 2002
     * style: one term per line with form, lemma, morphofeat and tag separated
     * by tabs. The first term of every entity is tagged "B-TYPE", the remaining
     * ones "I-TYPE", and terms outside entities "O".
     *
     * @param kaf the document holding terms and entities
     * @return the serialized document, with an empty line after each sentence
     */
    public String annotateNEsToCoNLL2002(KAFDocument kaf) {
        return this.toCoNLL(kaf, false);
    }

    /**
     * Shared CoNLL writer for both dialects.
     *
     * @param kaf the document holding terms and entities
     * @param beginOnlyAfterSameType when true (CoNLL 2003) the B- tag is only used for an
     *                               entity that directly follows another entity of the same
     *                               type; when false (CoNLL 2002) every entity starts with B-
     * @return the serialized document
     */
    private String toCoNLL(KAFDocument kaf, boolean beginOnlyAfterSameType) {
        // Both maps are keyed by the id of the first term of each entity span.
        HashMap<String, Integer> entityToSpanSize = new HashMap<String, Integer>();
        HashMap<String, String> entityToType = new HashMap<String, String>();
        for (Entity ne : kaf.getEntities()) {
            for (Span<Term> spanTerm : ne.getSpans()) {
                String firstTermId = spanTerm.getFirstTarget().getId();
                entityToSpanSize.put(firstTermId, spanTerm.size());
                entityToType.put(firstTermId, ne.getType());
            }
        }
        StringBuilder sb = new StringBuilder();
        for (List<WF> sentence : kaf.getSentences()) {
            if (sentence.isEmpty()) {
                // No word form to read the sentence number from.
                continue;
            }
            int sentNumber = sentence.get(0).getSent();
            List<Term> sentenceTerms = kaf.getSentenceTerms(sentNumber);
            // Type of the entity the previous term belonged to; null when it was outside any entity.
            String previousEntityType = null;
            int i = 0;
            while (i < sentenceTerms.size()) {
                Term firstTerm = sentenceTerms.get(i);
                Integer neSpanSize = entityToSpanSize.get(firstTerm.getId());
                if (neSpanSize == null) {
                    appendTermColumns(sb, firstTerm);
                    sb.append(BIO.OUT.toString()).append("\n");
                    previousEntityType = null;
                    ++i;
                    continue;
                }
                String neType = this.convertToConLLTypes(entityToType.get(firstTerm.getId()));
                boolean followsSameType = previousEntityType != null && previousEntityType.equalsIgnoreCase(neType);
                boolean beginFirstTerm = !beginOnlyAfterSameType || followsSameType;
                // A span always covers at least its first term, so the loop always advances.
                int spanLength = Math.max(1, neSpanSize);
                for (int j = 0; j < spanLength; ++j) {
                    appendTermColumns(sb, sentenceTerms.get(i + j));
                    BIO position = j == 0 && beginFirstTerm ? BIO.BEGIN : BIO.IN;
                    sb.append(position.toString()).append(neType).append("\n");
                }
                previousEntityType = neType;
                i += spanLength;
            }
            sb.append("\n");
        }
        return sb.toString();
    }

    /** Appends the form, lemma and morphofeat columns of a term, each followed by a tab. */
    private static void appendTermColumns(StringBuilder sb, Term term) {
        sb.append(term.getForm());
        sb.append("\t");
        sb.append(term.getLemma());
        sb.append("\t");
        sb.append(term.getMorphofeat());
        sb.append("\t");
    }

    /**
     * Maps an entity type to the label used in the CoNLL output.
     *
     * @param neType the entity type; must not be null
     * @return the first three characters of the type for "PERSON", "ORGANIZATION" and
     *         "LOCATION" (compared ignoring case); any other type is returned unchanged
     */
    public String convertToConLLTypes(String neType) {
        boolean isAbbreviated = neType.equalsIgnoreCase("PERSON")
                || neType.equalsIgnoreCase("ORGANIZATION")
                || neType.equalsIgnoreCase("LOCATION");
        return isAbbreviated ? neType.substring(0, 3) : neType;
    }

    /** Tag prefixes of the BIO scheme as written in the CoNLL output. */
    private static enum BIO {
        BEGIN("B-"),
        IN("I-"),
        OUT("O");

        private final String tag;

        private BIO(String tag) {
            this.tag = tag;
        }

        @Override
        public String toString() {
            return this.tag;
        }
    }
}

