/*
 * Decompiled with CFR 0.152.
 */
package org.cogroo.formats.ad;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.formats.ad.ADSentenceStream;
import opennlp.tools.formats.ad.PortugueseContractionUtility;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;

/**
 * Converts the sentences produced by an {@link ADSentenceStream} into
 * {@link NameSample}s.
 *
 * <p>Each sentence tree is flattened into a token list. Leaf lexemes are split on
 * {@code '_'}; a multi-token leaf whose functional tag is accepted (see the
 * {@code tags} constructor argument) becomes a named-entity {@link Span}.
 *
 * <p>A leaf whose secondary tag contains {@code <sam->} is treated as the left half
 * of a contraction and a following leaf whose secondary tag contains {@code <-sam>}
 * as its right half; the two are merged with
 * {@link PortugueseContractionUtility#toContraction(String, String)}.
 *
 * <p>Instances keep mutable parsing state and are not designed to be shared
 * between threads.
 */
public class ADExpNameSampleStream
implements ObjectStream<NameSample> {
    /** Metadata layout used when the first metadata string starts with {@code LIT}. */
    private static final Pattern LIT_META_PATTERN = Pattern.compile("^([a-zA-Z\\-]+)(\\d+).*?p=(\\d+).*");
    /** Metadata layout used when the first metadata string starts with {@code CIE}. */
    private static final Pattern CIE_META_PATTERN = Pattern.compile("^.*?source=\"(.*?)\".*");
    /** Metadata layout used for every other corpus. */
    private static final Pattern AMA_META_PATTERN = Pattern.compile("^(?:[a-zA-Z\\-]*(\\d+)).*?p=(\\d+).*");

    private final ObjectStream<ADSentenceStream.Sentence> adSentenceStream;
    /** Left half of a contraction that is still waiting for its right half, or {@code null}. */
    private String leftContractionPart = null;
    /** Functional tags accepted as entity types; {@code null} accepts every tag. */
    private final Set<String> tags;
    private final boolean useAdaptativeFeatures;
    /** Id of the text the previously returned sample belonged to; {@code -1} before the first sample. */
    int textID = -1;
    /** Corpus flavour, detected lazily from the first metadata string seen. */
    private Type corpusType = null;
    private Pattern metaPattern;
    /** Running counter used as text id for corpora that are keyed by a string. */
    private int textIdMeta2 = -1;
    /** Last string key seen for corpora that are keyed by a string. */
    private String textMeta2 = "";

    /**
     * Creates a stream that reads its sentences from a stream of lines.
     *
     * @param lineStream the lines handed to the underlying {@link ADSentenceStream}
     * @param tags functional tags to emit as named entities, or {@code null} to accept any tag
     * @param useAdaptativeFeatures when {@code true}, samples only request clearing of
     *        adaptive data when the text id changes; when {@code false}, every sample does
     */
    public ADExpNameSampleStream(ObjectStream<String> lineStream, Set<String> tags, boolean useAdaptativeFeatures) {
        this.adSentenceStream = new ADSentenceStream(lineStream);
        this.tags = tags;
        this.useAdaptativeFeatures = useAdaptativeFeatures;
    }

    /**
     * Creates a stream that reads its sentences from raw bytes.
     *
     * @param in the input to read
     * @param charsetName name of the charset used to decode {@code in}
     * @param tags functional tags to emit as named entities, or {@code null} to accept any tag
     * @param useAdaptativeFeatures when {@code true}, samples only request clearing of
     *        adaptive data when the text id changes; when {@code false}, every sample does
     * @throws IllegalStateException if {@code charsetName} is not supported
     */
    public ADExpNameSampleStream(InputStream in, String charsetName, Set<String> tags, boolean useAdaptativeFeatures) {
        this.tags = tags;
        this.useAdaptativeFeatures = useAdaptativeFeatures;
        try {
            this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(in, charsetName));
        }
        catch (UnsupportedEncodingException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Reads the next sentence and converts it to a {@link NameSample}.
     *
     * @return the next sample, or {@code null} when the underlying stream is exhausted
     * @throws IOException if the underlying stream fails
     * @throws RuntimeException if adaptive features are enabled and the sentence
     *         metadata does not match the layout expected for the corpus
     */
    @Override
    public NameSample read() throws IOException {
        ADSentenceStream.Sentence paragraph = this.adSentenceStream.read();
        if (paragraph == null) {
            return null;
        }

        boolean clearData = true;
        if (this.useAdaptativeFeatures) {
            int currentTextID = this.getTextID(paragraph);
            clearData = currentTextID != this.textID;
            this.textID = currentTextID;
        }

        List<String> sentence = new ArrayList<String>();
        List<Span> names = new ArrayList<Span>();
        this.process(paragraph.getRoot(), sentence, names);

        // A left contraction half that never met its right half must not leak into
        // the next sentence: keep it as a plain token of this one.
        if (this.leftContractionPart != null) {
            sentence.add(this.leftContractionPart);
            this.leftContractionPart = null;
        }

        return new NameSample(sentence.toArray(new String[sentence.size()]), names.toArray(new Span[names.size()]), clearData);
    }

    /**
     * Derives a numeric text id from the sentence metadata.
     *
     * <p>The corpus flavour is decided once, from the first metadata string seen.
     * For {@code lit} and {@code cie} the first regex group is a string key and the
     * id is a counter that advances whenever the key changes; for {@code ama} the
     * first group is parsed as the id itself.
     *
     * @param paragraph the sentence whose metadata is inspected
     * @return the id of the text the sentence belongs to
     * @throws RuntimeException if the metadata does not match the corpus pattern
     */
    private int getTextID(ADSentenceStream.Sentence paragraph) {
        String meta = paragraph.getMetadata();
        if (this.corpusType == null) {
            this.detectCorpusType(meta);
        }

        Matcher matcher = this.metaPattern.matcher(meta);
        if (!matcher.matches()) {
            throw new RuntimeException("Invalid metadata: " + meta);
        }

        switch (this.corpusType) {
            case lit:
            case cie:
                return this.sequentialTextId(matcher.group(1));
            case ama:
            default:
                return Integer.parseInt(matcher.group(1));
        }
    }

    /** Picks the corpus flavour and its metadata pattern from the metadata prefix. */
    private void detectCorpusType(String meta) {
        if (meta.startsWith("LIT")) {
            this.corpusType = Type.lit;
            this.metaPattern = LIT_META_PATTERN;
        } else if (meta.startsWith("CIE")) {
            this.corpusType = Type.cie;
            this.metaPattern = CIE_META_PATTERN;
        } else {
            this.corpusType = Type.ama;
            this.metaPattern = AMA_META_PATTERN;
        }
    }

    /** Returns the running text counter, advancing it when {@code key} differs from the last key. */
    private int sequentialTextId(String key) {
        if (!key.equals(this.textMeta2)) {
            ++this.textIdMeta2;
            this.textMeta2 = key;
        }
        return this.textIdMeta2;
    }

    /**
     * Walks the tree depth-first, in element order, feeding every leaf to
     * {@link #processLeaf}.
     *
     * @param node subtree to walk; {@code null} is ignored
     * @param sentence receives the tokens
     * @param names receives the named-entity spans
     */
    private void process(ADSentenceStream.SentenceParser.Node node, List<String> sentence, List<Span> names) {
        if (node == null) {
            return;
        }
        for (ADSentenceStream.SentenceParser.TreeElement element : node.getElements()) {
            if (element.isLeaf()) {
                this.processLeaf((ADSentenceStream.SentenceParser.Leaf)element, sentence, names);
            } else {
                this.process((ADSentenceStream.SentenceParser.Node)element, sentence, names);
            }
        }
    }

    /**
     * Appends the tokens of one leaf to {@code sentence} and, when the leaf is a
     * multi-token lexeme with an accepted functional tag, records its span.
     *
     * @param leaf the leaf to convert; {@code null} is ignored
     * @param sentence receives the tokens
     * @param names receives the named-entity spans
     */
    private void processLeaf(ADSentenceStream.SentenceParser.Leaf leaf, List<String> sentence, List<Span> names) {
        if (leaf == null) {
            // Previously a null leaf fell into the contraction branch and was dereferenced.
            return;
        }

        if (this.leftContractionPart != null) {
            String left = this.leftContractionPart;
            this.leftContractionPart = null;
            String right = leaf.getLexeme();
            String secondaryTag = leaf.getSecondaryTag();

            if (secondaryTag != null && secondaryTag.contains("<-sam>")) {
                String contraction = PortugueseContractionUtility.toContraction(left, right);
                if (contraction != null) {
                    sentence.add(contraction);
                } else {
                    System.err.println("missing " + left + " + " + right);
                    sentence.add(left);
                    sentence.add(right);
                }
                return;
            }

            // The expected right half never arrived. Keep the left half as an ordinary
            // token and handle this leaf normally instead of discarding both.
            System.err.println("unmatch" + left + " + " + right);
            sentence.add(left);
        }

        String namedEntityTag = null;
        String leafTag = leaf.getSecondaryTag();
        if (leafTag != null) {
            if (leafTag.contains("<sam->")) {
                // Everything but the last piece is emitted now; the last piece waits
                // for the right half of the contraction.
                String[] lexemes = leaf.getLexeme().split("_");
                for (int i = 0; i < lexemes.length - 1; ++i) {
                    sentence.add(lexemes[i]);
                }
                // split() yields an empty array for a lexeme made only of '_'.
                if (lexemes.length > 0) {
                    this.leftContractionPart = lexemes[lexemes.length - 1];
                }
                return;
            }
            if (leaf.getLexeme().contains("_") && leaf.getLexeme().length() > 3) {
                String functionalTag = leaf.getFunctionalTag();
                if (this.tags == null || this.tags.contains(functionalTag)) {
                    namedEntityTag = functionalTag;
                }
            }
        }

        int startOfNamedEntity = sentence.size();
        sentence.addAll(Arrays.asList(leaf.getLexeme().split("_")));
        if (namedEntityTag != null) {
            names.add(new Span(startOfNamedEntity, sentence.size(), namedEntityTag));
        }
    }

    /**
     * Resets the underlying stream and the text-tracking and contraction state, so
     * that a re-read detects text boundaries the same way as the first pass.
     *
     * @throws IOException if the underlying stream cannot be reset
     * @throws UnsupportedOperationException if the underlying stream does not support reset
     */
    @Override
    public void reset() throws IOException, UnsupportedOperationException {
        this.adSentenceStream.reset();
        this.leftContractionPart = null;
        this.textID = -1;
        this.textIdMeta2 = -1;
        this.textMeta2 = "";
    }

    /**
     * Closes the underlying sentence stream.
     *
     * @throws IOException if closing the underlying stream fails
     */
    @Override
    public void close() throws IOException {
        this.adSentenceStream.close();
    }

    /** Corpus flavours, distinguished by the prefix of the sentence metadata. */
    static enum Type {
        ama,
        cie,
        lit;

    }
}

