/*
 * Decompiled with CFR 0.152.
 */
package org.cogroo.formats.ad;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import opennlp.tools.formats.ad.ADSentenceStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import org.cogroo.tools.featurizer.FeatureSample;

/**
 * Adapts an {@link ADSentenceStream} into a stream of {@link FeatureSample}s.
 *
 * <p>Every sentence read from the wrapped stream is flattened, depth first, into four
 * parallel lists (tokens, lemmas, tags and morphological feature strings) which are
 * handed to the {@link FeatureSample} constructor.
 *
 * <p>An optional window can be configured with {@link #setStart(int)} and
 * {@link #setEnd(int)}; both default to {@code -1}, which disables the respective bound.
 *
 * <p>The class keeps mutable counters and performs no synchronization.
 */
public class ADFeaturizerSampleStream
implements ObjectStream<FeatureSample> {
    /** Source of parsed sentences. */
    private final ObjectStream<ADSentenceStream.Sentence> adSentenceStream;
    /** Lower bound of the window (inclusive); {@code -1} means "no lower bound". */
    private int start = -1;
    /** Upper bound of the window (exclusive); {@code -1} means "no upper bound". */
    private int end = -1;
    /** Number of sentences skipped because of {@code start} plus the number of samples returned. */
    private int index = 0;
    /** When {@code true}, lexemes joined with {@code '_'} are split into one token per part. */
    private boolean expandME;
    /** Number of {@link #read()} invocations since construction or the last {@link #reset()}. */
    private int callsCount = 0;

    /**
     * Creates a sample stream on top of a stream of lines.
     *
     * @param lineStream the lines to be parsed by an {@link ADSentenceStream}
     * @param expandME whether lexemes containing {@code '_'} should be split into separate tokens
     */
    public ADFeaturizerSampleStream(ObjectStream<String> lineStream, boolean expandME) {
        this.expandME = expandME;
        this.adSentenceStream = new ADSentenceStream(lineStream);
    }

    /**
     * Creates a sample stream on top of a raw input stream.
     *
     * @param in the input stream holding the corpus text
     * @param charsetName name of the charset used to decode {@code in}
     * @param expandME whether lexemes containing {@code '_'} should be split into separate tokens
     * @throws IllegalStateException if {@code charsetName} is not supported
     */
    public ADFeaturizerSampleStream(InputStream in, String charsetName, boolean expandME) {
        try {
            this.expandME = expandME;
            this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(in, charsetName));
        }
        catch (UnsupportedEncodingException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Returns the next non-empty sample inside the configured window.
     *
     * <p>Sentences are skipped while {@code index < start}; sentences that yield no tokens
     * are skipped without advancing {@code index}.
     *
     * @return the next sample, or {@code null} when the wrapped stream is exhausted or the
     *         {@code end} bound has been reached
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public FeatureSample read() throws IOException {
        ADSentenceStream.Sentence paragraph;
        // Counted per call (not per sentence); processLeaf uses its parity.
        ++this.callsCount;
        while ((paragraph = this.adSentenceStream.read()) != null) {
            if (this.end > -1 && this.index >= this.end) {
                // Past the upper bound: behave as if the stream had ended.
                return null;
            }
            if (this.start > -1 && this.index < this.start) {
                // Before the lower bound: skip this sentence.
                ++this.index;
                continue;
            }
            ADSentenceStream.SentenceParser.Node root = paragraph.getRoot();
            List<String> sentence = new ArrayList<String>();
            List<String> lemma = new ArrayList<String>();
            List<String> tags = new ArrayList<String>();
            List<String> target = new ArrayList<String>();
            this.processRoot(root, sentence, lemma, tags, target);
            if (sentence.size() <= 0) {
                // Nothing was extracted from this sentence; try the next one.
                continue;
            }
            ++this.index;
            return new FeatureSample(sentence, lemma, tags, target);
        }
        return null;
    }

    /**
     * Walks the direct children of the sentence root, appending to the output lists.
     * A {@code null} root is ignored.
     */
    private void processRoot(ADSentenceStream.SentenceParser.Node root, List<String> sentence, List<String> lemmas, List<String> tags, List<String> target) {
        if (root == null) {
            return;
        }
        ADSentenceStream.SentenceParser.TreeElement[] elements = root.getElements();
        for (int i = 0; i < elements.length; ++i) {
            if (elements[i].isLeaf()) {
                this.processLeaf((ADSentenceStream.SentenceParser.Leaf)elements[i], false, "O", sentence, lemmas, tags, target);
            } else {
                this.processNode((ADSentenceStream.SentenceParser.Node)elements[i], sentence, lemmas, tags, target, null);
            }
        }
    }

    /**
     * Recursively walks a non-leaf node, appending every leaf below it to the output lists.
     *
     * <p>A phrase tag is derived from the node's syntactic tag, falling back to
     * {@code inheritedTag} when the node itself maps to {@code "O"}. The tag and the
     * "intermediate" flag computed here are passed to {@link #processLeaf}, which
     * currently does not use them.
     *
     * @param inheritedTag phrase tag of the enclosing node, or {@code null} at the top level
     */
    private void processNode(ADSentenceStream.SentenceParser.Node node, List<String> sentence, List<String> lemmas, List<String> tags, List<String> target, String inheritedTag) {
        String phraseTag = this.getChunkTag(node.getSyntacticTag());
        boolean inherited = false;
        if (phraseTag.equals("O") && inheritedTag != null) {
            phraseTag = inheritedTag;
            inherited = true;
        }
        ADSentenceStream.SentenceParser.TreeElement[] elements = node.getElements();
        for (int i = 0; i < elements.length; ++i) {
            if (!elements[i].isLeaf()) {
                this.processNode((ADSentenceStream.SentenceParser.Node)elements[i], sentence, lemmas, tags, target, phraseTag);
                continue;
            }
            boolean isIntermediate = false;
            if (i > 0 && elements[i - 1].isLeaf() && phraseTag != null && !phraseTag.equals("O")) {
                isIntermediate = true;
            }
            if (inherited && target.size() > 0 && target.get(target.size() - 1).endsWith(phraseTag)) {
                isIntermediate = true;
            }
            this.processLeaf((ADSentenceStream.SentenceParser.Leaf)elements[i], isIntermediate, phraseTag, sentence, lemmas, tags, target);
        }
    }

    /**
     * Appends one leaf to the output lists.
     *
     * <ul>
     *   <li>On even-numbered {@link #read()} calls, guillemets are replaced by a straight
     *       double quote (presumably so that both quotation styles show up in the
     *       samples; confirm with the corpus owners).</li>
     *   <li>The feature string is {@code "-"} when the leaf has no morphological tag,
     *       otherwise the tag with spaces replaced by {@code '='}.</li>
     *   <li>A leaf without syntactic tag uses its lexeme as both tag and lemma; otherwise
     *       the tag comes from the leaf's functional tag. Leaves whose tag ends up
     *       {@code null} are dropped.</li>
     *   <li>With {@code expandME} set, a non-{@code "prop"} lexeme containing {@code '_'}
     *       is split on {@code '_'}; the parts are used as tokens and as lemmas, tagged
     *       {@code B-tag} / {@code I-tag}, and each part gets the same feature string.</li>
     * </ul>
     *
     * @param isIntermediate currently unused
     * @param phraseTag currently unused
     */
    private void processLeaf(ADSentenceStream.SentenceParser.Leaf leaf, boolean isIntermediate, String phraseTag, List<String> sentence, List<String> lemmas, List<String> tags, List<String> target) {
        String postag;
        String lemma = leaf.getLemma();
        String lexeme = leaf.getLexeme();
        String featureTag = leaf.getMorphologicalTag();
        if (("\u00ab".equals(lexeme) || "\u00bb".equals(lexeme)) && this.callsCount % 2 == 0) {
            lexeme = "\"";
        }
        featureTag = featureTag == null ? "-" : featureTag.replace(" ", "=");
        if (leaf.getSyntacticTag() == null) {
            postag = lexeme;
            lemma = lexeme;
        } else {
            postag = ADFeaturizerSampleStream.convertFuncTag(leaf.getFunctionalTag());
        }
        if (postag == null) {
            return;
        }
        if (this.expandME && lexeme.contains("_") && !"prop".equals(postag)) {
            StringTokenizer tokenizer = new StringTokenizer(lexeme, "_");
            if (tokenizer.countTokens() > 0) {
                List<String> toks = new ArrayList<String>(tokenizer.countTokens());
                List<String> tagsWithCont = new ArrayList<String>(tokenizer.countTokens());
                toks.add(tokenizer.nextToken());
                tagsWithCont.add("B-" + postag);
                target.add(featureTag);
                while (tokenizer.hasMoreTokens()) {
                    toks.add(tokenizer.nextToken());
                    tagsWithCont.add("I-" + postag);
                    target.add(featureTag);
                }
                lemmas.addAll(toks);
                sentence.addAll(toks);
                tags.addAll(tagsWithCont);
                return;
            }
            // The lexeme consisted only of '_' characters: fall through and keep it whole.
        }
        sentence.add(lexeme);
        lemmas.add(lemma);
        target.add(featureTag);
        tags.add(postag);
    }

    /** Maps a functional tag to the tag stored in the sample; currently the identity. */
    private static String convertFuncTag(String t) {
        return t;
    }

    /**
     * Derives a phrase tag from a syntactic tag.
     *
     * @param tag the syntactic tag; only the part after the last {@code ':'} is considered
     * @return the upper-cased suffix when it is one of {@code np}, {@code vp}, {@code pp},
     *         {@code ap} or {@code advp}; {@code "O"} otherwise
     */
    private String getChunkTag(String tag) {
        String phraseTag = tag.substring(tag.lastIndexOf(":") + 1);
        phraseTag = phraseTag.equals("np") || phraseTag.equals("vp") || phraseTag.equals("pp") || phraseTag.equals("ap") || phraseTag.equals("advp") ? phraseTag.toUpperCase() : "O";
        return phraseTag;
    }

    /**
     * Sets the lower bound of the window.
     *
     * @param aStart number of leading sentences to skip; a value of {@code -1} or less disables the bound
     */
    public void setStart(int aStart) {
        this.start = aStart;
    }

    /**
     * Sets the upper bound of the window.
     *
     * @param aEnd value of the internal sentence index at which {@link #read()} starts
     *             returning {@code null}; a value of {@code -1} or less disables the bound
     */
    public void setEnd(int aEnd) {
        this.end = aEnd;
    }

    /**
     * Rewinds the wrapped stream and this stream's own counters so that a subsequent pass
     * applies the {@code start}/{@code end} window and the quote normalization exactly as
     * the first pass did.
     *
     * @throws IOException if the wrapped stream cannot be reset
     * @throws UnsupportedOperationException if the wrapped stream does not support reset
     */
    @Override
    public void reset() throws IOException, UnsupportedOperationException {
        this.adSentenceStream.reset();
        // Only rewind our own state once the underlying stream was reset successfully.
        this.index = 0;
        this.callsCount = 0;
    }

    /**
     * Closes the wrapped sentence stream.
     *
     * @throws IOException if closing the wrapped stream fails
     */
    @Override
    public void close() throws IOException {
        this.adSentenceStream.close();
    }
}

