/*
 * Decompiled with CFR 0.152.
 */
package org.cogroo.formats.ad;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import opennlp.tools.formats.ad.ADSentenceStream;
import opennlp.tools.formats.ad.PortugueseContractionUtility;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
import org.cogroo.ContractionUtility;

/**
 * Converts the sentences of an {@link ADSentenceStream} into {@link NameSample}s.
 *
 * <p>The tokens of each sample are the leaf lexemes of the sentence tree. A leaf
 * whose secondary tag contains {@code <sam->} opens a contraction: its last
 * underscore-separated piece is held back and joined with the following leaf
 * tagged {@code <-sam>} through
 * {@link PortugueseContractionUtility#toContraction(String, String)}. Every token
 * recognised as a contraction is marked with a one-token {@link Span} of type
 * {@code "default"}.
 *
 * <p>No lexeme is ever discarded: when the two halves cannot be joined, or the
 * expected right half never shows up, the pieces are emitted as separate tokens
 * and a diagnostic is written to {@code System.err}.
 */
public class ADContractionNameSampleStream
implements ObjectStream<NameSample> {
    /** Separator used inside multi-word lexemes. */
    private static final Pattern underlinePattern = Pattern.compile("[_]+");
    /** Type assigned to every span produced by this stream. */
    private static final String SPAN_TYPE = "default";

    private final ObjectStream<ADSentenceStream.Sentence> adSentenceStream;
    /** Tag set handed to the constructor; stored but not consulted by this class. */
    private final Set<String> tags;
    /** Left half of a contraction that still waits for its right half, or {@code null}. */
    private String leftContractionPart = null;

    /**
     * Creates a stream on top of a line stream.
     *
     * @param lineStream the lines to parse into sentences
     * @param tags tag set, kept as given
     */
    public ADContractionNameSampleStream(ObjectStream<String> lineStream, Set<String> tags) {
        this.adSentenceStream = new ADSentenceStream(lineStream);
        this.tags = tags;
    }

    /**
     * Creates a stream on top of a raw input stream.
     *
     * @param in the input to read
     * @param charsetName name of the charset used to decode {@code in}
     * @param tags tag set, kept as given
     * @throws IllegalStateException if {@code charsetName} is not supported
     */
    public ADContractionNameSampleStream(InputStream in, String charsetName, Set<String> tags) {
        this.tags = tags;
        try {
            this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(in, charsetName));
        }
        catch (UnsupportedEncodingException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Reads the next sentence and converts it into a sample.
     *
     * @return the next sample, or {@code null} when the underlying stream is exhausted
     * @throws IOException if the underlying stream fails
     */
    @Override
    public NameSample read() throws IOException {
        ADSentenceStream.Sentence paragraph = this.adSentenceStream.read();
        if (paragraph == null) {
            return null;
        }
        // A contraction never spans two sentences, so start from a clean state.
        this.leftContractionPart = null;
        List<String> sentence = new ArrayList<String>();
        List<Span> names = new ArrayList<Span>();
        this.process(paragraph.getRoot(), sentence, names);
        if (this.leftContractionPart != null) {
            // The sentence ended before the right half arrived: keep the token
            // instead of losing it or leaking it into the next sentence.
            sentence.add(this.leftContractionPart);
            this.leftContractionPart = null;
        }
        return new NameSample(sentence.toArray(new String[sentence.size()]), names.toArray(new Span[names.size()]), true);
    }

    /** Walks the tree depth-first and hands every leaf to {@link #processLeaf}. */
    private void process(ADSentenceStream.SentenceParser.Node node, List<String> sentence, List<Span> names) {
        if (node == null) {
            return;
        }
        for (ADSentenceStream.SentenceParser.TreeElement element : node.getElements()) {
            if (element.isLeaf()) {
                this.processLeaf((ADSentenceStream.SentenceParser.Leaf)element, sentence, names);
            } else {
                this.process((ADSentenceStream.SentenceParser.Node)element, sentence, names);
            }
        }
    }

    /**
     * Appends the token(s) of one leaf to {@code sentence}, joining it with a
     * pending left contraction half when there is one.
     */
    private void processLeaf(ADSentenceStream.SentenceParser.Leaf leaf, List<String> sentence, List<Span> names) {
        if (leaf == null) {
            return;
        }
        if (this.leftContractionPart != null) {
            String left = this.leftContractionPart;
            this.leftContractionPart = null;
            String tag = leaf.getSecondaryTag();
            if (tag != null && tag.contains("<-sam>")) {
                this.joinContraction(left, leaf.getLexeme(), sentence, names);
                return;
            }
            // The expected right half is missing. Report it, keep the left half
            // as a token of its own and handle this leaf like any other below.
            System.err.println("unmatch" + left + " + " + leaf.getLexeme());
            sentence.add(left);
        }
        String leafTag = leaf.getSecondaryTag();
        if (leafTag != null && leafTag.contains("<sam->")) {
            this.openContraction(leaf.getLexeme(), sentence, names);
            return;
        }
        sentence.add(leaf.getLexeme());
    }

    /**
     * Handles a {@code <sam->} lexeme: all pieces but the last become tokens
     * (marked with a span when {@link ContractionUtility#expand} recognises
     * them), the last piece is held back as the pending left half.
     */
    private void openContraction(String lexeme, List<String> sentence, List<Span> names) {
        String[] pieces = underlinePattern.split(lexeme);
        if (pieces.length == 0) {
            // split() yields an empty array for a lexeme made only of
            // underscores; there is no left half, so keep the lexeme as it is.
            sentence.add(lexeme);
            return;
        }
        int last = pieces.length - 1;
        for (int i = 0; i < last; ++i) {
            sentence.add(pieces[i]);
            if (ContractionUtility.expand(pieces[i]) != null) {
                names.add(this.lastTokenSpan(sentence));
            }
        }
        this.leftContractionPart = pieces[last];
    }

    /**
     * Handles a {@code <-sam>} lexeme: its first piece is joined with
     * {@code left}; the remaining pieces are appended as plain tokens.
     */
    private void joinContraction(String left, String rightLexeme, List<String> sentence, List<Span> names) {
        String[] parts = underlinePattern.split(rightLexeme);
        // An underscore-only lexeme yields no parts; fall back to the raw lexeme.
        String right = parts.length > 0 ? parts[0] : rightLexeme;
        String contraction = PortugueseContractionUtility.toContraction(left, right);
        if (contraction != null) {
            sentence.add(contraction);
            names.add(this.lastTokenSpan(sentence));
        } else {
            // Unknown combination: keep both halves rather than dropping them.
            System.err.println("missing " + left + " + " + right);
            sentence.add(left);
            sentence.add(right);
        }
        for (int i = 1; i < parts.length; ++i) {
            sentence.add(parts[i]);
        }
    }

    /** Builds a span covering only the token most recently added to {@code sentence}. */
    private Span lastTokenSpan(List<String> sentence) {
        int end = sentence.size();
        return new Span(end - 1, end, SPAN_TYPE);
    }

    /** Resets the underlying sentence stream and drops any pending contraction half. */
    @Override
    public void reset() throws IOException, UnsupportedOperationException {
        this.leftContractionPart = null;
        this.adSentenceStream.reset();
    }

    /** Closes the underlying sentence stream. */
    @Override
    public void close() throws IOException {
        this.adSentenceStream.close();
    }
}

