/*
 * Decompiled with CFR 0.152.
 */
package org.cogroo.formats.ad;

import com.google.common.base.Strings;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.formats.ad.ADSentenceStream;
import opennlp.tools.postag.POSSample;
import opennlp.tools.util.ObjectStream;

/**
 * Adapts an {@link ADSentenceStream} into a stream of {@link POSSample}s.
 *
 * <p>Every leaf of a sentence tree becomes one or more tokens:
 * <ul>
 *   <li>when {@code expandME} is set, a lexeme containing {@code "_"} is split on the
 *       underscores and its parts are tagged {@code B-tag}, {@code I-tag}, ...; a lexeme
 *       whose tag is exactly {@code "prop"} is kept whole and flagged with {@code "P"};</li>
 *   <li>otherwise a lexeme longer than one character that contains {@code "-"} is split
 *       around the hyphen when it matches the hyphen pattern;</li>
 *   <li>any other lexeme is emitted as is, with a gender suffix appended to {@code "n"} and
 *       {@code "art"} tags (see {@link #addGender(String, String)}).</li>
 * </ul>
 *
 * <p>When {@code additionalContext} is set the samples carry a two-row context array:
 * row 0 holds the contraction marker of each token ({@code "B"}, {@code "E"} or
 * {@code null}) and row 1 holds the {@code "P"} flag (or {@code null}).
 *
 * <p>The class keeps a per-instance read counter and is not written for concurrent use.
 */
public class ADExPOSSampleStream
implements ObjectStream<POSSample> {
    private static final Pattern HYPHEN_PATTERN = Pattern.compile("((\\p{L}+)-$)|(^-(\\p{L}+)(.*))|((\\p{L}+)-(\\p{L}+)(.*))");
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
    private static final Pattern GENDER_M = Pattern.compile(".*\\bM\\b.*");
    private static final Pattern GENDER_F = Pattern.compile(".*\\bF\\b.*");
    private static final Pattern GENDER_N = Pattern.compile(".*\\bM/F\\b.*");

    private final ObjectStream<ADSentenceStream.Sentence> adSentenceStream;
    private final boolean expandME;
    private final boolean isIncludeFeatures;
    private final boolean additionalContext;
    /** Number of {@link #read()} calls since construction or the last {@link #reset()}. */
    private int callsCount = 0;

    /**
     * Creates a sample stream on top of a line stream.
     *
     * @param lineStream        lines handed to the underlying {@link ADSentenceStream}
     * @param expandME          whether lexemes joined with {@code "_"} are split into
     *                          {@code B-}/{@code I-} tagged tokens
     * @param includeFeatures   whether the morphological tag is appended to the POS tag
     * @param additionalContext whether samples carry the contraction / prop context rows
     */
    public ADExPOSSampleStream(ObjectStream<String> lineStream, boolean expandME, boolean includeFeatures, boolean additionalContext) {
        this.adSentenceStream = new ADSentenceStream(lineStream);
        this.expandME = expandME;
        this.isIncludeFeatures = includeFeatures;
        this.additionalContext = additionalContext;
    }

    /**
     * Reads the next sentence and converts it into a {@link POSSample}.
     *
     * @return the next sample, or {@code null} when the underlying stream is exhausted
     * @throws IOException              if the underlying stream fails
     * @throws IllegalArgumentException if the token list and the context lists end up with
     *                                  different sizes
     */
    @Override
    public POSSample read() throws IOException {
        // Counted even for the terminating null read; processLeaf only uses the parity.
        ++this.callsCount;
        ADSentenceStream.Sentence paragraph = this.adSentenceStream.read();
        if (paragraph == null) {
            return null;
        }

        List<String> sentence = new ArrayList<String>();
        List<String> tags = new ArrayList<String>();
        List<String> contractions = new ArrayList<String>();
        List<String> prop = new ArrayList<String>();
        this.process(paragraph.getRoot(), sentence, tags, contractions, prop);

        if (sentence.size() != contractions.size() || sentence.size() != prop.size()) {
            throw new IllegalArgumentException("There must be exactly same number of tokens and additional context!");
        }
        if (!this.additionalContext) {
            return new POSSample(sentence, tags);
        }

        // Row 0: contraction markers, row 1: prop flags. Missing values stay null.
        String[][] ac = new String[2][sentence.size()];
        for (int i = 0; i < sentence.size(); ++i) {
            ac[0][i] = contractions.get(i);
            ac[1][i] = prop.get(i);
        }
        return new POSSample(sentence, tags, ac);
    }

    /**
     * Walks the tree depth-first, in element order, and appends the tokens of every leaf.
     * A {@code null} node is ignored.
     */
    private void process(ADSentenceStream.SentenceParser.Node node, List<String> sentence, List<String> tags, List<String> con, List<String> prop) {
        if (node == null) {
            return;
        }
        for (ADSentenceStream.SentenceParser.TreeElement element : node.getElements()) {
            if (element.isLeaf()) {
                this.processLeaf((ADSentenceStream.SentenceParser.Leaf)element, sentence, tags, con, prop);
            } else {
                this.process((ADSentenceStream.SentenceParser.Node)element, sentence, tags, con, prop);
            }
        }
    }

    /**
     * Appends the token(s) derived from one leaf to the four parallel lists.
     * A {@code null} leaf is ignored.
     */
    private void processLeaf(ADSentenceStream.SentenceParser.Leaf leaf, List<String> sentence, List<String> tags, List<String> con, List<String> prop) {
        if (leaf == null) {
            return;
        }

        String lexeme = leaf.getLexeme();
        // Guillemets are rewritten to a plain double quote only on even-numbered reads,
        // so roughly every other sentence keeps the original characters.
        if (("\u00ab".equals(lexeme) || "\u00bb".equals(lexeme)) && this.callsCount % 2 == 0) {
            lexeme = "\"";
        }

        String contraction = null;
        String secondaryTag = leaf.getSecondaryTag();
        if (secondaryTag != null) {
            if (secondaryTag.contains("<sam->")) {
                contraction = "B";
            } else if (secondaryTag.contains("<-sam>")) {
                contraction = "E";
            }
        }

        String tag = leaf.getFunctionalTag();
        if (tag == null) {
            // Leaves without a functional tag are tagged with their own lexeme.
            tag = lexeme;
        }
        if (this.isIncludeFeatures && leaf.getMorphologicalTag() != null) {
            tag = tag + " " + leaf.getMorphologicalTag();
        }
        // Tags must be a single token: collapse each whitespace run into '='.
        tag = WHITESPACE.matcher(tag).replaceAll("=");

        if (this.expandME && lexeme.contains("_")) {
            this.addMultiword(lexeme, tag, contraction, sentence, tags, con, prop);
        } else if (lexeme.contains("-") && lexeme.length() > 1) {
            this.addHyphenated(lexeme, tag, contraction, sentence, tags, con, prop);
        } else {
            // The gender suffix is only applied to plain (unsplit) tokens.
            addToken(sentence, tags, con, prop, lexeme, this.addGender(tag, leaf.getMorphologicalTag()), contraction, null);
        }
    }

    /**
     * Handles a lexeme containing {@code "_"}: a {@code "prop"} lexeme is kept whole and
     * flagged {@code "P"} (without contraction marker); any other lexeme is split on the
     * underscores into {@code B-}/{@code I-} tagged parts, the contraction marker going to
     * the last part only. A lexeme made of underscores alone is emitted unchanged.
     */
    private void addMultiword(String lexeme, String tag, String contraction, List<String> sentence, List<String> tags, List<String> con, List<String> prop) {
        if ("prop".equals(tag)) {
            addToken(sentence, tags, con, prop, lexeme, tag, null, "P");
            return;
        }
        StringTokenizer tokenizer = new StringTokenizer(lexeme, "_");
        if (tokenizer.countTokens() == 0) {
            addToken(sentence, tags, con, prop, lexeme, tag, contraction, null);
            return;
        }
        String prefix = "B-";
        while (tokenizer.hasMoreTokens()) {
            String part = tokenizer.nextToken();
            boolean isLast = !tokenizer.hasMoreTokens();
            addToken(sentence, tags, con, prop, part, prefix + tag, isLast ? contraction : null, null);
            prefix = "I-";
        }
    }

    /**
     * Handles a lexeme containing {@code "-"}: when it matches the hyphen pattern it is
     * split into (optional) first part, the hyphen itself tagged {@code "-"}, (optional)
     * second part and (optional) remainder; otherwise it is emitted unchanged. Every
     * emitted token carries the leaf's contraction marker.
     */
    private void addHyphenated(String lexeme, String tag, String contraction, List<String> sentence, List<String> tags, List<String> con, List<String> prop) {
        Matcher matcher = HYPHEN_PATTERN.matcher(lexeme);
        if (!matcher.matches()) {
            addToken(sentence, tags, con, prop, lexeme, tag, contraction, null);
            return;
        }

        String firstTok = null;
        String secondTok = null;
        String rest = null;
        if (matcher.group(1) != null) {
            // "word-"
            firstTok = matcher.group(2);
        } else if (matcher.group(3) != null) {
            // "-word..."
            secondTok = matcher.group(4);
            rest = matcher.group(5);
        } else if (matcher.group(6) != null) {
            // "word-word..."
            firstTok = matcher.group(7);
            secondTok = matcher.group(8);
            rest = matcher.group(9);
        } else {
            throw new IllegalStateException("wrong hyphen pattern");
        }

        if (!Strings.isNullOrEmpty(firstTok)) {
            addToken(sentence, tags, con, prop, firstTok, tag, contraction, null);
        }
        addToken(sentence, tags, con, prop, "-", "-", contraction, null);
        if (!Strings.isNullOrEmpty(secondTok)) {
            addToken(sentence, tags, con, prop, secondTok, tag, contraction, null);
        }
        if (!Strings.isNullOrEmpty(rest)) {
            addToken(sentence, tags, con, prop, rest, tag, contraction, null);
        }
    }

    /** Appends one token and its tag, contraction marker and prop flag to the parallel lists. */
    private static void addToken(List<String> sentence, List<String> tags, List<String> con, List<String> prop, String token, String tag, String contraction, String propFlag) {
        sentence.add(token);
        tags.add(tag);
        con.add(contraction);
        prop.add(propFlag);
    }

    /**
     * Appends a gender suffix to {@code "n"} and {@code "art"} tags.
     *
     * @param tag              the POS tag
     * @param morphologicalTag the leaf's morphological tag, may be {@code null}
     * @return {@code tag + "m"} or {@code tag + "f"} when the morphological tag contains a
     *         standalone {@code M} or {@code F} and does not contain {@code M/F};
     *         otherwise {@code tag} unchanged
     */
    private String addGender(String tag, String morphologicalTag) {
        boolean genderedTag = "n".equals(tag) || "art".equals(tag);
        if (!genderedTag || morphologicalTag == null || GENDER_N.matcher(morphologicalTag).matches()) {
            return tag;
        }
        if (GENDER_M.matcher(morphologicalTag).matches()) {
            return tag + "m";
        }
        if (GENDER_F.matcher(morphologicalTag).matches()) {
            return tag + "f";
        }
        return tag;
    }

    /**
     * Rewinds the underlying stream and restarts the read counter, so that a second pass
     * applies the guillemet rewriting to the same sentences as the first pass did.
     *
     * @throws IOException                   if the underlying stream cannot be reset
     * @throws UnsupportedOperationException if the underlying stream does not support reset
     */
    @Override
    public void reset() throws IOException, UnsupportedOperationException {
        this.adSentenceStream.reset();
        // Only restart the counter once the underlying reset has succeeded.
        this.callsCount = 0;
    }

    /**
     * Closes the underlying sentence stream.
     *
     * @throws IOException if closing the underlying stream fails
     */
    @Override
    public void close() throws IOException {
        this.adSentenceStream.close();
    }
}

