/*
 * Decompiled with CFR 0.152.
 */
package edu.washington.cs.knowitall.normalization;

import com.google.common.collect.ImmutableList;
import edu.washington.cs.knowitall.nlp.extraction.ChunkedExtraction;
import edu.washington.cs.knowitall.normalization.FieldNormalizer;
import edu.washington.cs.knowitall.normalization.NormalizedField;
import edu.washington.cs.knowitall.sequence.SequenceException;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import uk.ac.susx.informatics.Morpha;

public class VerbalRelationNormalizer
implements FieldNormalizer {
    private Morpha lexer = new Morpha((InputStream)new ByteArrayInputStream("".getBytes()));
    private boolean stripBeAdj = false;
    private HashSet<String> ignorePosTags = new HashSet();
    private HashSet<String> auxVerbs;

    public VerbalRelationNormalizer() {
        this.ignorePosTags.add("MD");
        this.ignorePosTags.add("DT");
        this.ignorePosTags.add("PDT");
        this.ignorePosTags.add("WDT");
        this.ignorePosTags.add("JJ");
        this.ignorePosTags.add("RB");
        this.ignorePosTags.add("PRP$");
        this.auxVerbs = new HashSet();
        this.auxVerbs.add("be");
        this.auxVerbs.add("have");
        this.auxVerbs.add("do");
    }

    public void stripBeAdj(boolean value) {
        this.stripBeAdj = value;
    }

    @Override
    public NormalizedField normalizeField(ChunkedExtraction field) {
        ImmutableList<String> tokens = field.getTokens();
        ImmutableList<String> posTags = field.getPosTags();
        ArrayList<String> tokensCopy = new ArrayList<String>(tokens.size());
        tokensCopy.addAll((Collection<String>)tokens);
        ArrayList<String> posTagsCopy = new ArrayList<String>(posTags.size());
        posTagsCopy.addAll((Collection<String>)posTags);
        this.normalizeModify(tokensCopy, posTagsCopy);
        try {
            return new NormalizedField(field, tokensCopy, posTagsCopy);
        }
        catch (SequenceException e) {
            String msg = String.format("tokens and posTags are not the same length for field %s", field);
            throw new IllegalStateException(msg, e);
        }
    }

    private void normalizeModify(List<String> tokens, List<String> posTags) {
        this.stemAll(tokens, posTags);
        this.removeIgnoredPosTags(tokens, posTags);
        this.removeLeadingBeHave(tokens, posTags);
    }

    private String stem(String token, String posTag) {
        token = token.toLowerCase();
        String wordTag = token + "_" + posTag;
        try {
            this.lexer.yyreset((Reader)new StringReader(wordTag));
            this.lexer.yybegin(4);
            String tokenNorm = this.lexer.next();
            return tokenNorm;
        }
        catch (Throwable e) {
            return token;
        }
    }

    private void stemAll(List<String> tokens, List<String> posTags) {
        for (int i = 0; i < tokens.size(); ++i) {
            String tok = tokens.get(i);
            String tag = posTags.get(i);
            String newTok = this.stem(tok, tag);
            tokens.set(i, newTok);
        }
    }

    private void removeIgnoredPosTags(List<String> tokens, List<String> posTags) {
        boolean noNoun = true;
        for (int j = 0; j < posTags.size(); ++j) {
            if (!posTags.get(j).startsWith("N")) continue;
            noNoun = false;
            break;
        }
        int i = 0;
        while (i < posTags.size()) {
            boolean keepAdj;
            String tag = posTags.get(i);
            boolean isAdj = tag.startsWith("J");
            boolean bl = keepAdj = isAdj && noNoun;
            if (this.ignorePosTags.contains(tag) && (!keepAdj || this.stripBeAdj)) {
                tokens.remove(i);
                posTags.remove(i);
                continue;
            }
            ++i;
        }
    }

    private void removeLeadingBeHave(List<String> tokens, List<String> posTags) {
        int i;
        int lastVerbIndex = -1;
        int n = tokens.size();
        for (i = 0; i < n; ++i) {
            String tag = posTags.get(n - i - 1);
            if (!tag.startsWith("V")) continue;
            lastVerbIndex = n - i - 1;
            break;
        }
        if (lastVerbIndex < 0) {
            return;
        }
        i = 0;
        while (i < lastVerbIndex) {
            String tok = tokens.get(i);
            if (i + 1 < posTags.size() && !posTags.get(i + 1).startsWith("V")) break;
            if (this.auxVerbs.contains(tok)) {
                tokens.remove(i);
                posTags.remove(i);
                --lastVerbIndex;
                continue;
            }
            ++i;
        }
    }
}

