/*
 * Decompiled with CFR 0.152.
 */
package edu.washington.cs.knowitall.normalization;

import edu.washington.cs.knowitall.nlp.extraction.ChunkedExtraction;
import edu.washington.cs.knowitall.normalization.FieldNormalizer;
import edu.washington.cs.knowitall.normalization.NormalizedField;
import edu.washington.cs.knowitall.sequence.SequenceException;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.List;
import java.util.Locale;
import uk.ac.susx.informatics.Morpha;

/**
 * A {@link FieldNormalizer} that reduces a field to its stemmed head noun.
 *
 * <p>The head noun is taken to be the last token whose POS tag starts with
 * {@code "N"}. Fields that contain a proper noun ({@code NNP}/{@code NNPS}),
 * or that contain no noun at all, are returned with their original tokens
 * and POS tags.
 *
 * <p>Each instance owns a single {@link Morpha} lexer that is reset and
 * advanced on every call, so one instance must not be used by several
 * threads at the same time.
 */
public class HeadNounNormalizer
implements FieldNormalizer {
    /**
     * Lexical state the lexer is switched to before reading a
     * {@code word_TAG} pair.
     * NOTE(review): 4 is presumably Morpha's {@code scan} state inlined by
     * the decompiler -- confirm against the Morpha class.
     */
    private static final int LEXER_STATE = 4;

    /** Stateful stemmer; per instance so that separate normalizers never share lexer state. */
    private final Morpha lexer;

    /**
     * Creates a normalizer with its own lexer, initially attached to an
     * empty input stream. The real input is supplied per token in
     * {@link #stem(String, String)}.
     */
    public HeadNounNormalizer() {
        this.lexer = new Morpha(new ByteArrayInputStream(new byte[0]));
    }

    /**
     * Normalizes the given field to its head noun.
     *
     * @param field the extraction field to normalize
     * @return a field carrying the original tokens and POS tags when
     *         {@code field} contains a proper noun or no noun; otherwise a
     *         one-token field holding the stemmed last noun and its POS tag
     * @throws IllegalStateException if {@link NormalizedField} rejects the
     *         token/tag sequences with a {@link SequenceException}
     */
    @Override
    public NormalizedField normalizeField(ChunkedExtraction field) {
        boolean containsProperNoun = false;
        int lastNounIndex = -1;
        for (int i = 0; i < field.getLength(); ++i) {
            String tag = field.getPosTag(i);
            if (tag.equals("NNP") || tag.equals("NNPS")) {
                containsProperNoun = true;
            }
            if (tag.startsWith("N")) {
                lastNounIndex = i;
            }
        }

        if (containsProperNoun || lastNounIndex == -1) {
            // Return immediately: falling through would discard this result and,
            // when no noun was found, index the field at -1.
            try {
                return new NormalizedField(field, (List<String>)field.getTokens(), (List<String>)field.getPosTags());
            }
            catch (SequenceException e) {
                throw lengthMismatch(field, e);
            }
        }

        String posTag = field.getPosTag(lastNounIndex);
        String normToken = this.stem(field.getToken(lastNounIndex), posTag);
        String[] tokens = new String[]{normToken};
        String[] posTags = new String[]{posTag};
        try {
            return new NormalizedField(field, tokens, posTags);
        }
        catch (SequenceException e) {
            throw lengthMismatch(field, e);
        }
    }

    /** Wraps a {@link SequenceException} in the unchecked exception reported to callers. */
    private static IllegalStateException lengthMismatch(ChunkedExtraction field, SequenceException cause) {
        String msg = String.format("tokens and posTags are not the same length for field %s", field);
        return new IllegalStateException(msg, cause);
    }

    /**
     * Stems a single token with the lexer.
     *
     * @param token the surface form of the token
     * @param posTag the token's POS tag, appended to the lexer input as {@code token_posTag}
     * @return the lexer's output for the lower-cased token, or the
     *         lower-cased token itself if the lexer fails
     */
    private String stem(String token, String posTag) {
        // Locale.ROOT keeps lower-casing independent of the JVM's default locale.
        String lowered = token.toLowerCase(Locale.ROOT);
        String wordTag = lowered + "_" + posTag;
        try {
            lexer.yyreset(new StringReader(wordTag));
            lexer.yybegin(LEXER_STATE);
            return lexer.next();
        }
        catch (Throwable ignored) {
            // Deliberate best-effort: any lexer failure falls back to the
            // lower-cased token. Throwable is kept from the original code;
            // presumably the generated scanner can raise java.lang.Error on
            // unmatched input -- confirm before narrowing this catch.
            return lowered;
        }
    }
}

