/*
 * Decompiled with CFR 0.152.
 */
package chalk.tools.formats.ad;

import chalk.tools.formats.ad.ADSentenceStream;
import chalk.tools.formats.ad.PortugueseContractionUtility;
import chalk.tools.namefind.NameSample;
import chalk.tools.util.ObjectStream;
import chalk.tools.util.PlainTextByLineStream;
import chalk.tools.util.Span;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * An {@link ObjectStream} that reads sentences from an {@link ADSentenceStream} and converts
 * each of them into a {@link NameSample}: the leaves of the sentence tree become tokens and the
 * entity tags found in the leaves' secondary tags become typed {@link Span}s over those tokens.
 *
 * <p>Instances keep mutable per-stream state (pending contraction part, current text id) without
 * any synchronization.
 */
public class ADNameSampleStream
implements ObjectStream<NameSample> {
    /** Matches one tag such as {@code <hum>} or {@code <NER:hum>}; group 2 is the tag name. */
    private static final Pattern TAG_PATTERN = Pattern.compile("<(NER:)?(.*?)>");
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
    /** Lexemes use runs of underscores as the separator between their parts. */
    private static final Pattern UNDERLINE_PATTERN = Pattern.compile("[_]+");
    /**
     * Recognizes the three hyphenated shapes handled by {@link #processTok(String)}:
     * {@code word-} (groups 1-2), {@code -word...} (groups 3-5) and {@code word-word...} (groups 6-9).
     */
    private static final Pattern HYPHEN_PATTERN = Pattern.compile("((\\p{L}+)-$)|(^-(\\p{L}+)(.*))|((\\p{L}+)-(\\p{L}+)(.*))");
    /** Matches tokens made only of letters and decimal digits; such tokens are never split. */
    private static final Pattern ALPHANUMERIC_PATTERN = Pattern.compile("^[\\p{L}\\p{Nd}]+$");
    /** Maps the tag names found in leaf secondary tags to the span type emitted for them. */
    private static final Map<String, String> HAREM = ADNameSampleStream.createHaremMap();

    private final ObjectStream<ADSentenceStream.Sentence> adSentenceStream;
    /** Left half of a contraction seen in the previous leaf, waiting to be joined with the next one. */
    private String leftContractionPart = null;
    private final boolean splitHyphenatedTokens;
    /** Text id of the most recently read sentence; {@code -1} until the first sentence is read. */
    int textID = -1;
    /** Corpus flavour, detected lazily from the metadata of the first sentence. */
    private Type corpusType = null;
    /** Metadata pattern matching {@link #corpusType}; set together with it. */
    private Pattern metaPattern;
    /** Running text counter used for the {@code lit} and {@code cie} corpus types. */
    private int textIdMeta2 = -1;
    /** Metadata key of the text currently counted by {@link #textIdMeta2}. */
    private String textMeta2 = "";

    /**
     * Creates a stream on top of a stream of lines.
     *
     * @param objectStream the lines to parse, handed to a new {@link ADSentenceStream}
     * @param bl whether hyphenated tokens are split into separate tokens
     */
    public ADNameSampleStream(ObjectStream<String> objectStream, boolean bl) {
        this.adSentenceStream = new ADSentenceStream(objectStream);
        this.splitHyphenatedTokens = bl;
    }

    /**
     * Creates a stream on top of a raw input stream.
     *
     * @param inputStream the corpus data
     * @param string the name of the charset used to decode {@code inputStream}
     * @param bl whether hyphenated tokens are split into separate tokens
     * @throws IllegalStateException if the charset name is not supported
     */
    public ADNameSampleStream(InputStream inputStream, String string, boolean bl) {
        try {
            this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(inputStream, string));
            this.splitHyphenatedTokens = bl;
        }
        catch (UnsupportedEncodingException unsupportedEncodingException) {
            throw new IllegalStateException(unsupportedEncodingException);
        }
    }

    /**
     * Reads the next sentence and converts it into a name sample.
     *
     * @return the converted sample, or {@code null} when the underlying sentence stream
     *         returns {@code null}
     * @throws IOException if the underlying stream fails
     */
    @Override
    public NameSample read() throws IOException {
        ADSentenceStream.Sentence sentence = this.adSentenceStream.read();
        if (sentence == null) {
            return null;
        }

        // The third NameSample argument is true for the first sentence of every new text
        // (presumably the "clear adaptive data" flag - confirm against NameSample).
        int currentTextID = this.getTextID(sentence);
        boolean startsNewText = currentTextID != this.textID;
        if (startsNewText) {
            this.textID = currentTextID;
        }

        List<String> tokens = new ArrayList<String>();
        List<Span> names = new ArrayList<Span>();
        this.process(sentence.getRoot(), tokens, names);
        return new NameSample(
                tokens.toArray(new String[tokens.size()]),
                names.toArray(new Span[names.size()]),
                startsNewText);
    }

    /**
     * Walks the tree below {@code node} depth-first and feeds every leaf to
     * {@link #processLeaf}. A {@code null} node is ignored.
     */
    private void process(ADSentenceStream.SentenceParser.Node node, List<String> sentence, List<Span> names) {
        if (node == null) {
            return;
        }
        for (ADSentenceStream.SentenceParser.TreeElement element : node.getElements()) {
            if (element.isLeaf()) {
                this.processLeaf((ADSentenceStream.SentenceParser.Leaf)element, sentence, names);
            } else {
                this.process((ADSentenceStream.SentenceParser.Node)element, sentence, names);
            }
        }
    }

    /**
     * Appends the tokens of one leaf to {@code sentence} and, when the leaf carries a known
     * entity tag, appends the matching span to {@code names}.
     *
     * @param leaf the leaf to convert
     * @param sentence the tokens collected so far; extended in place
     * @param names the spans collected so far; extended or updated in place
     */
    private void processLeaf(ADSentenceStream.SentenceParser.Leaf leaf, List<String> sentence, List<Span> names) {
        // Step 1: if the previous leaf left a contraction part behind, try to merge it with this leaf.
        boolean alreadyAdded = false;
        if (this.leftContractionPart != null) {
            String contraction = PortugueseContractionUtility.toContraction(this.leftContractionPart, leaf.getLexeme());
            if (contraction != null) {
                sentence.addAll(Arrays.asList(WHITESPACE_PATTERN.split(contraction)));
                alreadyAdded = true;
            } else {
                // No contraction could be built: emit the pending part on its own.
                sentence.add(this.leftContractionPart);
            }
            this.leftContractionPart = null;
        }

        // Step 2: inspect the secondary tag.
        String namedEntityType = null;
        boolean expandLastNER = false;
        String leafTag = leaf.getSecondaryTag();
        if (leafTag != null) {
            if (leafTag.contains("<sam->") && !alreadyAdded) {
                // This leaf is the left side of a contraction: emit everything but its last
                // part and hold the last part back until the next leaf is seen.
                String[] parts = UNDERLINE_PATTERN.split(leaf.getLexeme());
                // A lexeme consisting only of underscores splits into nothing; there is then
                // no part to hold back.
                if (parts.length > 0) {
                    sentence.addAll(Arrays.asList(parts).subList(0, parts.length - 1));
                    this.leftContractionPart = parts[parts.length - 1];
                }
                return;
            }
            expandLastNER = leafTag.contains("<NER2>");
            namedEntityType = ADNameSampleStream.getNER(leafTag);
        }

        // Step 3: emit the tokens (unless the contraction already covered this leaf) and the span.
        int spanStart = sentence.size();
        if (!alreadyAdded) {
            sentence.addAll(this.processLexeme(leaf.getLexeme()));
        }
        if (namedEntityType != null) {
            names.add(new Span(spanStart, sentence.size(), namedEntityType));
        }

        // Step 4: a <NER2> leaf extends the previous span when that span ends right before the
        // last token; otherwise the tag is ignored.
        if (expandLastNER && !names.isEmpty()) {
            int lastIndex = names.size() - 1;
            Span last = names.get(lastIndex);
            if (last.getEnd() == sentence.size() - 1) {
                names.set(lastIndex, new Span(last.getStart(), sentence.size(), last.getType()));
            }
        }
    }

    /**
     * Splits a lexeme on underscores and tokenizes every part that is longer than one
     * character and not purely alphanumeric. Empty parts (e.g. from a leading underscore)
     * are dropped.
     *
     * @param lexeme the raw lexeme of a leaf
     * @return the tokens of the lexeme, in order
     */
    private List<String> processLexeme(String lexeme) {
        List<String> tokens = new ArrayList<String>();
        for (String part : UNDERLINE_PATTERN.split(lexeme)) {
            if (part.length() == 0) {
                continue;
            }
            if (part.length() > 1 && !ALPHANUMERIC_PATTERN.matcher(part).matches()) {
                tokens.addAll(this.processTok(part));
            } else {
                tokens.add(part);
            }
        }
        return tokens;
    }

    /**
     * Tokenizes one lexeme part: a leading {@code \u00ab} and a trailing {@code \u00bb},
     * {@code :}, {@code ,} or {@code !} become tokens of their own and, when hyphen
     * splitting is enabled, hyphenated words are split around the hyphen.
     *
     * @param tok the text to tokenize
     * @return the resulting tokens, in order; never contains an empty string
     */
    private List<String> processTok(String tok) {
        List<String> out = new ArrayList<String>();
        if (tok.length() == 0) {
            return out;
        }
        final String original = tok;
        List<String> suffix = new ArrayList<String>(1);

        char first = tok.charAt(0);
        if (first == '\u00ab') {
            out.add(Character.toString(first));
            tok = tok.substring(1);
        }
        // The token may have been a lone opening quote; only look at the last character
        // when something is left.
        if (tok.length() > 0) {
            char last = tok.charAt(tok.length() - 1);
            if (last == '\u00bb' || last == ':' || last == ',' || last == '!') {
                suffix.add(Character.toString(last));
                tok = tok.substring(0, tok.length() - 1);
            }
        }

        boolean handled = false;
        if (this.splitHyphenatedTokens && tok.length() > 1 && tok.contains("-")) {
            Matcher matcher = HYPHEN_PATTERN.matcher(tok);
            if (matcher.matches()) {
                String firstTok = null;
                String secondTok = null;
                String rest = null;
                if (matcher.group(1) != null) {
                    // "word-"
                    firstTok = matcher.group(2);
                } else if (matcher.group(3) != null) {
                    // "-word..."
                    secondTok = matcher.group(4);
                    rest = matcher.group(5);
                } else if (matcher.group(6) != null) {
                    // "word-word..."
                    firstTok = matcher.group(7);
                    secondTok = matcher.group(8);
                    rest = matcher.group(9);
                }
                this.addIfNotEmpty(firstTok, out);
                this.addIfNotEmpty("-", out);
                this.addIfNotEmpty(secondTok, out);
                this.addIfNotEmpty(rest, out);
                handled = true;
            }
        }

        if (!handled) {
            boolean stripped = !original.equals(tok);
            if (stripped && tok.length() > 1 && !ALPHANUMERIC_PATTERN.matcher(tok).matches()) {
                // Something was peeled off, so the remainder may expose more punctuation.
                out.addAll(this.processTok(tok));
            } else if (tok.length() > 0) {
                out.add(tok);
            }
        }
        out.addAll(suffix);
        return out;
    }

    /** Tokenizes {@code tok} into {@code target} unless it is {@code null} or empty. */
    private void addIfNotEmpty(String tok, List<String> target) {
        if (tok != null && tok.length() > 0) {
            target.addAll(this.processTok(tok));
        }
    }

    /**
     * Looks up the span type for a secondary tag string.
     *
     * @param tags whitespace-separated tags of a leaf; must not be {@code null}
     * @return the type mapped to the first tag known to {@link #HAREM}, or {@code null} when
     *         no tag is known or the string contains {@code <NER2>}
     */
    private static String getNER(String tags) {
        if (tags.contains("<NER2>")) {
            return null;
        }
        for (String tag : tags.split("\\s+")) {
            Matcher matcher = TAG_PATTERN.matcher(tag);
            if (!matcher.matches()) {
                continue;
            }
            String type = HAREM.get(matcher.group(2));
            if (type != null) {
                return type;
            }
        }
        return null;
    }

    /** Resets the underlying sentence stream. */
    @Override
    public void reset() throws IOException, UnsupportedOperationException {
        this.adSentenceStream.reset();
    }

    /** Closes the underlying sentence stream. */
    @Override
    public void close() throws IOException {
        this.adSentenceStream.close();
    }

    /**
     * Derives the id of the text a sentence belongs to from the sentence metadata.
     *
     * <p>The corpus type is decided once, from the first metadata string seen: a
     * {@code LIT} prefix selects {@link Type#lit}, a {@code CIE} prefix {@link Type#cie},
     * anything else {@link Type#ama}. For {@code ama} the id is the number captured from the
     * metadata; for {@code lit} and {@code cie} it is a counter that advances whenever the
     * captured key differs from the previous one.
     *
     * @param sentence the sentence whose metadata is inspected
     * @return the text id
     * @throws RuntimeException if the metadata does not match the pattern of the corpus type
     */
    private int getTextID(ADSentenceStream.Sentence sentence) {
        String meta = sentence.getMetadata();
        if (this.corpusType == null) {
            if (meta.startsWith("LIT")) {
                this.corpusType = Type.lit;
                this.metaPattern = Pattern.compile("^([a-zA-Z\\-]+)(\\d+).*?p=(\\d+).*");
            } else if (meta.startsWith("CIE")) {
                this.corpusType = Type.cie;
                this.metaPattern = Pattern.compile("^.*?source=\"(.*?)\".*");
            } else {
                this.corpusType = Type.ama;
                this.metaPattern = Pattern.compile("^(?:[a-zA-Z\\-]*(\\d+)).*?p=(\\d+).*");
            }
        }

        Matcher matcher = this.metaPattern.matcher(meta);
        if (!matcher.matches()) {
            throw new RuntimeException("Invalid metadata: " + meta);
        }
        if (this.corpusType == Type.ama) {
            return Integer.parseInt(matcher.group(1));
        }

        // lit and cie: count distinct consecutive keys.
        String key = matcher.group(1);
        if (!key.equals(this.textMeta2)) {
            ++this.textIdMeta2;
            this.textMeta2 = key;
        }
        return this.textIdMeta2;
    }

    /** Builds the immutable tag-name to span-type table. */
    private static Map<String, String> createHaremMap() {
        Map<String, String> map = new HashMap<String, String>();
        ADNameSampleStream.register(map, "person", "hum", "official", "member");
        ADNameSampleStream.register(map, "organization", "admin", "org", "inst", "media", "party", "suborg");
        ADNameSampleStream.register(map, "group", "groupind", "groupofficial");
        ADNameSampleStream.register(map, "place", "top", "civ", "address", "site", "virtual", "astro");
        ADNameSampleStream.register(map, "event", "occ", "event", "history");
        ADNameSampleStream.register(map, "artprod", "tit", "pub", "product", "V", "artwork");
        ADNameSampleStream.register(map, "abstract", "brand", "genre", "school", "idea", "plan", "author", "absname", "disease");
        // "currency" is deliberately not listed under "thing": it used to be registered there
        // and then immediately overwritten by the "numeric" entry below.
        ADNameSampleStream.register(map, "thing", "object", "common", "mat", "class", "plant");
        ADNameSampleStream.register(map, "time", "date", "hour", "period", "cyclic");
        ADNameSampleStream.register(map, "numeric", "quantity", "prednum", "currency");
        return Collections.unmodifiableMap(map);
    }

    /** Maps every tag in {@code tags} to {@code type} in {@code target}. */
    private static void register(Map<String, String> target, String type, String ... tags) {
        for (String tag : tags) {
            target.put(tag, type);
        }
    }

    /** Corpus flavours distinguished by {@link #getTextID}. */
    static enum Type {
        ama,
        cie,
        lit;

    }
}

