/*
 * Decompiled with CFR 0.152.
 */
package org.fit.layout.classify.taggers;

import java.util.Locale;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.fit.layout.classify.Tagger;
import org.fit.layout.classify.TextTag;
import org.fit.layout.model.Area;
import org.fit.layout.model.Tag;

public class TitleTagger
implements Tagger {
    protected final int MIN_WORDS = 3;
    protected Pattern areaexpr = Pattern.compile("[A-Z0-9]");
    protected Pattern titleexpr = Pattern.compile("[A-Z][A-Za-z\\s\\.\\:\\-\\p{Pd}]*");
    protected Pattern contexpr = Pattern.compile("[A-Za-z\\s\\.\\:\\-\\p{Pd}]+");
    protected Vector<String> blacklist = new Vector();

    public TitleTagger() {
        this.blacklist.add("session");
        this.blacklist.add("chair");
    }

    @Override
    public TextTag getTag() {
        return new TextTag("title", this);
    }

    @Override
    public double getRelevance() {
        return 0.6;
    }

    @Override
    public boolean belongsTo(Area node) {
        String text;
        if (node.isLeaf() && this.areaexpr.matcher(text = this.getText(node)).lookingAt()) {
            Matcher match = this.titleexpr.matcher(text);
            while (match.find()) {
                String s = match.group();
                String[] words = s.split("\\s+");
                if (words.length < 3 || this.containsBlacklistedWord(words)) continue;
                return true;
            }
            return false;
        }
        return false;
    }

    @Override
    public boolean allowsContinuation(Area node) {
        String text;
        return node.isLeaf() && this.contexpr.matcher(text = node.getText().trim()).lookingAt();
    }

    @Override
    public boolean allowsJoining() {
        return true;
    }

    @Override
    public boolean mayCoexistWith(Tag other) {
        return !other.getValue().equals("session");
    }

    @Override
    public Vector<String> extract(String src) {
        Vector<String> ret = new Vector<String>();
        Matcher match = this.titleexpr.matcher(src);
        while (match.find()) {
            String s = match.group();
            String[] words = s.split("\\s+");
            if (words.length < 3) continue;
            ret.add(s);
        }
        return ret;
    }

    protected String getText(Area node) {
        String s = node.getText().trim();
        s = s.replaceAll("^[\\\"\\p{Pi}]+", "");
        s = s.replaceAll("[\\\"\\p{Pf}]+$", "");
        return s;
    }

    protected boolean containsBlacklistedWord(String[] words) {
        for (String w : words) {
            if (!this.blacklist.contains(w.toLowerCase())) continue;
            return true;
        }
        return false;
    }
}

