/*
 * Decompiled with CFR 0.152.
 */
package org.fit.layout.classify.taggers;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.fit.layout.classify.TextTag;
import org.fit.layout.classify.taggers.BaseTagger;
import org.fit.layout.model.Area;
import org.fit.layout.model.Tag;

public class TitleTagger
extends BaseTagger {
    private static final float YES = 0.6f;
    private static final float COULDBE = 0.1f;
    private static final float NO = 0.0f;
    protected final int MIN_WORDS = 3;
    protected Pattern areaexpr = Pattern.compile("[A-Z0-9]");
    protected Pattern titleexpr = Pattern.compile("[A-Z][A-Za-z\\s\\.\\:\\-\\p{Pd}]*");
    protected Pattern contexpr = Pattern.compile("[A-Za-z\\s\\.\\:\\-\\p{Pd}]+");
    protected Vector<String> blacklist = new Vector();

    public TitleTagger() {
        this.blacklist.add("session");
        this.blacklist.add("chair");
    }

    public String getId() {
        return "FITLayout.Tag.Title";
    }

    public String getName() {
        return "Titles";
    }

    public String getDescription() {
        return "General paper or news titles";
    }

    @Override
    public TextTag getTag() {
        return new TextTag("title", this);
    }

    @Override
    public float belongsTo(Area node) {
        String text;
        if (node.isLeaf() && this.areaexpr.matcher(text = this.getText(node)).lookingAt()) {
            Matcher match = this.titleexpr.matcher(text);
            float ret = 0.0f;
            while (match.find()) {
                String s = match.group();
                String[] words = s.split("\\s+");
                if (this.containsBlacklistedWord(words)) continue;
                if (words.length >= 3) {
                    ret = 0.6f;
                    continue;
                }
                ret = Math.max(ret, 0.1f);
            }
            return ret;
        }
        return 0.0f;
    }

    @Override
    public boolean allowsContinuation(Area node) {
        String text;
        return node.isLeaf() && this.contexpr.matcher(text = node.getText().trim()).lookingAt();
    }

    @Override
    public boolean allowsJoining() {
        return true;
    }

    @Override
    public boolean mayCoexistWith(Tag other) {
        return !other.getValue().equals("session");
    }

    @Override
    public List<String> extract(String src) {
        Vector<String> ret = new Vector<String>();
        Matcher match = this.titleexpr.matcher(src);
        while (match.find()) {
            String s = match.group();
            String[] words = s.split("\\s+");
            if (words.length < 3) continue;
            ret.add(s);
        }
        return ret;
    }

    @Override
    public List<String> split(String src) {
        ArrayList<String> ret = new ArrayList<String>(1);
        ret.add(src);
        return ret;
    }

    protected String getText(Area node) {
        String s = node.getText().trim();
        s = s.replaceAll("^[\\\"\\p{Pi}]+", "");
        s = s.replaceAll("[\\\"\\p{Pf}]+$", "");
        return s;
    }

    protected boolean containsBlacklistedWord(String[] words) {
        for (String w : words) {
            if (!this.blacklist.contains(w.toLowerCase())) continue;
            return true;
        }
        return false;
    }
}

