/*
 * Decompiled with CFR 0.152.
 */
package cz.vutbr.fit.layout.text.taggers;

import cz.vutbr.fit.layout.api.AreaConcatenator;
import cz.vutbr.fit.layout.api.Parameter;
import cz.vutbr.fit.layout.impl.ParameterInt;
import cz.vutbr.fit.layout.impl.ParameterString;
import cz.vutbr.fit.layout.model.Area;
import cz.vutbr.fit.layout.model.Tag;
import cz.vutbr.fit.layout.model.TagOccurrence;
import cz.vutbr.fit.layout.text.TextFlowConcatenator;
import cz.vutbr.fit.layout.text.taggers.BaseTagger;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A tagger that assigns its tag to areas whose text matches a set of
 * configurable regular expressions.
 *
 * <p>Three patterns are used:
 * <ul>
 *   <li>{@code areaExpr} must match at the beginning of the area text for the
 *       area to be considered at all,</li>
 *   <li>{@code mainExpr} locates the candidate occurrences within the text,</li>
 *   <li>{@code contExpr} decides whether a leaf area may continue a tagged
 *       sequence.</li>
 * </ul>
 * An occurrence is rated {@link #YES} when it contains at least
 * {@code minWords} words of at least {@code minWordLength} characters each,
 * otherwise {@link #COULDBE}.
 */
public class RegexpTagger extends BaseTagger {
    /** Support assigned to an occurrence with enough sufficiently long words. */
    private static final float YES = 0.6f;
    /** Support assigned to an occurrence that matches but has too few words. */
    private static final float COULDBE = 0.1f;
    /** Support returned when the area does not qualify at all. */
    private static final float NO = 0.0f;

    /** Minimal number of counted words required for the {@link #YES} rating. */
    private int minWords = 3;
    /** Minimal length of a word for it to be counted by {@link #wordCount(String[])}. */
    private int minWordLength = 3;
    /** Pattern that has to match at the start of the area text. */
    private Pattern areaExpr = Pattern.compile("[A-Z0-9]");
    /** Pattern used for locating the occurrences in the text. */
    private Pattern mainExpr = Pattern.compile("[A-Z][A-Za-z\\s\\.\\:\\-\\p{Pd}]*");
    /** Pattern that has to match at the start of a continuation area text. */
    private Pattern contExpr = Pattern.compile("[A-Za-z\\s\\.\\:\\-\\p{Pd}]+");
    /**
     * Words that disqualify a match; compared against the lower-cased words.
     * The list starts empty and is not populated anywhere in this class.
     */
    private final List<String> blacklist = new ArrayList<>();
    /** Concatenator used for obtaining the text of an area. */
    private AreaConcatenator concat = new TextFlowConcatenator();

    /** Creates the tagger with the default expressions and an empty blacklist. */
    public RegexpTagger() {
    }

    /** @return the unique identifier of this tagger */
    public String getId() {
        return "FITLayout.Tag.Regexp";
    }

    /** @return the human-readable name of this tagger */
    public String getName() {
        return "Regular expressions";
    }

    /** @return a short description of this tagger */
    public String getDescription() {
        return "Tag by regular expressions";
    }

    /**
     * Describes the configurable parameters of this tagger: the three
     * expressions as strings and the minimal word count as an integer.
     *
     * @return an unmodifiable list of the parameter definitions
     */
    public List<Parameter> defineParams() {
        // The list mixes ParameterString and ParameterInt, so it must be typed
        // by the common Parameter type (generic lists are invariant).
        return List.of(
                new ParameterString("areaExpr", 0, 512),
                new ParameterString("mainExpr", 0, 512),
                new ParameterString("contExpr", 0, 512),
                new ParameterInt("minWords", 0, 100));
    }

    /** @return the source of the area expression */
    public String getAreaExpr() {
        return this.areaExpr.toString();
    }

    /**
     * Sets the expression that must match at the start of the area text.
     *
     * @param areaExpr the regular expression source
     * @throws java.util.regex.PatternSyntaxException if the expression is invalid
     */
    public void setAreaExpr(String areaExpr) {
        this.areaExpr = Pattern.compile(areaExpr);
    }

    /** @return the source of the main expression */
    public String getMainExpr() {
        return this.mainExpr.toString();
    }

    /**
     * Sets the expression used for locating the occurrences.
     *
     * @param mainExpr the regular expression source
     * @throws java.util.regex.PatternSyntaxException if the expression is invalid
     */
    public void setMainExpr(String mainExpr) {
        this.mainExpr = Pattern.compile(mainExpr);
    }

    /** @return the source of the continuation expression */
    public String getContExpr() {
        return this.contExpr.toString();
    }

    /**
     * Sets the expression that must match at the start of a continuation area.
     *
     * @param contExpr the regular expression source
     * @throws java.util.regex.PatternSyntaxException if the expression is invalid
     */
    public void setContExpr(String contExpr) {
        this.contExpr = Pattern.compile(contExpr);
    }

    /** @return the minimal number of counted words required for the YES rating */
    public int getMinWords() {
        return this.minWords;
    }

    /** @param minWords the minimal number of counted words required for the YES rating */
    public void setMinWords(int minWords) {
        this.minWords = minWords;
    }

    /** @return the minimal length of a word for it to be counted */
    public int getMinWordLength() {
        return this.minWordLength;
    }

    /** @param minWordLength the minimal length of a word for it to be counted */
    public void setMinWordLength(int minWordLength) {
        this.minWordLength = minWordLength;
    }

    /** @return the concatenator used for obtaining the area text */
    public AreaConcatenator getConcatenator() {
        return this.concat;
    }

    /**
     * Rates how well the given area corresponds to this tagger.
     *
     * @param node the area to be examined
     * @return {@link #NO} when the area is not homogeneous or its text does not
     *         start with {@code areaExpr}; otherwise the best rating among the
     *         non-blacklisted {@code mainExpr} matches ({@link #YES},
     *         {@link #COULDBE}, or {@link #NO} when nothing usable was found)
     */
    public float belongsTo(Area node) {
        if (!this.isHomogeneous(node)) {
            return NO;
        }
        final String text = this.getText(node);
        if (!this.areaExpr.matcher(text).lookingAt()) {
            return NO;
        }

        float best = NO;
        final Matcher match = this.mainExpr.matcher(text);
        while (match.find()) {
            final String[] words = match.group().split("\\s+");
            if (this.containsBlacklistedWord(words)) {
                continue;
            }
            if (this.wordCount(words) >= this.minWords) {
                // YES is the highest possible rating, no later match can change it
                return YES;
            }
            best = Math.max(best, COULDBE);
        }
        return best;
    }

    /**
     * Checks whether the given area may continue a tagged sequence.
     *
     * @param node the candidate area
     * @return {@code true} when the area is a leaf and its trimmed text starts
     *         with a match of {@code contExpr}
     */
    public boolean allowsContinuation(Area node) {
        return node.isLeaf() && this.contExpr.matcher(node.getText().trim()).lookingAt();
    }

    /** @return always {@code true} */
    public boolean allowsJoining() {
        return true;
    }

    /**
     * @param other the other tag
     * @return always {@code true}
     */
    public boolean mayCoexistWith(Tag other) {
        return true;
    }

    /**
     * Finds all the {@code mainExpr} matches in the given string. Unlike
     * {@link #belongsTo(Area)}, the blacklist is not consulted here.
     *
     * @param src the source text
     * @return one occurrence per match, in the order of appearance, carrying
     *         the matched text, its start offset and a support of
     *         {@link #YES} or {@link #COULDBE} depending on the word count
     */
    public List<TagOccurrence> extract(String src) {
        ArrayList<TagOccurrence> ret = new ArrayList<>();
        Matcher match = this.mainExpr.matcher(src);
        while (match.find()) {
            TagOccurrence occ = new TagOccurrence(match.group(), match.start(), COULDBE);
            String[] words = occ.getText().split("\\s+");
            if (this.wordCount(words) >= this.minWords) {
                occ.setSupport(YES);
            }
            ret.add(occ);
        }
        return ret;
    }

    /**
     * Obtains the text of an area using the configured concatenator, trims it
     * and strips the leading and trailing quotation characters.
     *
     * @param node the source area
     * @return the normalized text
     */
    protected String getText(Area node) {
        String s = node.getText(this.getConcatenator()).trim();
        // leading double quotes and initial-quote punctuation
        s = s.replaceAll("^[\\\"\\p{Pi}]+", "");
        // trailing double quotes and final-quote punctuation
        s = s.replaceAll("[\\\"\\p{Pf}]+$", "");
        return s;
    }

    /**
     * Checks the words against the blacklist (case-insensitively by
     * lower-casing each word).
     *
     * @param words the words to be checked
     * @return {@code true} when at least one word is blacklisted
     */
    protected boolean containsBlacklistedWord(String[] words) {
        for (String w : words) {
            if (this.blacklist.contains(w.toLowerCase())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Counts the words that are at least {@code minWordLength} characters long.
     *
     * @param words the words to be counted
     * @return the number of sufficiently long words
     */
    protected int wordCount(String[] words) {
        int cnt = 0;
        for (String w : words) {
            if (w.length() >= this.minWordLength) {
                ++cnt;
            }
        }
        return cnt;
    }
}

