/*
 * Decompiled with CFR 0.152.
 */
package eus.ixa.ixa.pipe.ml.tok;

import eus.ixa.ixa.pipe.ml.tok.Token;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers that rewrite the values of {@link Token}s in place, replacing
 * typographic characters (ellipsis, dashes, vulgar fractions, currency signs
 * and quotation marks) with plain-text equivalents.
 *
 * <p>Every method mutates the tokens of the list it is given via
 * {@code Token.setTokenValue}; nothing is returned. The class cannot be
 * instantiated.
 */
public class Normalizer {
    /** Replacement text written in place of an ellipsis character. */
    public static final String THREE_DOTS = "...";
    /**
     * Matches the horizontal ellipsis U+2026.
     *
     * <p>Earlier revisions also listed code point U+8230, which is the decimal
     * value 8230 (that is, U+2026) mistakenly written as a hexadecimal escape.
     * U+8230 is an unrelated CJK ideograph, so it is no longer matched.
     */
    public static final Pattern ellipsis = Pattern.compile("\u2026");
    /**
     * Matches the en dash U+2013 and the em dash U+2014.
     *
     * <p>Earlier revisions also listed code point U+8212, which is the decimal
     * value 8212 (that is, U+2014) mistakenly written as a hexadecimal escape.
     * U+8212 is an unrelated CJK ideograph, so it is no longer matched.
     */
    public static final Pattern longDash = Pattern.compile("\u2013|\u2014");
    /** Matches the vulgar fraction one quarter, U+00BC. */
    public static final Pattern oneFourth = Pattern.compile("\u00bc");
    /** Matches the vulgar fraction one third, U+2153. */
    public static final Pattern oneThird = Pattern.compile("\u2153");
    /** Matches the vulgar fraction one half, U+00BD. */
    public static final Pattern oneHalf = Pattern.compile("\u00bd");
    /** Matches the vulgar fraction two thirds, U+2154. */
    public static final Pattern twoThirds = Pattern.compile("\u2154");
    /** Matches the vulgar fraction three quarters, U+00BE. */
    public static final Pattern threeQuarters = Pattern.compile("\u00be");
    /** Matches the cent sign, U+00A2. */
    private static final Pattern cents = Pattern.compile("\u00a2");
    /** Matches the pound sign, U+00A3. */
    private static final Pattern sterling = Pattern.compile("\u00a3");
    /** Matches the ASCII apostrophe, U+0092 and U+2019. */
    public static final Pattern apostrophe = Pattern.compile("[''\u0092\u2019]");
    /** Matches U+0091, U+201B, U+2018 and U+2039. */
    public static final Pattern leftSingleQuote = Pattern.compile("[\u0091\u201b\u2018\u2039]");
    /** Matches the ASCII apostrophe, U+0092, U+203A and U+2019. */
    public static final Pattern rightSingleQuote = Pattern.compile("['\u0092\u203a\u2019]");
    /** Matches U+00AB, U+0093 and U+201C. */
    public static final Pattern leftDoubleQuote = Pattern.compile("[\u00ab\u0093\u201c]");
    /** Matches U+00BB, U+0094 and U+201D. */
    public static final Pattern rightDoubleQuote = Pattern.compile("[\u00bb\u0094\u201d]");
    /** Matches the ASCII apostrophe. */
    public static final Pattern singleAsciiQuote = Pattern.compile("'|'");
    /**
     * Matches an alphabetic character followed by a non-space character, with
     * Unicode-aware character classes. Not used inside this class.
     */
    public static final Pattern invertSingleAsciiQuote =
            Pattern.compile("([\\p{Alpha}])([^\\p{Space}])", Pattern.UNICODE_CHARACTER_CLASS);
    /** Matches the ASCII double quote. */
    public static final Pattern doubleAsciiQuote = Pattern.compile("\"");
    /**
     * Matches a single letter, digit or dollar sign, with Unicode-aware
     * character classes. Used to decide whether a double quote opens a quotation.
     */
    public static final Pattern doubleAsciiQuoteAlphaNumeric =
            Pattern.compile("([\\p{Alpha}\\p{Digit}$])", Pattern.UNICODE_CHARACTER_CLASS);
    /** Character class of the single-quote variants that are mapped to the ASCII apostrophe. */
    public static final String TO_ASCII_SINGLE_QUOTE = "['\u0091\u0092\u2019\u201a\u201b\u203a\u2018\u2039]";
    /** Compiled form of {@link #TO_ASCII_SINGLE_QUOTE}. */
    public static final Pattern toAsciiSingleQuote = Pattern.compile(TO_ASCII_SINGLE_QUOTE);
    /** Matches the double-quote variants that are mapped to the ASCII double quote. */
    public static final Pattern toAsciiDoubleQuote = Pattern.compile("[\u00ab\u00bb\u0093\u0094\u201c\u201d\u201e\"]");

    /** Language code that selects the English-specific conventions. */
    private static final String ENGLISH = "en";
    /** Non-English language codes whose quotes are flattened to ASCII by {@link #normalizeQuotes}. */
    private static final String[] ASCII_QUOTE_LANGUAGES = {
        "ca", "de", "es", "eu", "fr", "gl", "it", "nl", "pt", "ru"
    };
    /** A token value consisting of exactly one ASCII letter. */
    private static final Pattern SINGLE_ASCII_LETTER = Pattern.compile("[A-Za-z]");
    /** A token value consisting of exactly one character that is not whitespace, NBSP or a pilcrow. */
    private static final Pattern SINGLE_NON_SPACE_CHAR = Pattern.compile("[^ \t\n\r\u00a0\u00b6]");

    private Normalizer() {
    }

    /**
     * Replaces apostrophe variants, ellipses, long dashes, vulgar fractions and
     * currency signs in every token value.
     *
     * <p>For English ({@code "en"}, case-insensitive) fractions are written with
     * an escaped slash ({@code 1\/4}, {@code 2\/3}, ...) and the pound sign
     * becomes {@code #}. For every other language fractions use a plain slash
     * and the pound sign is left alone. The cent sign becomes {@code cents} for
     * all languages.
     *
     * @param sentence tokens to rewrite in place
     * @param lang language code; {@code null} is treated as non-English
     */
    public static void convertNonCanonicalStrings(List<Token> sentence, String lang) {
        boolean english = ENGLISH.equalsIgnoreCase(lang);
        for (Token token : sentence) {
            String value = token.getTokenValue();
            value = apostrophe.matcher(value).replaceAll("'");
            value = ellipsis.matcher(value).replaceAll(THREE_DOTS);
            value = longDash.matcher(value).replaceAll("--");
            if (english) {
                // "\\\\" in a replacement string yields one literal backslash.
                value = oneFourth.matcher(value).replaceAll("1\\\\/4");
                value = oneThird.matcher(value).replaceAll("1\\\\/3");
                value = oneHalf.matcher(value).replaceAll("1\\\\/2");
                // Previously missing: two thirds fell through to the unescaped form.
                value = twoThirds.matcher(value).replaceAll("2\\\\/3");
                value = threeQuarters.matcher(value).replaceAll("3\\\\/4");
                value = sterling.matcher(value).replaceAll("#");
            } else {
                value = oneFourth.matcher(value).replaceAll("1/4");
                value = oneThird.matcher(value).replaceAll("1/3");
                value = oneHalf.matcher(value).replaceAll("1/2");
                value = twoThirds.matcher(value).replaceAll("2/3");
                value = threeQuarters.matcher(value).replaceAll("3/4");
            }
            value = cents.matcher(value).replaceAll("cents");
            token.setTokenValue(value);
        }
    }

    /**
     * Normalizes typographic quotation marks in every token value.
     *
     * <p>For English, left single quotes become a backtick, right single quotes
     * an apostrophe, left double quotes two backticks and right double quotes
     * two apostrophes. For ca, de, es, eu, fr, gl, it, nl, pt and ru, all
     * single-quote variants become {@code '} and all double-quote variants
     * become {@code "}. For any other language (including {@code null}) the
     * tokens are left untouched.
     *
     * @param sentence tokens to rewrite in place
     * @param lang language code, compared case-insensitively
     */
    public static void normalizeQuotes(List<Token> sentence, String lang) {
        boolean english = ENGLISH.equalsIgnoreCase(lang);
        if (!english && !usesAsciiQuotes(lang)) {
            return;
        }
        for (Token token : sentence) {
            String value = token.getTokenValue();
            if (english) {
                value = leftSingleQuote.matcher(value).replaceAll("`");
                value = rightSingleQuote.matcher(value).replaceAll("'");
                value = leftDoubleQuote.matcher(value).replaceAll("``");
                value = rightDoubleQuote.matcher(value).replaceAll("''");
            } else {
                value = toAsciiSingleQuote.matcher(value).replaceAll("'");
                value = toAsciiDoubleQuote.matcher(value).replaceAll("\"");
            }
            token.setTokenValue(value);
        }
    }

    /**
     * Turns ASCII quote tokens of an English sentence into directional quotes.
     *
     * <p>A token containing {@code "} becomes two backticks when an opening
     * quote is expected and the following token contains a letter, digit or
     * {@code $}; it becomes two apostrophes when a closing quote is expected.
     * The expectation alternates after each replacement. A token containing
     * {@code '} becomes a backtick when the next token is a single ASCII letter
     * and the token after that is a single non-space character. Note that the
     * whole token value is overwritten, not just the quote character.
     *
     * <p>Nothing happens for languages other than English (including
     * {@code null}).
     *
     * @param sentence tokens to rewrite in place
     * @param lang language code, compared case-insensitively
     */
    public static void normalizeDoubleQuotes(List<Token> sentence, String lang) {
        if (!ENGLISH.equalsIgnoreCase(lang)) {
            return;
        }
        boolean expectOpening = true;
        int size = sentence.size();
        for (int i = 0; i < size; ++i) {
            Token token = sentence.get(i);
            String value = token.getTokenValue();
            if (doubleAsciiQuote.matcher(value).find()) {
                if (expectOpening) {
                    // Only open a quotation when something quotable follows;
                    // otherwise leave the token as it is.
                    if (i < size - 1
                            && doubleAsciiQuoteAlphaNumeric.matcher(sentence.get(i + 1).getTokenValue()).find()) {
                        token.setTokenValue("``");
                        expectOpening = false;
                    }
                } else {
                    token.setTokenValue("''");
                    expectOpening = true;
                }
            } else if (singleAsciiQuote.matcher(value).find()
                    && i < size - 2
                    // NOTE(review): both look-ahead tokens must be exactly one
                    // character long for these full matches to succeed - confirm
                    // this is the intended condition.
                    && SINGLE_ASCII_LETTER.matcher(sentence.get(i + 1).getTokenValue()).matches()
                    && SINGLE_NON_SPACE_CHAR.matcher(sentence.get(i + 2).getTokenValue()).matches()) {
                token.setTokenValue("`");
            }
        }
    }

    /** Returns whether {@code lang} is one of the non-English languages whose quotes are flattened to ASCII. */
    private static boolean usesAsciiQuotes(String lang) {
        for (String code : ASCII_QUOTE_LANGUAGES) {
            if (code.equalsIgnoreCase(lang)) {
                return true;
            }
        }
        return false;
    }
}

