/*
 * Decompiled with CFR 0.152.
 */
package chalk.tools.tokenize;

import chalk.tools.dictionary.Dictionary;
import chalk.tools.tokenize.AbstractTokenizer;
import chalk.tools.tokenize.TokSpanEventStream;
import chalk.tools.tokenize.TokenContextGenerator;
import chalk.tools.tokenize.TokenSample;
import chalk.tools.tokenize.TokenizerFactory;
import chalk.tools.tokenize.TokenizerModel;
import chalk.tools.tokenize.WhitespaceTokenizer;
import chalk.tools.tokenize.lang.Factory;
import chalk.tools.util.ObjectStream;
import chalk.tools.util.Span;
import chalk.tools.util.TrainingParameters;
import chalk.tools.util.model.ModelUtil;
import java.io.IOException;
import java.io.ObjectStreamException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import nak.model.AbstractModel;
import nak.model.EventStream;
import nak.model.MaxentModel;
import nak.model.TrainUtil;

/**
 * Tokenizer that first cuts the input at whitespace and then consults a
 * {@link MaxentModel} to decide whether each whitespace-delimited chunk must
 * be split further.
 *
 * <p>Every call to {@link #tokenizePos(String)} clears and refills the two
 * per-instance result lists, so {@link #getTokenProbabilities()} always
 * describes the most recent call only.
 *
 * <p>NOTE(review): those lists are mutated without any synchronization, so
 * sharing one instance between threads looks unsafe — confirm before doing so.
 */
public class TokenizerME extends AbstractTokenizer {
    /** Outcome label that {@link #tokenizePos(String)} interprets as "split at this position". */
    public static final String SPLIT = "T";
    /**
     * Counterpart label to {@link #SPLIT}. It is not referenced inside this
     * class; presumably it is the model outcome for "do not split" — confirm
     * against the event stream used for training.
     */
    public static final String NO_SPLIT = "F";
    /**
     * Pattern matching strings made only of ASCII letters and digits.
     * Deprecated and unused within this class; instances use the pattern
     * obtained from their factory instead.
     */
    @Deprecated
    public static final Pattern alphaNumeric = Pattern.compile("^[A-Za-z0-9]+$");
    /** Pattern used by the alphanumeric shortcut in {@link #tokenizePos(String)}. */
    private final Pattern alphanumeric;
    /** Model evaluated at each candidate split position. */
    private MaxentModel model;
    /** Produces the model context for a position inside a chunk. */
    private final TokenContextGenerator cg;
    /** Whether chunks fully matching {@link #alphanumeric} bypass the model. */
    private boolean useAlphaNumericOptimization;
    /** Probability of each token emitted by the latest {@link #tokenizePos(String)} call. */
    private List<Double> tokProbs;
    /** Spans emitted by the latest {@link #tokenizePos(String)} call. */
    private List<Span> newTokens;

    /**
     * Creates a tokenizer configured entirely from the model's own
     * {@link TokenizerFactory}.
     *
     * @param tokenizerModel model supplying the factory and the maxent model
     */
    public TokenizerME(TokenizerModel tokenizerModel) {
        TokenizerFactory factory = tokenizerModel.getFactory();
        alphanumeric = factory.getAlphaNumericPattern();
        cg = factory.getContextGenerator();
        model = tokenizerModel.getMaxentModel();
        useAlphaNumericOptimization = factory.isUseAlphaNumericOptmization();
        newTokens = new ArrayList<Span>();
        tokProbs = new ArrayList<Double>(50);
    }

    /**
     * Creates a tokenizer whose pattern and context generator come from a
     * language {@link Factory}, keyed by the model's language, while the maxent
     * model and the optimization flag come from the model itself.
     *
     * @param tokenizerModel model supplying language, abbreviations, maxent
     *                       model and the alphanumeric-optimization flag
     * @param factory        language factory used to build the pattern and
     *                       the context generator
     */
    public TokenizerME(TokenizerModel tokenizerModel, Factory factory) {
        String language = tokenizerModel.getLanguage();
        alphanumeric = factory.getAlphanumeric(language);
        Set<String> abbreviations = getAbbreviations(tokenizerModel.getAbbreviations());
        cg = factory.createTokenContextGenerator(language, abbreviations);
        model = tokenizerModel.getMaxentModel();
        useAlphaNumericOptimization = tokenizerModel.useAlphaNumericOptimization();
        newTokens = new ArrayList<Span>();
        tokProbs = new ArrayList<Double>(50);
    }

    /**
     * Converts an optional abbreviation dictionary into a string set.
     *
     * @param dictionary abbreviation dictionary, may be {@code null}
     * @return the dictionary's string set, or an empty set when it is {@code null}
     */
    private static Set<String> getAbbreviations(Dictionary dictionary) {
        return dictionary == null ? Collections.<String>emptySet() : dictionary.asStringSet();
    }

    /**
     * Returns the probabilities recorded by the most recent
     * {@link #tokenizePos(String)} call, one entry per emitted token and in
     * the same order.
     *
     * @return a freshly allocated array of token probabilities
     */
    public double[] getTokenProbabilities() {
        double[] probabilities = new double[tokProbs.size()];
        int slot = 0;
        for (Double probability : tokProbs) {
            probabilities[slot++] = probability;
        }
        return probabilities;
    }

    /**
     * Tokenizes the given text and returns the span of every token.
     *
     * <p>The text is first cut at whitespace. A chunk shorter than two
     * characters, or (when the alphanumeric optimization is on) a chunk fully
     * matching the alphanumeric pattern, is emitted unchanged with
     * probability {@code 1.0}. Every other chunk is handed to the model.
     *
     * @param string text to tokenize
     * @return spans of the resulting tokens, in order of appearance
     */
    @Override
    public Span[] tokenizePos(String string) {
        Span[] whitespaceSpans = WhitespaceTokenizer.INSTANCE.tokenizePos(string);
        newTokens.clear();
        tokProbs.clear();
        for (int idx = 0; idx < whitespaceSpans.length; idx++) {
            Span candidate = whitespaceSpans[idx];
            String text = string.substring(candidate.getStart(), candidate.getEnd());
            // Short-circuit order matters: the length test runs first, the
            // regex only when the optimization is enabled.
            boolean keepWhole = text.length() < 2
                    || (useAlphaNumericOptimization() && alphanumeric.matcher(text).matches());
            if (keepWhole) {
                newTokens.add(candidate);
                tokProbs.add(1.0);
            } else {
                splitWithModel(candidate, text);
            }
        }
        return newTokens.toArray(new Span[newTokens.size()]);
    }

    /**
     * Walks every interior position of one whitespace-delimited chunk, asks
     * the model for its best outcome there, and appends the resulting
     * sub-spans and their probabilities to the instance lists.
     *
     * @param candidate span of the chunk within the original text
     * @param text      the chunk's characters
     */
    private void splitWithModel(Span candidate, String text) {
        int pendingStart = candidate.getStart();
        final int limit = candidate.getEnd();
        final int origin = candidate.getStart();
        // Product of the best-outcome probabilities seen since the last split.
        double confidence = 1.0;
        int position = origin + 1;
        while (position < limit) {
            // The context generator works on offsets relative to the chunk.
            double[] outcomeProbs = model.eval(cg.getContext(text, position - origin));
            String best = model.getBestOutcome(outcomeProbs);
            confidence *= outcomeProbs[model.getIndex(best)];
            if (best.equals(SPLIT)) {
                newTokens.add(new Span(pendingStart, position));
                tokProbs.add(confidence);
                pendingStart = position;
                confidence = 1.0;
            }
            position++;
        }
        // Whatever follows the last split (or the whole chunk) is the final token.
        newTokens.add(new Span(pendingStart, limit));
        tokProbs.add(confidence);
    }

    /**
     * Trains a tokenizer model using the settings carried by a
     * {@link TokenizerFactory}.
     *
     * @param objectStream       training samples
     * @param tokenizerFactory   supplies the optimization flag, alphanumeric
     *                           pattern and context generator
     * @param trainingParameters training settings passed to the trainer
     * @return the trained model
     * @throws IOException declared by this method; presumably raised while
     *                     reading samples or training
     */
    public static TokenizerModel train(ObjectStream<TokenSample> objectStream, TokenizerFactory tokenizerFactory, TrainingParameters trainingParameters) throws IOException {
        HashMap<String, String> manifestEntries = new HashMap<String, String>();
        TokSpanEventStream events = new TokSpanEventStream(
                objectStream,
                tokenizerFactory.isUseAlphaNumericOptmization(),
                tokenizerFactory.getAlphaNumericPattern(),
                tokenizerFactory.getContextGenerator());
        AbstractModel trained = TrainUtil.train((EventStream)events, trainingParameters.getSettings(), manifestEntries);
        return new TokenizerModel(trained, manifestEntries, tokenizerFactory);
    }

    /**
     * Trains a tokenizer model without an abbreviation dictionary; delegates
     * to the dictionary-taking overload with {@code null}.
     *
     * @param string             language code
     * @param objectStream       training samples
     * @param bl                 whether to use the alphanumeric optimization
     * @param trainingParameters training settings passed to the trainer
     * @return the trained model
     * @throws IOException propagated from the delegate
     */
    public static TokenizerModel train(String string, ObjectStream<TokenSample> objectStream, boolean bl, TrainingParameters trainingParameters) throws IOException {
        return train(string, objectStream, null, bl, trainingParameters);
    }

    /**
     * Trains a tokenizer model for a language, optionally with an
     * abbreviation dictionary, using a freshly created language
     * {@link Factory}.
     *
     * @param string             language code
     * @param objectStream       training samples
     * @param dictionary         abbreviation dictionary, may be {@code null}
     * @param bl                 whether to use the alphanumeric optimization
     * @param trainingParameters training settings passed to the trainer
     * @return the trained model
     * @throws IOException declared by this method; presumably raised while
     *                     reading samples or training
     */
    public static TokenizerModel train(String string, ObjectStream<TokenSample> objectStream, Dictionary dictionary, boolean bl, TrainingParameters trainingParameters) throws IOException {
        Factory languageFactory = new Factory();
        HashMap<String, String> manifestEntries = new HashMap<String, String>();
        Pattern languagePattern = languageFactory.getAlphanumeric(string);
        TokenContextGenerator contextGenerator =
                languageFactory.createTokenContextGenerator(string, getAbbreviations(dictionary));
        TokSpanEventStream events = new TokSpanEventStream(objectStream, bl, languagePattern, contextGenerator);
        AbstractModel trained = TrainUtil.train((EventStream)events, trainingParameters.getSettings(), manifestEntries);
        return new TokenizerModel(string, trained, dictionary, bl, manifestEntries);
    }

    /**
     * Trains a tokenizer model from two integer settings, which are handed to
     * {@code ModelUtil.createTrainingParameters} in swapped order
     * ({@code n2} first, then {@code n}).
     *
     * @param string       language code
     * @param objectStream training samples
     * @param bl           whether to use the alphanumeric optimization
     * @param n            second argument of {@code createTrainingParameters}
     *                     (presumably the cutoff — confirm)
     * @param n2           first argument of {@code createTrainingParameters}
     *                     (presumably the iteration count — confirm)
     * @return the trained model
     * @throws IOException propagated from the delegate
     */
    @Deprecated
    public static TokenizerModel train(String string, ObjectStream<TokenSample> objectStream, boolean bl, int n, int n2) throws IOException {
        TrainingParameters parameters = ModelUtil.createTrainingParameters(n2, n);
        return train(string, objectStream, bl, parameters);
    }

    /**
     * Trains a tokenizer model with the fixed settings {@code 5} and
     * {@code 100}, forwarded to the deprecated integer-based overload.
     *
     * @param string       language code
     * @param objectStream training samples
     * @param bl           whether to use the alphanumeric optimization
     * @return the trained model
     * @throws IOException           propagated from the delegate
     * @throws ObjectStreamException declared by this method
     */
    public static TokenizerModel train(String string, ObjectStream<TokenSample> objectStream, boolean bl) throws IOException, ObjectStreamException {
        return train(string, objectStream, bl, 5, 100);
    }

    /**
     * Reports whether chunks fully matching the alphanumeric pattern are
     * emitted without consulting the model.
     *
     * @return the alphanumeric-optimization flag
     */
    public boolean useAlphaNumericOptimization() {
        return useAlphaNumericOptimization;
    }
}

