/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.language;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.tika.exception.TikaException;

@Deprecated
public class LanguageProfilerBuilder {
    // N-gram length bounds; every bound is 3 in this version of the class.
    static final int ABSOLUTE_MIN_NGRAM_LENGTH = 3;
    static final int ABSOLUTE_MAX_NGRAM_LENGTH = 3;
    static final int DEFAULT_MIN_NGRAM_LENGTH = 3;
    static final int DEFAULT_MAX_NGRAM_LENGTH = 3;

    /** Extension (without the dot) appended to a profile name to form its file name. */
    static final String FILE_EXTENSION = "ngp";

    /** Upper bound on the number of entries kept when sorting or saving. */
    static final int MAX_SIZE = 1000;

    /** Character placed before and after each word during analysis. */
    static final char SEPARATOR = '_';

    /** The separator as a one-character string; such an n-gram is never counted. */
    private static final String SEP_CHARSEQ = new String(new char[]{SEPARATOR});

    /** Name of this profile. */
    private String name;

    /** Cached result of {@link #getSorted()}; {@code null} until computed. */
    private List<NGramEntry> sorted;

    /** Shortest n-gram length collected. */
    private int minLength = DEFAULT_MIN_NGRAM_LENGTH;

    /** Longest n-gram length collected. */
    private int maxLength = DEFAULT_MAX_NGRAM_LENGTH;

    /** Total occurrence count per n-gram length, indexed by length; {@code null} until computed. */
    private int[] ngramcounts;

    /** All collected n-grams keyed by their character sequence. */
    private Map<CharSequence, NGramEntry> ngrams;

    /** Reusable buffer holding the word currently being scanned by {@code analyze}. */
    private QuickStringBuffer word = new QuickStringBuffer();

    /**
     * Creates an empty profile builder with explicit n-gram length bounds.
     *
     * @param name   name of the profile
     * @param minlen shortest n-gram length to collect
     * @param maxlen longest n-gram length to collect
     */
    public LanguageProfilerBuilder(String name, int minlen, int maxlen) {
        this.name = name;
        this.minLength = minlen;
        this.maxLength = maxlen;
        // Pre-sized so typical profiles do not trigger early rehashing.
        this.ngrams = new HashMap<>(4000);
    }

    /**
     * Creates an empty profile builder that uses the default n-gram length
     * bounds (both 3).
     *
     * @param name name of the profile
     */
    public LanguageProfilerBuilder(String name) {
        this(name, DEFAULT_MIN_NGRAM_LENGTH, DEFAULT_MAX_NGRAM_LENGTH);
    }

    /**
     * Returns the name this profile was created with.
     *
     * @return the profile name
     */
    public String getName() {
        return name;
    }

    /**
     * Adds every n-gram of the given word for each configured n-gram length.
     *
     * <p>Only lengths strictly smaller than the word length are considered, so
     * a word whose length equals {@code n} contributes no n-gram of that
     * length.
     *
     * @param word the word to split into n-grams
     */
    public void add(StringBuffer word) {
        int n = this.minLength;
        while (n <= this.maxLength && n < word.length()) {
            this.add(word, n);
            n++;
        }
    }

    /**
     * Counts the n-grams that end at the current tail of the word buffer, one
     * for each configured length that fits into the buffer.
     *
     * @param word the buffer holding the word scanned so far
     */
    private void add(QuickStringBuffer word) {
        final int length = word.length();
        if (length < this.minLength) {
            // Buffer is still too short for even the smallest n-gram.
            return;
        }
        final int longest = Math.min(this.maxLength, length);
        for (int n = this.minLength; n <= longest; n++) {
            int from = length - n;
            this.add(word.subSequence(from, length));
        }
    }

    /**
     * Increments the counter of a single n-gram, creating its entry on first
     * sight. A sequence equal to the bare separator string is ignored.
     *
     * @param cs the n-gram to count
     */
    private void add(CharSequence cs) {
        if (!cs.equals(SEP_CHARSEQ)) {
            NGramEntry entry = this.ngrams.get(cs);
            if (entry == null) {
                entry = new NGramEntry(cs);
                this.ngrams.put(cs, entry);
            }
            entry.inc();
        }
    }

    /**
     * Rebuilds this profile from the given text.
     *
     * <p>Previously collected n-grams are discarded. The text is lower-cased
     * character by character and split into words at every non-letter; each
     * word is wrapped in separator characters before its n-grams are counted.
     * Frequencies are normalized once the whole text has been processed.
     *
     * @param text the text to analyze
     */
    public void analyze(StringBuilder text) {
        if (this.ngrams != null) {
            this.ngrams.clear();
            this.sorted = null;
            this.ngramcounts = null;
        }

        // Every word starts with a leading separator.
        this.word.clear().append(SEPARATOR);
        for (int pos = 0; pos < text.length(); pos++) {
            char lower = Character.toLowerCase(text.charAt(pos));
            if (Character.isLetter(lower)) {
                this.add(this.word.append(lower));
            } else if (this.word.length() > 1) {
                // End of a non-empty word: close it with a separator and start over.
                this.add(this.word.append(SEPARATOR));
                this.word.clear().append(SEPARATOR);
            }
        }

        // Close the last word if the text did not end with a non-letter.
        if (this.word.length() > 1) {
            this.add(this.word.append(SEPARATOR));
        }
        this.normalize();
    }

    /**
     * Counts every n-gram of length {@code n} contained in the word.
     *
     * @param word the word to split
     * @param n    the n-gram length
     */
    private void add(StringBuffer word, int n) {
        int start = 0;
        while (start <= word.length() - n) {
            this.add(word.subSequence(start, start + n));
            start++;
        }
    }

    /**
     * Computes the relative frequency of every collected n-gram.
     *
     * <p>If the per-length totals have not been computed yet they are summed
     * from the current entries first; otherwise the existing totals are
     * reused. Each entry's frequency is then its count divided by the total
     * count of all n-grams of the same length.
     */
    protected void normalize() {
        if (this.ngramcounts == null) {
            // Index = n-gram length, value = total occurrences of that length.
            this.ngramcounts = new int[this.maxLength + 1];
            for (NGramEntry entry : this.ngrams.values()) {
                this.ngramcounts[entry.size()] += entry.count;
            }
        }
        for (NGramEntry entry : this.ngrams.values()) {
            entry.frequency = (float) entry.count / (float) this.ngramcounts[entry.size()];
        }
    }

    /**
     * Returns the collected n-grams in their natural order (see
     * {@code NGramEntry.compareTo}), truncated to at most {@link #MAX_SIZE}
     * entries. The result is computed once and cached.
     *
     * @return the sorted, possibly truncated, list of n-gram entries
     */
    public List<NGramEntry> getSorted() {
        if (this.sorted != null) {
            return this.sorted;
        }
        this.sorted = new ArrayList<>(this.ngrams.values());
        Collections.sort(this.sorted);
        boolean tooLong = this.sorted.size() > MAX_SIZE;
        if (tooLong) {
            this.sorted = this.sorted.subList(0, MAX_SIZE);
        }
        return this.sorted;
    }

    /**
     * Renders the profile name followed by one {@code [ngram/count/frequency]}
     * line per entry of {@link #getSorted()}.
     *
     * @return a multi-line textual dump of this profile
     */
    @Override
    public String toString() {
        StringBuilder out = new StringBuilder();
        out.append("NGramProfile: ");
        out.append(this.name);
        out.append("\n");
        for (NGramEntry ngram : this.getSorted()) {
            out.append("[");
            out.append(ngram.seq);
            out.append("/");
            out.append(ngram.count);
            out.append("/");
            out.append(ngram.frequency);
            out.append("]\n");
        }
        return out.toString();
    }

    /**
     * Calculates a distance score between this profile and another one.
     *
     * <p>The sorted entries of both profiles are walked in turn. An n-gram
     * present in both profiles contributes half of the absolute frequency
     * difference per walk; an n-gram missing from the opposite profile
     * contributes its full frequency. Identical profiles therefore score 0 and
     * larger values mean less similar profiles.
     *
     * @param another the profile to compare against
     * @return the accumulated distance
     * @throws TikaException if the score cannot be computed; the underlying
     *         failure is attached as the cause
     */
    public float getSimilarity(LanguageProfilerBuilder another) throws TikaException {
        float sum = 0.0f;
        try {
            sum = accumulateDistance(sum, another.getSorted(), this.ngrams);
            sum = accumulateDistance(sum, this.getSorted(), another.ngrams);
        }
        catch (Exception e2) {
            // Keep the original failure so callers can diagnose it.
            throw new TikaException("Could not calculate a score how well NGramProfiles match each other", e2);
        }
        return sum;
    }

    /** Adds to {@code sum} the distance of each entry to its counterpart in {@code reference}. */
    private static float accumulateDistance(float sum, List<NGramEntry> entries, Map<CharSequence, NGramEntry> reference) {
        for (NGramEntry entry : entries) {
            NGramEntry counterpart = reference.get(entry.seq);
            if (counterpart != null) {
                sum += Math.abs(entry.frequency - counterpart.frequency) / 2.0f;
            } else {
                sum += entry.frequency;
            }
        }
        return sum;
    }

    /**
     * Replaces the content of this profile with n-grams read from a stream.
     *
     * <p>The stream is read as UTF-8 text with one {@code <ngram> <count>}
     * pair per line. Empty lines, lines starting with {@code #}, lines without
     * a space separator and n-grams whose length is outside the configured
     * bounds are skipped. Frequencies are normalized after loading.
     *
     * <p>The stream is not closed by this method.
     *
     * @param is the stream to read the profile from
     * @throws IOException if reading from the stream fails
     * @throws NumberFormatException if a count is not a valid integer
     */
    public void load(InputStream is) throws IOException {
        this.ngrams.clear();
        // Any previously cached ordering no longer matches the new content.
        this.sorted = null;
        this.ngramcounts = new int[this.maxLength + 1];
        BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.isEmpty() || line.charAt(0) == '#') {
                continue;
            }
            int spacepos = line.indexOf(' ');
            if (spacepos < 0) {
                // Malformed line: no separator between n-gram and count.
                continue;
            }
            String ngramsequence = line.substring(0, spacepos).trim();
            int len = ngramsequence.length();
            if (len < this.minLength || len > this.maxLength) {
                continue;
            }
            int ngramcount = Integer.parseInt(line.substring(spacepos + 1).trim());
            NGramEntry en = new NGramEntry(ngramsequence, ngramcount);
            this.ngrams.put(en.getSeq(), en);
            this.ngramcounts[len] += ngramcount;
        }
        this.normalize();
    }

    /**
     * Creates a new profile by analyzing all text read from a stream.
     *
     * <p>The bytes are decoded through a single reader so that multi-byte
     * characters spanning two read buffers are decoded correctly. The stream
     * is not closed by this method.
     *
     * @param name     name of the profile to create
     * @param is       stream providing the text
     * @param encoding name of the character encoding of the stream
     * @return the newly created, analyzed profile
     * @throws TikaException if the encoding is unsupported or reading fails;
     *         the underlying {@link IOException} is attached as the cause
     */
    public static LanguageProfilerBuilder create(String name, InputStream is, String encoding) throws TikaException {
        LanguageProfilerBuilder newProfile =
                new LanguageProfilerBuilder(name, DEFAULT_MIN_NGRAM_LENGTH, DEFAULT_MAX_NGRAM_LENGTH);
        StringBuilder text = new StringBuilder();
        try {
            InputStreamReader reader = new InputStreamReader(new BufferedInputStream(is), encoding);
            char[] buffer = new char[4096];
            int len;
            while ((len = reader.read(buffer)) != -1) {
                text.append(buffer, 0, len);
            }
        }
        catch (IOException e2) {
            throw new TikaException("Could not create profile, " + e2.getMessage(), e2);
        }
        newProfile.analyze(text);
        return newProfile;
    }

    /**
     * Writes this profile to a stream as UTF-8 text.
     *
     * <p>The output starts with a {@code #} comment line containing the
     * generation date. For each configured n-gram length the entries of that
     * length are sorted, truncated to at most {@link #MAX_SIZE} and written as
     * {@code <ngram> <count>} lines. The stream is flushed but not closed.
     *
     * @param os the stream to write to
     * @throws IOException if writing to the stream fails
     */
    public void save(OutputStream os) throws IOException {
        os.write(("# NgramProfile generated at " + new Date() + " for Apache Tika Language Identification\n").getBytes(StandardCharsets.UTF_8));

        List<NGramEntry> list = new ArrayList<>();
        for (int len = this.minLength; len <= this.maxLength; ++len) {
            // A fresh list per length; the truncated view is never reused as a working buffer.
            List<NGramEntry> sublist = new ArrayList<>();
            for (NGramEntry entry : this.ngrams.values()) {
                if (entry.getSeq().length() == len) {
                    sublist.add(entry);
                }
            }
            Collections.sort(sublist);
            if (sublist.size() > MAX_SIZE) {
                sublist = sublist.subList(0, MAX_SIZE);
            }
            list.addAll(sublist);
        }

        for (NGramEntry entry : list) {
            String line = entry.toString() + " " + entry.getCount() + "\n";
            os.write(line.getBytes(StandardCharsets.UTF_8));
        }
        os.flush();
    }

    /**
     * Command-line entry point.
     *
     * <ul>
     *   <li>{@code -create profilename filename encoding} analyzes the file and
     *       writes the profile to {@code profilename.ngp};</li>
     *   <li>{@code -similarity file1 file2 encoding} analyzes both files and
     *       prints their similarity score;</li>
     *   <li>{@code -score profilename filename encoding} analyzes the file and
     *       prints its score against the saved profile
     *       {@code profilename.ngp}.</li>
     * </ul>
     *
     * <p>Every command takes exactly three operands. If no arguments are given
     * or operands are missing, the usage text is printed and the JVM exits
     * with status -1. If several commands are given the last one wins.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        final String usage = "Usage: NGramProfile [-create profilename filename encoding] [-similarity file1 file2 encoding] [-score profile-name filename encoding]";
        final int create = 1;
        final int similarity = 2;
        final int score = 3;

        int command = 0;
        String first = "";
        String second = "";
        String encoding = "";
        if (args.length == 0) {
            System.err.println(usage);
            System.exit(-1);
            return;
        }
        for (int i = 0; i < args.length; ++i) {
            int parsed;
            if ("-create".equals(args[i])) {
                parsed = create;
            } else if ("-similarity".equals(args[i])) {
                parsed = similarity;
            } else if ("-score".equals(args[i])) {
                parsed = score;
            } else {
                continue;
            }
            if (args.length - i <= 3) {
                // Fewer than three operands follow the command.
                System.err.println(usage);
                System.exit(-1);
                return;
            }
            command = parsed;
            first = args[++i];
            second = args[++i];
            encoding = args[++i];
        }
        try {
            switch (command) {
                case create: {
                    // first = profile name, second = text file
                    LanguageProfilerBuilder newProfile = createFromFile(first, second, encoding);
                    File target = new File(first + "." + FILE_EXTENSION);
                    try (FileOutputStream fos = new FileOutputStream(target)) {
                        newProfile.save(fos);
                    }
                    System.out.println("new profile " + first + "." + FILE_EXTENSION + " was created.");
                    break;
                }
                case similarity: {
                    // first and second are both text files
                    LanguageProfilerBuilder newProfile = createFromFile(first, first, encoding);
                    newProfile.normalize();
                    LanguageProfilerBuilder newProfile2 = createFromFile(second, second, encoding);
                    newProfile2.normalize();
                    System.out.println("Similarity is " + newProfile.getSimilarity(newProfile2));
                    break;
                }
                case score: {
                    // first = profile name, second = text file
                    LanguageProfilerBuilder newProfile = createFromFile(second, second, encoding);
                    LanguageProfilerBuilder compare =
                            new LanguageProfilerBuilder(first, DEFAULT_MIN_NGRAM_LENGTH, DEFAULT_MAX_NGRAM_LENGTH);
                    try (FileInputStream fis = new FileInputStream(new File(first + "." + FILE_EXTENSION))) {
                        compare.load(fis);
                    }
                    System.out.println("Score is " + compare.getSimilarity(newProfile));
                    break;
                }
                default: {
                    // No recognized command among the arguments.
                    System.err.println(usage);
                    break;
                }
            }
        }
        catch (Exception e2) {
            e2.printStackTrace();
        }
    }

    /** Builds a profile named {@code name} from the text file {@code filename}, closing the file afterwards. */
    private static LanguageProfilerBuilder createFromFile(String name, String filename, String encoding)
            throws IOException, TikaException {
        try (FileInputStream fis = new FileInputStream(new File(filename))) {
            return LanguageProfilerBuilder.create(name, fis, encoding);
        }
    }

    /**
     * Minimal growable character buffer that can be reset without
     * reallocating its backing array.
     *
     * <p>Only the first {@link #length()} characters of the backing array are
     * part of the sequence; anything beyond is unused capacity.
     */
    private static class QuickStringBuffer
    implements CharSequence {
        /** Backing storage; may be longer than the logical content. */
        private char[] value;
        /** Number of characters currently in use. */
        private int count;

        /** Creates an empty buffer with an initial capacity of 16. */
        QuickStringBuffer() {
            this(16);
        }

        /** Wraps the given array (without copying) and treats all of it as content. */
        QuickStringBuffer(char[] value) {
            this.value = value;
            this.count = value.length;
        }

        /** Creates an empty buffer with the given initial capacity. */
        QuickStringBuffer(int length) {
            this.value = new char[length];
        }

        /** Creates a buffer initialized with the given string plus 16 spare characters. */
        QuickStringBuffer(String str2) {
            this(str2.length() + 16);
            this.append(str2);
        }

        @Override
        public int length() {
            return this.count;
        }

        /**
         * Grows the backing array to roughly double its size, or to
         * {@code minimumCapacity} if that is larger.
         */
        private void expandCapacity(int minimumCapacity) {
            int newCapacity = (this.value.length + 1) * 2;
            if (newCapacity < 0) {
                // Doubling overflowed.
                newCapacity = Integer.MAX_VALUE;
            } else if (minimumCapacity > newCapacity) {
                newCapacity = minimumCapacity;
            }
            char[] newValue = new char[newCapacity];
            System.arraycopy(this.value, 0, newValue, 0, this.count);
            this.value = newValue;
        }

        /** Resets the logical length to zero while keeping the allocated capacity. */
        QuickStringBuffer clear() {
            this.count = 0;
            return this;
        }

        /**
         * @throws StringIndexOutOfBoundsException if {@code index} is negative
         *         or not smaller than {@link #length()}
         */
        @Override
        public char charAt(int index) {
            if (index < 0 || index >= this.count) {
                // Do not expose stale characters sitting in unused capacity.
                throw new StringIndexOutOfBoundsException(index);
            }
            return this.value[index];
        }

        /** Appends a string; {@code null} is appended as the text {@code "null"}. */
        QuickStringBuffer append(String str2) {
            if (str2 == null) {
                str2 = String.valueOf(str2);
            }
            int len = str2.length();
            int newcount = this.count + len;
            if (newcount > this.value.length) {
                this.expandCapacity(newcount);
            }
            str2.getChars(0, len, this.value, this.count);
            this.count = newcount;
            return this;
        }

        /** Appends a single character. */
        QuickStringBuffer append(char c) {
            int newcount = this.count + 1;
            if (newcount > this.value.length) {
                this.expandCapacity(newcount);
            }
            this.value[this.count++] = c;
            return this;
        }

        /**
         * Returns an independent {@link String} copy of the requested range.
         *
         * @throws StringIndexOutOfBoundsException if the range is not within
         *         {@code [0, length()]} or {@code start > end}
         */
        @Override
        public CharSequence subSequence(int start, int end) {
            if (start < 0 || end > this.count || start > end) {
                throw new StringIndexOutOfBoundsException("start " + start + ", end " + end + ", length " + this.count);
            }
            return new String(this.value, start, end - start);
        }

        /** Returns exactly the characters of this sequence, excluding unused capacity. */
        @Override
        public String toString() {
            return new String(this.value, 0, this.count);
        }
    }

    /**
     * A single n-gram together with its occurrence count and relative
     * frequency.
     *
     * <p>Entries order by descending frequency and then by their text.
     * Equality and hash code are based on the character sequence only.
     */
    static class NGramEntry
    implements Comparable<NGramEntry> {
        /** Optional back-reference set through {@link #setProfile}. */
        private LanguageProfilerBuilder profile = null;
        /** The n-gram text. */
        CharSequence seq = null;
        /** Number of occurrences counted so far. */
        private int count = 0;
        /** Relative frequency; stays 0 until the enclosing class assigns it. */
        private float frequency = 0.0f;

        /**
         * Creates an entry with a count of zero.
         *
         * @param seq2 the n-gram text
         */
        public NGramEntry(CharSequence seq2) {
            this.seq = seq2;
        }

        /**
         * Creates an entry with a preset count.
         *
         * @param seq2  the n-gram text
         * @param count the initial occurrence count
         */
        public NGramEntry(String seq2, int count) {
            // A String is immutable, so it can be stored directly.
            this.seq = seq2;
            this.count = count;
        }

        /** @return the number of occurrences */
        public int getCount() {
            return this.count;
        }

        /** @return the relative frequency */
        public float getFrequency() {
            return this.frequency;
        }

        /** @return the n-gram text */
        public CharSequence getSeq() {
            return this.seq;
        }

        /** @return the length of the n-gram text */
        public int size() {
            return this.seq.length();
        }

        /**
         * Orders entries by descending frequency; ties are broken by the
         * natural order of the n-gram text.
         */
        @Override
        public int compareTo(NGramEntry ngram) {
            int diff = Float.compare(ngram.getFrequency(), this.frequency);
            if (diff != 0) {
                return diff;
            }
            return this.toString().compareTo(ngram.toString());
        }

        /** Increments the occurrence count by one. */
        public void inc() {
            ++this.count;
        }

        /** Associates this entry with a profile. */
        public void setProfile(LanguageProfilerBuilder profile2) {
            this.profile = profile2;
        }

        /** @return the associated profile, or {@code null} if none was set */
        public LanguageProfilerBuilder getProfile() {
            return this.profile;
        }

        /** @return the n-gram text as a string */
        @Override
        public String toString() {
            return this.seq.toString();
        }

        @Override
        public int hashCode() {
            return this.seq.hashCode();
        }

        /**
         * Two entries are equal when their n-gram sequences are equal.
         * Returns {@code false} for {@code null}, for objects of another type
         * and for an argument whose sequence is {@code null}.
         */
        @Override
        public boolean equals(Object obj) {
            if (!(obj instanceof NGramEntry)) {
                return false;
            }
            NGramEntry other = (NGramEntry) obj;
            return other.seq != null && other.seq.equals(this.seq);
        }
    }
}

