package org.structr.files.text;

import java.io.IOException;
import java.io.Writer;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.tika.language.LanguageIdentifier;
import org.structr.core.Services;
import org.structr.core.app.StructrApp;

/* loaded from: input_file:org/structr/files/text/FulltextTokenizer.class */
/**
 * A {@link Writer} that splits the characters written to it into words.
 *
 * <p>Letters, digits and the characters in {@link #SpecialChars} are collected into a
 * word buffer; any other character ends the current word. Accepted words are stored
 * (lower-cased, except purely numeric tokens) in an insertion-ordered set, and a
 * normalized copy of the text is kept so that {@link #close()} can run language
 * detection on it.
 *
 * <p>Instances hold unsynchronized mutable state.
 */
public class FulltextTokenizer extends Writer {
    private static final Logger logger = Logger.getLogger(FulltextTokenizer.class.getName());

    /**
     * Characters that are neither letters nor digits but are still treated as part of a word.
     * The set is mutable and public; changes affect all tokenizer instances.
     */
    public static final Set<Character> SpecialChars = new LinkedHashSet<>();

    static {
        final char[] specialChars = {
            '_',
            '\u00e4', // a-umlaut
            '\u00f6', // o-umlaut
            '\u00fc', // u-umlaut
            '\u00c4', // A-umlaut
            '\u00d6', // O-umlaut
            '\u00dc', // U-umlaut
            '\u00df', // sharp s
            '\u00a7', // section sign
            '-',
            '%',
            '/',
            '@',
            '$',
            '\u20ac', // euro sign
            '\u00e6', // ae ligature
            '\u00a2', // cent sign
            '.',
            ',',
            '\'',
            '\"',
            '`'
        };
        for (char specialChar : specialChars) {
            SpecialChars.add(specialChar);
        }
    }

    /** Configuration key of the word count limit; also reported in the log message. */
    private static final String INDEXING_LIMIT_KEY = "application.filesystem.indexing.limit";

    /** Threshold for the repeat counter; see {@link #write(char[], int, int)}. */
    private static final int MAX_CONSECUTIVE_REPEATS = 10;

    /** Matches tokens that consist only of digits, dashes, dots and commas. */
    private static final Pattern NUMERIC_TOKEN = Pattern.compile("[\\-0-9\\.,]+");

    /** Separator used to split non-numeric tokens that contain dots or commas. */
    private static final Pattern WORD_SEPARATORS = Pattern.compile("[\\.,]+");

    private final String fileName;

    // The second argument is handed to Services.parseInt together with the configured
    // value; presumably it is the fallback when the setting is absent -- verify there.
    private final int wordCountLimit = Services.parseInt(StructrApp.getConfigurationValue(INDEXING_LIMIT_KEY), 50000);
    private final int wordMinLength = Services.parseInt(StructrApp.getConfigurationValue("application.filesystem.indexing.word.minlength"), 4);
    private final int wordMaxLength = Services.parseInt(StructrApp.getConfigurationValue("application.filesystem.indexing.word.maxlength"), 40);

    private final StringBuilder rawText = new StringBuilder();
    private final StringBuilder wordBuffer = new StringBuilder();
    private final Set<String> words = new LinkedHashSet<>();
    private String language = "de";
    private char lastCharacter = 0;
    private int consecutiveCharCount = 0;
    private int wordCount = 0;

    /**
     * Creates a tokenizer.
     *
     * @param fileName name reported in the log message when the word limit is reached
     */
    public FulltextTokenizer(String fileName) {
        this.fileName = fileName;
    }

    /**
     * Tokenizes a portion of a character array.
     *
     * <p>The call is ignored once the word count limit has been reached. The end of the
     * processed range is clamped to the array length instead of raising an exception.
     * Within a run of identical characters, only the first eleven are processed; the
     * rest of the run is dropped from both the word buffer and the raw text.
     *
     * @param cbuf source characters
     * @param off  index of the first character to process
     * @param len  number of characters to process
     * @throws IOException declared by {@link Writer}; propagated from {@link #flush()}
     */
    @Override // java.io.Writer
    public void write(char[] cbuf, int off, int len) throws IOException {
        if (this.wordCount >= this.wordCountLimit) {
            return;
        }
        final int end = Math.min(off + len, cbuf.length);
        for (int pos = off; pos < end; pos++) {
            final char c = cbuf[pos];
            if (c == this.lastCharacter) {
                // Stop counting at the threshold so the counter cannot overflow on
                // pathological input; everything past the threshold is skipped.
                if (this.consecutiveCharCount >= MAX_CONSECUTIVE_REPEATS) {
                    continue;
                }
                this.consecutiveCharCount++;
            } else {
                this.consecutiveCharCount = 0;
            }
            if (Character.isAlphabetic(c) || Character.isDigit(c) || SpecialChars.contains(c)) {
                this.wordBuffer.append(c);
                this.rawText.append(c);
            } else {
                // Any other character terminates the current word. Whitespace is kept
                // verbatim in the raw text, everything else is replaced by a blank.
                flush();
                this.rawText.append(Character.isWhitespace(c) ? c : ' ');
            }
            this.lastCharacter = c;
        }
    }

    /**
     * @return the language code; {@code "de"} unless {@link #close()} detected another
     *         language with reasonable certainty
     */
    public String getLanguage() {
        return this.language;
    }

    /**
     * @return the text collected so far, with non-word, non-whitespace characters
     *         replaced by blanks
     */
    public String getRawText() {
        return this.rawText.toString();
    }

    /**
     * @return the live, insertion-ordered set of accepted words (not a copy)
     */
    public Set<String> getWords() {
        return this.words;
    }

    /**
     * Turns the buffered characters into one or more words and clears the buffer.
     *
     * <p>A token without dots or commas is added in lower case. A token made up only of
     * digits, dashes, dots and commas is added unchanged. Any other token is split at
     * dots and commas, and each non-blank part is added in lower case.
     *
     * @throws IOException declared by {@link Writer}; not thrown by this implementation
     */
    @Override // java.io.Writer, java.io.Flushable
    public void flush() throws IOException {
        // flush() runs for every non-word character, so bail out cheaply when idle.
        if (this.wordBuffer.length() == 0) {
            return;
        }
        final String token = this.wordBuffer.toString().trim();
        this.wordBuffer.setLength(0);
        if (!StringUtils.isNotBlank(token)) {
            return;
        }
        if (!token.contains(".") && !token.contains(",")) {
            // Locale.ROOT keeps index terms independent of the JVM default locale.
            addWord(token.toLowerCase(Locale.ROOT));
        } else if (NUMERIC_TOKEN.matcher(token).matches()) {
            addWord(token);
        } else {
            for (String part : WORD_SEPARATORS.split(token)) {
                final String candidate = part.trim();
                if (StringUtils.isNotBlank(candidate)) {
                    addWord(candidate.toLowerCase(Locale.ROOT));
                }
            }
        }
    }

    /**
     * Flushes the pending word and runs language identification on the raw text. The
     * language is only replaced when the identifier reports reasonable certainty.
     *
     * @throws IOException declared by {@link Writer}; propagated from {@link #flush()}
     */
    @Override // java.io.Writer, java.io.Closeable, java.lang.AutoCloseable
    public void close() throws IOException {
        flush();
        final LanguageIdentifier identifier = new LanguageIdentifier(this.rawText.toString());
        if (identifier.isReasonablyCertain()) {
            this.language = identifier.getLanguage();
        }
    }

    /**
     * @return the number of words accepted so far; repeated words are counted each time,
     *         so this can exceed the size of {@link #getWords()}
     */
    public int getWordCount() {
        return this.wordCount;
    }

    /**
     * Records a word if its length is within the configured bounds and the word count
     * limit has not been reached. Reaching the limit is logged exactly once.
     *
     * @param word candidate word, already normalized by the caller
     */
    private void addWord(String word) {
        if (this.wordCount >= this.wordCountLimit) {
            return;
        }
        final int length = word.length();
        if (length < this.wordMinLength || length > this.wordMaxLength) {
            return;
        }
        this.words.add(word);
        this.wordCount++;
        if (this.wordCount == this.wordCountLimit) {
            logger.log(Level.INFO, "Indexing word count of {0} reached for {1}, no more words will be indexed. Set {2} in structr.conf to increase this limit.", new Object[]{Integer.valueOf(this.wordCountLimit), this.fileName, INDEXING_LIMIT_KEY});
        }
    }
}
