/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.dev.dumpcheck;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.languagetool.Language;
import org.languagetool.Languages;
import org.languagetool.dev.dumpcheck.DocumentLimitReachedException;
import org.languagetool.dev.dumpcheck.MixingSentenceSource;
import org.languagetool.dev.dumpcheck.Sentence;
import org.languagetool.dev.index.Indexer;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Command-line tool that reads sentences from one or more dump files (via
 * {@link MixingSentenceSource}) and adds them to a Lucene index through an
 * {@link Indexer}. After indexing, one extra "meta" document recording the
 * number of indexed sentences is written to the same index.
 */
public class SentenceSourceIndexer extends DefaultHandler implements AutoCloseable {

    /** Name of the meta document field that stores the number of indexed sentences. */
    public static final String MAX_DOC_COUNT_VALUE = "maxDocCountValue";
    /** Name of the marker field that identifies the meta document. */
    public static final String MAX_DOC_COUNT_FIELD = "maxDocCount";
    /** Value stored in the {@link #MAX_DOC_COUNT_FIELD} marker field. */
    public static final String MAX_DOC_COUNT_FIELD_VAL = "1";

    /** Command-line value of {@code <indexPosTags>} that requests the indexer's default analysis. */
    private static final String INDEX_POS_TAGS_ON = "1";
    /** Command-line value of {@code <indexPosTags>} that requests plain-text indexing. */
    private static final String INDEX_POS_TAGS_OFF = "0";

    private final Indexer indexer;
    private final int maxSentences;
    private int sentenceCount = 0;

    /**
     * @param dir          Lucene directory the index is written to
     * @param language     language of the sentences to be indexed
     * @param maxSentences maximum number of sentences to index; values {@code <= 0} mean no limit
     * @param analyzer     analyzer to index with, or {@code null} to let the {@link Indexer}
     *                     use its two-argument constructor (i.e. its own default analysis)
     */
    SentenceSourceIndexer(Directory dir, Language language, int maxSentences, Analyzer analyzer) {
        this.indexer = analyzer == null ? new Indexer(dir, language) : new Indexer(dir, language, analyzer);
        this.maxSentences = maxSentences;
    }

    /**
     * Same as the four-argument constructor with a {@code null} analyzer.
     *
     * @param dir          Lucene directory the index is written to
     * @param language     language of the sentences to be indexed
     * @param maxSentences maximum number of sentences to index; values {@code <= 0} mean no limit
     */
    SentenceSourceIndexer(Directory dir, Language language, int maxSentences) {
        this(dir, language, maxSentences, null);
    }

    /** Closes the underlying {@link Indexer}. */
    @Override
    public void close() throws Exception {
        indexer.close();
    }

    /**
     * Indexes all sentences from the given dump files, printing a progress line
     * for every 1000th sentence.
     *
     * @throws IOException                   if reading or indexing fails
     * @throws DocumentLimitReachedException once {@code maxSentences} sentences have been
     *                                       indexed (only when {@code maxSentences > 0})
     */
    private void run(List<String> dumpFileNames, Language language) throws IOException {
        MixingSentenceSource source = MixingSentenceSource.create(dumpFileNames, language);
        while (source.hasNext()) {
            Sentence sentence = source.next();
            if (sentenceCount % 1000 == 0) {
                System.out.println("Indexing sentence #" + sentenceCount + " (" + source.getSourceDistribution() + "):");
                System.out.println("  [" + sentence.getSource() + "] " + sentence);
            }
            indexer.indexSentence(sentence, sentenceCount);
            sentenceCount++;
            boolean limitReached = maxSentences > 0 && sentenceCount >= maxSentences;
            if (limitReached) {
                // The limit is signalled by exception; main() catches it and stops cleanly.
                throw new DocumentLimitReachedException(maxSentences);
            }
        }
    }

    /**
     * Adds the meta document to the index: a marker field plus the number of
     * sentences indexed so far, both stored.
     */
    private void writeMetaDocuments() throws IOException {
        Document doc = new Document();
        doc.add(new StringField(MAX_DOC_COUNT_FIELD, MAX_DOC_COUNT_FIELD_VAL, Field.Store.YES));
        doc.add(new StringField(MAX_DOC_COUNT_VALUE, String.valueOf(sentenceCount), Field.Store.YES));
        indexer.add(doc);
    }

    /**
     * Maps the {@code <indexPosTags>} command-line value to the analyzer to use.
     *
     * @return {@code null} for {@code "1"} (the indexer then picks its own analysis), or a
     *         {@link StandardAnalyzer} with an empty stop word set for {@code "0"}
     * @throws IllegalArgumentException for any other value
     */
    private static Analyzer createAnalyzer(String indexPosTags) {
        if (INDEX_POS_TAGS_ON.equals(indexPosTags)) {
            return null;
        }
        if (INDEX_POS_TAGS_OFF.equals(indexPosTags)) {
            return new StandardAnalyzer(new CharArraySet(Collections.emptyList(), false));
        }
        throw new IllegalArgumentException("Unknown value '" + indexPosTags + "' for indexPosTags parameter, use 0 or 1");
    }

    /**
     * Entry point. Expects exactly five arguments:
     * {@code <dataFiles> <indexDir> <languageCode> <maxSentences> <indexPosTags>};
     * prints usage and exits with status 1 otherwise.
     *
     * @throws NumberFormatException    if {@code <maxSentences>} is not an integer
     * @throws IllegalArgumentException if {@code <indexPosTags>} is neither 0 nor 1
     * @throws Exception                if indexing or closing a resource fails
     */
    public static void main(String... args) throws Exception {
        if (args.length != 5) {
            System.out.println("Usage: " + SentenceSourceIndexer.class.getSimpleName() + " <dataFile...> <indexDir> <languageCode> <maxSentences> <indexPosTags>");
            System.out.println("\t<dataFiles> comma-separated list of a Wikipedia XML dump (*.xml) and/or Tatoeba files (tatoeba-*)");
            System.out.println("\t<indexDir> directory where Lucene index will be written to, existing index content will be removed");
            System.out.println("\t<languageCode> short code like en for English, de for German etc");
            System.out.println("\t<maxSentences> maximum number of sentences to be indexed, use 0 for no limit");
            System.out.println("\t<indexPosTags> 1 to also index POS tags (i.e. analyze text by LT), 0 to index only the plain text");
            System.exit(1);
        }
        List<String> dumpFilesNames = Arrays.asList(args[0].split(","));
        File indexDir = new File(args[1]);
        String languageCode = args[2];
        int maxSentences = Integer.parseInt(args[3]);
        Language language = Languages.getLanguageForShortCode(languageCode);
        // run() treats every value <= 0 as "no limit", so report it the same way here.
        if (maxSentences <= 0) {
            System.out.println("Going to index contents from " + dumpFilesNames);
        } else {
            System.out.println("Going to index up to " + maxSentences + " sentences from " + dumpFilesNames);
        }
        System.out.println("Output index dir: " + indexDir);
        long start = System.currentTimeMillis();
        Analyzer analyzer = createAnalyzer(args[4]);
        // The analyzer is registered as the first resource so that it is closed on every
        // path (including failures) and only after the indexer and directory have been
        // closed. A null resource is permitted by try-with-resources and simply skipped.
        try (Analyzer managedAnalyzer = analyzer;
             FSDirectory fsDirectory = FSDirectory.open(indexDir.toPath());
             SentenceSourceIndexer indexer = new SentenceSourceIndexer(fsDirectory, language, maxSentences, managedAnalyzer)) {
            try {
                indexer.run(dumpFilesNames, language);
            } catch (DocumentLimitReachedException e) {
                System.out.println("Sentence limit (" + e.getLimit() + ") reached, stopping indexing");
            } finally {
                // Written even when indexing stopped early, so the index always carries its count.
                indexer.writeMetaDocuments();
            }
        }
        long end = System.currentTimeMillis();
        float minutes = (float) (end - start) / 60000.0f;
        System.out.printf("Indexing took %.2f minutes\n", minutes);
    }
}

