/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.dev.wikipedia;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.languagetool.Language;
import org.languagetool.TextFilter;
import org.languagetool.dev.index.Indexer;
import org.languagetool.dev.wikipedia.SwebleWikipediaTextFilter;
import org.languagetool.dev.wikipedia.TextFilterTools;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that reads {@code <title>} and {@code <text>} elements from an
 * XML stream, runs every article text through a {@link TextFilter} and hands
 * the result to an {@link Indexer}.
 *
 * <p>Articles are numbered starting at 1 in the order their closing
 * {@code </text>} tag is seen. Only articles whose number is at least
 * {@code start} are indexed; when {@code end} is non-zero, parsing is aborted
 * as soon as the article numbered {@code end} is reached (that article itself
 * is not indexed).
 */
public class WikipediaIndexHandler
extends DefaultHandler {
    public static final String MAX_DOC_COUNT_VALUE = "maxDocCountValue";
    public static final String MAX_DOC_COUNT_FIELD = "maxDocCount";
    public static final String MAX_DOC_COUNT_FIELD_VAL = "1";

    private final Indexer indexer;
    private final TextFilter textFilter;
    /** Number of the first article to index (1-based). */
    private final int start;
    /** Article number at which parsing stops (exclusive); 0 means no limit. */
    private final int end;

    /** Number of {@code </text>} end tags seen so far. */
    private int articleCount = 0;
    private boolean inText = false;
    private boolean inTitle = false;
    private StringBuilder text = new StringBuilder();
    private StringBuilder title = new StringBuilder();

    /**
     * @param dir directory passed on to the {@link Indexer}
     * @param language language used to create the indexer and to select the text filter
     * @param start number of the first article to index
     * @param end article number at which to stop (exclusive), or 0 for no limit
     * @throws IllegalArgumentException if {@code end} is non-zero and smaller than {@code start}
     */
    public WikipediaIndexHandler(Directory dir, Language language, int start, int end) {
        // Validate before creating the indexer so that a bad range does not
        // leave a freshly created indexer behind without anyone closing it.
        if (start > end && end != 0) {
            throw new IllegalArgumentException("\"start\" should be smaller than \"end\": " + start + ", " + end);
        }
        this.start = start;
        this.end = end;
        this.indexer = new Indexer(dir, language);
        this.textFilter = TextFilterTools.getTextFilter(language);
    }

    /** Starts collecting character data when a {@code title} or {@code text} element opens. */
    @Override
    public void startElement(String namespaceURI, String lName, String qName, Attributes attrs) throws SAXException {
        if ("title".equals(qName)) {
            this.inTitle = true;
        } else if ("text".equals(qName)) {
            this.inText = true;
        }
    }

    /**
     * Finishes the current element. A closing {@code text} element completes an
     * article, which is then counted and possibly indexed.
     *
     * @throws DocumentLimitReachedException if the configured {@code end} has been reached
     */
    @Override
    public void endElement(String namespaceURI, String sName, String qName) {
        try {
            if ("title".equals(qName)) {
                this.inTitle = false;
            } else if ("text".equals(qName)) {
                finishArticle();
            }
        } finally {
            // Always reset the text state, also for articles skipped because
            // they come before "start"; otherwise their text would be carried
            // over into the next article.
            this.text = new StringBuilder();
            this.inText = false;
        }
    }

    /** Counts the article that has just ended and indexes it if it is within the requested range. */
    private void finishArticle() {
        this.articleCount++;
        String articleTitle = this.title.toString();
        this.title = new StringBuilder();
        System.out.println(this.articleCount + ": " + articleTitle);
        if (this.articleCount < this.start) {
            return;
        }
        if (this.end != 0 && this.articleCount >= this.end) {
            throw new DocumentLimitReachedException(this.end);
        }
        try {
            String textToCheck = this.textFilter.filter(this.text.toString());
            if (!textToCheck.contains("#REDIRECT") && !textToCheck.trim().isEmpty()) {
                this.indexer.index(textToCheck, false, this.articleCount);
            }
        } catch (Exception e) {
            // Deliberately best-effort: one broken article must not abort the whole run.
            System.err.println("Exception when filtering/indexing '" + articleTitle + "' (" + this.articleCount + ") - skipping file. Stacktrace follows:");
            e.printStackTrace();
        }
    }

    /** Appends character data to the text or title buffer, depending on the element currently open. */
    @Override
    public void characters(char[] buf, int offset, int len) {
        if (this.inText) {
            this.text.append(buf, offset, len);
        } else if (this.inTitle) {
            this.title.append(buf, offset, len);
        }
    }

    /** Closes the underlying indexer. */
    public void close() throws Exception {
        this.indexer.close();
    }

    /**
     * Adds a document to the index that stores the current article count in the
     * {@link #MAX_DOC_COUNT_VALUE} field and is marked by the
     * {@link #MAX_DOC_COUNT_FIELD} field.
     */
    private void writeMetaDocuments() throws IOException {
        Document doc = new Document();
        doc.add(new StringField(MAX_DOC_COUNT_FIELD, MAX_DOC_COUNT_FIELD_VAL, Field.Store.YES));
        doc.add(new StringField(MAX_DOC_COUNT_VALUE, String.valueOf(this.articleCount), Field.Store.YES));
        this.indexer.add(doc);
    }

    /**
     * Command line entry point: parses the given dump file and writes an index
     * to the given directory. Prints a usage message and exits with status 1
     * unless exactly four arguments are given.
     */
    public static void main(String ... args) throws Exception {
        if (args.length != 4) {
            System.out.println("Usage: " + WikipediaIndexHandler.class.getSimpleName() + " <wikipediaDump> <indexDir> <languageCode> <maxDocs>");
            System.out.println("\t<wikipediaDump> a Wikipedia XML dump");
            System.out.println("\t<indexDir> directory where Lucene index will be written to, existing index content will be removed");
            System.out.println("\t<languageCode> short code like en for English, de for German etc");
            System.out.println("\t<maxDocs> maximum number of documents to be indexed, use 0 for no limit");
            System.exit(1);
        }
        File dumpFile = new File(args[0]);
        File indexDir = new File(args[1]);
        String languageCode = args[2];
        int maxDocs = Integer.parseInt(args[3]);
        Language language = Language.getLanguageForShortName(languageCode);
        if (maxDocs == 0) {
            System.out.println("Going to index all documents from " + dumpFile);
        } else {
            System.out.println("Going to index up to " + maxDocs + " documents from " + dumpFile);
        }
        System.out.println("Output index dir: " + indexDir);
        long start = System.currentTimeMillis();
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser saxParser = factory.newSAXParser();
        FSDirectory fsDirectory = FSDirectory.open(indexDir);
        try {
            // NOTE(review): maxDocs is passed as the exclusive "end", so parsing
            // stops when article number maxDocs is reached and at most
            // maxDocs - 1 articles are indexed - confirm this is intended.
            WikipediaIndexHandler handler = new WikipediaIndexHandler(fsDirectory, language, 1, maxDocs);
            try {
                InputStream dumpStream = new FileInputStream(dumpFile);
                try {
                    saxParser.parse(dumpStream, handler);
                } catch (DocumentLimitReachedException e) {
                    System.out.println("Document limit (" + e.limit + ") reached, stopping indexing");
                } finally {
                    dumpStream.close();
                }
            } finally {
                // Close the handler even if writing the meta document fails.
                try {
                    handler.writeMetaDocuments();
                } finally {
                    handler.close();
                }
            }
        } finally {
            fsDirectory.close();
        }
        long end = System.currentTimeMillis();
        float minutes = (float)(end - start) / 60000.0f;
        System.out.printf("Indexing took %.2f minutes\n", minutes);
    }

    /**
     * Thrown from {@link #endElement} to abort SAX parsing once the configured
     * article limit has been reached.
     */
    private static class DocumentLimitReachedException
    extends RuntimeException {
        final int limit;

        DocumentLimitReachedException(int limit) {
            this.limit = limit;
        }
    }
}

