/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.dev.dumpcheck;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;
import org.languagetool.Language;
import org.languagetool.dev.dumpcheck.Sentence;
import org.languagetool.dev.dumpcheck.SentenceSource;
import org.languagetool.dev.wikipedia.SwebleWikipediaTextFilter;
import org.languagetool.tokenizers.Tokenizer;

/**
 * A {@link SentenceSource} that reads a Wikipedia XML dump from a stream and
 * hands out its sentences one by one.
 *
 * <p>The dump is parsed lazily with a StAX event reader: each time the buffer
 * of pending sentences runs empty, parsing continues until a {@code <text>}
 * element yields at least one accepted sentence or the input ends.
 */
class WikipediaSentenceSource extends SentenceSource {
    // NOTE(review): ONLY_ARTICLES, ARTICLE_NAMESPACE and namespaceSkipCount are not
    // read anywhere in this class as shown; kept so the class layout is unchanged.
    private static final boolean ONLY_ARTICLES = false;
    private static final String ARTICLE_NAMESPACE = "0";
    /** Marker that starts the wiki text of a redirect page (compared ignoring case). */
    private static final String REDIRECT_PREFIX = "#redirect";

    private final SwebleWikipediaTextFilter textFilter = new SwebleWikipediaTextFilter();
    private final XMLEventReader reader;
    private final Tokenizer sentenceTokenizer;
    /** Sentences already extracted but not yet returned by {@link #next()}. */
    private final List<WikipediaSentence> sentences;
    private final Language language;
    /** Number of {@code <title>} elements seen so far. */
    private int articleCount = 0;
    private int namespaceSkipCount = 0;
    /** Number of {@code <text>} elements skipped because they were redirects. */
    private int redirectSkipCount = 0;
    /**
     * Content of the most recent {@code <title>} / {@code <ns>} element. Held as
     * fields so the values survive between {@link #fillSentences()} calls; a
     * {@code <text>} element reached in a later call still gets its title.
     */
    private String currentTitle = null;
    private String currentNamespace = null;

    /**
     * Creates a source reading from the given XML stream.
     *
     * @param xmlInput the XML dump to parse
     * @param language language used to obtain the sentence tokenizer and to build article URLs
     * @throws RuntimeException if the XML event reader cannot be created
     */
    WikipediaSentenceSource(InputStream xmlInput, Language language) {
        super(language);
        this.textFilter.enableMapping(false);
        this.language = language;
        this.sentences = new ArrayList<>();
        this.sentenceTokenizer = language.getSentenceTokenizer();
        try {
            XMLInputFactory factory = XMLInputFactory.newInstance();
            // The input is only ever read as plain element content, so switch off
            // DTD processing and external entity resolution (XXE hardening).
            factory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
            factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
            this.reader = factory.createXMLEventReader(xmlInput);
        }
        catch (XMLStreamException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Tells whether another sentence is available, parsing further input if needed.
     *
     * @return {@code true} if {@link #next()} can return a sentence
     * @throws RuntimeException wrapping any {@link XMLStreamException} raised while parsing
     */
    @Override
    public boolean hasNext() {
        try {
            this.fillSentences();
        }
        catch (XMLStreamException e) {
            throw new RuntimeException(e);
        }
        return !this.sentences.isEmpty();
    }

    /**
     * Returns the next sentence together with the title and URL of its page.
     *
     * @return the next sentence
     * @throws NoSuchElementException if the input holds no further sentences
     * @throws RuntimeException wrapping any {@link XMLStreamException} raised while parsing
     */
    @Override
    public Sentence next() {
        try {
            this.fillSentences();
        }
        catch (XMLStreamException e) {
            throw new RuntimeException(e);
        }
        if (this.sentences.isEmpty()) {
            throw new NoSuchElementException();
        }
        WikipediaSentence wikiSentence = this.sentences.remove(0);
        String url = "http://" + this.language.getShortName() + ".wikipedia.org/wiki/" + wikiSentence.title;
        return new Sentence(wikiSentence.sentence, this.getSource(), wikiSentence.title, url, wikiSentence.articleCount);
    }

    @Override
    public String getSource() {
        return "wikipedia";
    }

    /**
     * Advances the parser until the sentence buffer is non-empty or the input is
     * exhausted. Does nothing if sentences are already buffered.
     */
    private void fillSentences() throws XMLStreamException {
        while (this.sentences.isEmpty() && this.reader.hasNext()) {
            XMLEvent event = this.reader.nextEvent();
            if (!event.isStartElement()) {
                continue;
            }
            String elementName = event.asStartElement().getName().getLocalPart();
            switch (elementName) {
                case "title":
                    this.currentTitle = this.readCharacterData();
                    ++this.articleCount;
                    break;
                case "ns":
                    this.currentNamespace = this.readCharacterData();
                    break;
                case "text":
                    this.handleTextElement(this.currentNamespace, this.currentTitle, this.articleCount);
                    break;
                default:
                    break;
            }
        }
    }

    /**
     * Collects all character events that directly follow the current parser
     * position. The reader may deliver one element's text in several chunks, so
     * every contiguous chunk is appended. The first non-character event is left
     * unread.
     *
     * @return the concatenated text, or an empty string if no character event follows
     */
    private String readCharacterData() throws XMLStreamException {
        StringBuilder sb = new StringBuilder();
        XMLEvent upcoming = this.reader.peek();
        // peek() yields null at end of input; treat that like the end of the text.
        while (upcoming != null && upcoming.isCharacters()) {
            sb.append(this.reader.nextEvent().asCharacters().getData());
            upcoming = this.reader.peek();
        }
        return sb.toString();
    }

    /**
     * Reads the content of a {@code <text>} element, converts it to plain text,
     * splits it into sentences and buffers those accepted by {@code acceptSentence}.
     * Redirect pages are counted and skipped. Any exception raised during
     * filtering or tokenizing is reported on stderr and the document is skipped.
     *
     * @param namespace content of the preceding {@code <ns>} element (not used here)
     * @param title title to attach to each extracted sentence
     * @param articleCount article counter to attach to each extracted sentence
     */
    private void handleTextElement(String namespace, String title, int articleCount) throws XMLStreamException {
        String wikiText = this.readCharacterData();
        try {
            // regionMatches(ignoreCase) is locale-independent, unlike toLowerCase()
            // with the default locale (e.g. Turkish maps 'I' to a dotless 'i').
            if (wikiText.trim().regionMatches(true, 0, REDIRECT_PREFIX, 0, REDIRECT_PREFIX.length())) {
                ++this.redirectSkipCount;
                return;
            }
            String textToCheck = this.textFilter.filter(wikiText).getPlainText();
            for (String sentence : this.sentenceTokenizer.tokenize(textToCheck)) {
                if (this.acceptSentence(sentence)) {
                    this.sentences.add(new WikipediaSentence(sentence, title, articleCount));
                }
            }
        }
        catch (Exception e) {
            // Deliberately best-effort: one broken document must not abort the whole run.
            System.err.println("Could not extract text, skipping document: " + e.toString() + ", full stacktrace follows:");
            e.printStackTrace();
        }
    }

    /** Immutable holder for a buffered sentence and the page data it came with. */
    private static final class WikipediaSentence {
        final String sentence;
        final String title;
        final int articleCount;

        WikipediaSentence(String sentence, String title, int articleCount) {
            this.sentence = sentence;
            this.title = title;
            this.articleCount = articleCount;
        }
    }
}

