/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.dev.dumpcheck;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.languagetool.Language;
import org.languagetool.dev.dumpcheck.CommonCrawlSentenceSource;
import org.languagetool.dev.dumpcheck.PlainTextSentenceSource;
import org.languagetool.dev.dumpcheck.Sentence;
import org.languagetool.dev.dumpcheck.SentenceSource;
import org.languagetool.dev.dumpcheck.TatoebaSentenceSource;
import org.languagetool.dev.dumpcheck.WikipediaSentenceSource;

/**
 * A {@link SentenceSource} that interleaves the sentences of several underlying
 * sources. Each call to {@link #next()} advances an internal counter and picks the
 * source at {@code counter % numberOfSources}; sources that have no more sentences
 * are removed from the rotation.
 */
public class MixingSentenceSource extends SentenceSource {
    /** Sources still in the rotation; exhausted sources are removed by {@link #next()}. */
    private final List<SentenceSource> sources;
    /** Number of sentences returned so far, keyed by {@link Sentence#getSource()}. */
    private final Map<String, Integer> sourceDistribution = new HashMap<String, Integer>();
    /** Rotation counter; incremented for every returned sentence and every removed source. */
    private int count;

    /**
     * Creates a mixing source over the given dump files without a filter.
     *
     * @param dumpFileNames paths of the dump files to read
     * @param language language passed on to every underlying source
     * @return a source mixing the sentences of all given files
     * @throws IOException if one of the files cannot be opened
     * @throws RuntimeException if no source handler matches a file name
     */
    public static MixingSentenceSource create(List<String> dumpFileNames, Language language) throws IOException {
        return MixingSentenceSource.create(dumpFileNames, language, null);
    }

    /**
     * Creates a mixing source over the given dump files. The handler is chosen by
     * file name, checked in this order: {@code *.xml} (Wikipedia), {@code tatoeba-*}
     * (Tatoeba), {@code *.txt} (plain text), {@code *.xz} (CommonCrawl).
     *
     * <p>If creation fails part-way, every stream opened so far is closed before the
     * exception propagates.
     *
     * @param dumpFileNames paths of the dump files to read
     * @param language language passed on to every underlying source
     * @param filter pattern passed on to every underlying source, may be {@code null}
     * @return a source mixing the sentences of all given files
     * @throws IOException if one of the files cannot be opened
     * @throws RuntimeException if no source handler matches a file name
     */
    public static MixingSentenceSource create(List<String> dumpFileNames, Language language, Pattern filter) throws IOException {
        List<SentenceSource> sources = new ArrayList<SentenceSource>();
        List<FileInputStream> openedStreams = new ArrayList<FileInputStream>();
        boolean success = false;
        try {
            for (String dumpFileName : dumpFileNames) {
                String name = new File(dumpFileName).getName();
                // Reject unknown file types before opening anything for them.
                if (!hasSourceHandler(name)) {
                    throw new RuntimeException("Could not find a source handler for " + dumpFileName + " - Wikipedia files must be named '*.xml', Tatoeba files must be named 'tatoeba-*', CommonCrawl files '*.xz', plain text files '*.txt'");
                }
                FileInputStream stream = new FileInputStream(dumpFileName);
                openedStreams.add(stream);
                sources.add(createSource(name, stream, language, filter));
            }
            MixingSentenceSource result = new MixingSentenceSource(sources, language);
            success = true;
            return result;
        } finally {
            if (!success) {
                closeQuietly(openedStreams);
            }
        }
    }

    /** Returns whether one of the known source handlers accepts the given file name. */
    private static boolean hasSourceHandler(String name) {
        return name.endsWith(".xml") || name.startsWith("tatoeba-") || name.endsWith(".txt") || name.endsWith(".xz");
    }

    /** Builds the source matching the file name; callers must check {@link #hasSourceHandler(String)} first. */
    private static SentenceSource createSource(String name, FileInputStream stream, Language language, Pattern filter) throws IOException {
        if (name.endsWith(".xml")) {
            return new WikipediaSentenceSource(stream, language, filter);
        }
        if (name.startsWith("tatoeba-")) {
            return new TatoebaSentenceSource(stream, language, filter);
        }
        if (name.endsWith(".txt")) {
            return new PlainTextSentenceSource(stream, language, filter);
        }
        return new CommonCrawlSentenceSource(stream, language, filter);
    }

    /** Closes all given streams, ignoring close failures so the original exception is not masked. */
    private static void closeQuietly(List<FileInputStream> streams) {
        for (FileInputStream stream : streams) {
            try {
                stream.close();
            } catch (IOException ignored) {
                // best effort: we are already propagating the failure that caused the cleanup
            }
        }
    }

    private MixingSentenceSource(List<SentenceSource> sources, Language language) {
        super(language);
        this.sources = sources;
    }

    /**
     * Returns the live map of how many sentences have been returned so far, keyed by
     * the value of {@link Sentence#getSource()}.
     */
    Map<String, Integer> getSourceDistribution() {
        return this.sourceDistribution;
    }

    /** Returns {@code true} if at least one of the remaining sources has another sentence. */
    @Override
    public boolean hasNext() {
        for (SentenceSource source : this.sources) {
            if (source.hasNext()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the next sentence, taken from the source selected by the rotation
     * counter. Sources without further sentences are dropped from the rotation.
     *
     * @throws NoSuchElementException if no source has a sentence left
     */
    @Override
    public Sentence next() {
        // Guard first: with no sources the modulo below would divide by zero.
        if (this.sources.isEmpty()) {
            throw new NoSuchElementException();
        }
        SentenceSource sentenceSource = this.sources.get(this.currentIndex());
        while (!sentenceSource.hasNext()) {
            this.sources.remove(sentenceSource);
            if (this.sources.isEmpty()) {
                throw new NoSuchElementException();
            }
            ++this.count;
            sentenceSource = this.sources.get(this.currentIndex());
        }
        ++this.count;
        Sentence next = sentenceSource.next();
        this.updateDistributionMap(next);
        return next;
    }

    /**
     * Maps the rotation counter onto a valid list index. The result is kept
     * non-negative so the rotation keeps working after {@code count} overflows.
     * Must only be called while {@code sources} is non-empty.
     */
    private int currentIndex() {
        int size = this.sources.size();
        int index = this.count % size;
        return index < 0 ? index + size : index;
    }

    /** Increments the counter for the source the given sentence reports. */
    private void updateDistributionMap(Sentence next) {
        String source = next.getSource();
        Integer prevCount = this.sourceDistribution.get(source);
        this.sourceDistribution.put(source, prevCount == null ? 1 : prevCount + 1);
    }

    /** Returns the sources still in the rotation, joined with {@code ", "}. */
    @Override
    public String getSource() {
        return StringUtils.join(this.sources, ", ");
    }
}

