/*
 * Decompiled with CFR 0.152.
 */
package eus.ixa.ixa.pipe.ml.formats;

import eus.ixa.ixa.pipe.ml.sequence.SequenceLabelSample;
import eus.ixa.ixa.pipe.ml.utils.Span;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.StringUtil;

public class CoNLL03Format
implements ObjectStream<SequenceLabelSample> {
    public static final String DOCSTART = "-DOCSTART-";
    private final ObjectStream<String> lineStream;
    private final String clearFeatures;

    public CoNLL03Format(String resetFeatures, ObjectStream<String> lineStream) {
        this.clearFeatures = resetFeatures;
        this.lineStream = lineStream;
    }

    public CoNLL03Format(String resetFeatures, InputStreamFactory in) throws IOException {
        this.clearFeatures = resetFeatures;
        try {
            this.lineStream = new PlainTextByLineStream(in, "UTF-8");
            System.setOut(new PrintStream((OutputStream)System.out, true, "UTF-8"));
        }
        catch (UnsupportedEncodingException e) {
            throw new IllegalStateException(e);
        }
    }

    @Override
    public SequenceLabelSample read() throws IOException {
        String line;
        ArrayList<String> tokens = new ArrayList<String>();
        ArrayList<String> seqTypes = new ArrayList<String>();
        boolean isClearAdaptiveData = false;
        while ((line = this.lineStream.read()) != null && !StringUtil.isEmpty(line)) {
            if (this.clearFeatures.equalsIgnoreCase("docstart") && line.startsWith(DOCSTART)) {
                isClearAdaptiveData = true;
                String emptyLine = this.lineStream.read();
                if (StringUtil.isEmpty(emptyLine)) continue;
                throw new IOException("Empty line after -DOCSTART- not empty: '" + emptyLine + "'!");
            }
            String[] fields = line.split("\t");
            if (fields.length == 2) {
                tokens.add(fields[0]);
                seqTypes.add(fields[1]);
                continue;
            }
            throw new IOException("Expected two fields per line in training data, got " + fields.length + " for line '" + line + "'!");
        }
        if (this.clearFeatures.equalsIgnoreCase("yes")) {
            isClearAdaptiveData = true;
        }
        if (tokens.size() > 0) {
            ArrayList<Span> sequences = new ArrayList<Span>();
            int beginIndex = -1;
            int endIndex = -1;
            for (int i = 0; i < seqTypes.size(); ++i) {
                String seqTag = (String)seqTypes.get(i);
                if (seqTag.equals("O")) {
                    if (beginIndex == -1) continue;
                    sequences.add(CoNLL03Format.extract(beginIndex, endIndex, (String)seqTypes.get(beginIndex)));
                    beginIndex = -1;
                    endIndex = -1;
                    continue;
                }
                if (seqTag.startsWith("B-")) {
                    if (beginIndex != -1) {
                        sequences.add(CoNLL03Format.extract(beginIndex, endIndex, (String)seqTypes.get(beginIndex)));
                    }
                    beginIndex = i;
                    endIndex = i + 1;
                    continue;
                }
                if (seqTag.startsWith("I-")) {
                    if (beginIndex == -1) {
                        beginIndex = i;
                        endIndex = i + 1;
                        continue;
                    }
                    if (!seqTag.endsWith(((String)seqTypes.get(beginIndex)).substring(1))) {
                        sequences.add(CoNLL03Format.extract(beginIndex, endIndex, (String)seqTypes.get(beginIndex)));
                        beginIndex = i;
                        endIndex = i + 1;
                        continue;
                    }
                    ++endIndex;
                    continue;
                }
                throw new IOException("Invalid tag: " + seqTag);
            }
            if (beginIndex != -1) {
                sequences.add(CoNLL03Format.extract(beginIndex, endIndex, (String)seqTypes.get(beginIndex)));
            }
            return new SequenceLabelSample(tokens.toArray(new String[tokens.size()]), sequences.toArray(new Span[sequences.size()]), isClearAdaptiveData);
        }
        if (line != null) {
            return this.read();
        }
        return null;
    }

    static final Span extract(int begin, int end, String beginTag) throws InvalidFormatException {
        String type = beginTag.substring(2);
        return new Span(begin, end, type);
    }

    @Override
    public void reset() throws IOException, UnsupportedOperationException {
        this.lineStream.reset();
    }

    @Override
    public void close() throws IOException {
        this.lineStream.close();
    }
}

