/*
 * Decompiled with CFR 0.152.
 */
package eus.ixa.ixa.pipe.ml.sequence;

import eus.ixa.ixa.pipe.ml.utils.Span;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.tokenize.WhitespaceTokenizer;

/**
 * A single training/evaluation sample for sequence labelling: a tokenized
 * sentence plus the labelled {@link Span}s over its tokens.
 *
 * <p>The textual form produced by {@link #toString()} and consumed by
 * {@link #parse(String, String, boolean)} is whitespace separated tokens in
 * which each sequence is wrapped in {@code <START>} or {@code <START:type>}
 * and {@code <END>} tags.
 *
 * <p>Token and sequence lists are copied on construction and held in
 * unmodifiable lists.
 */
public class SequenceLabelSample {
    private final String id;
    private final List<String> tokens;
    private final List<Span> sequences;
    private final String[][] additionalContext;
    private final boolean isClearAdaptiveData;
    /** Type used by {@link #parse(String, boolean)} for start tags that carry no explicit type. */
    public static final String DEFAULT_TYPE = "default";
    /** Matches {@code <START>} and {@code <START:type>}; group 2 is the type, or null when absent. */
    private static final Pattern START_TAG_PATTERN = Pattern.compile("<START(:([^:>\\s]*))?>");

    /**
     * Creates a sample.
     *
     * @param id identifier of the sample, may be null
     * @param tokens the tokens of the sentence, must not be null
     * @param sequences the labelled spans; null is treated as "no spans"
     * @param additionalContext per-row additional context, may be null; the
     *        outer array and every non-null row are copied
     * @param clearAdaptiveData value later reported by {@link #isClearAdaptiveDataSet()}
     * @throws IllegalArgumentException if {@code tokens} is null
     */
    public SequenceLabelSample(String id, String[] tokens, Span[] sequences, String[][] additionalContext, boolean clearAdaptiveData) {
        this.id = id;
        if (tokens == null) {
            throw new IllegalArgumentException("sentence must not be null!");
        }
        if (sequences == null) {
            sequences = new Span[]{};
        }
        this.tokens = Collections.unmodifiableList(new ArrayList<String>(Arrays.asList(tokens)));
        this.sequences = Collections.unmodifiableList(new ArrayList<Span>(Arrays.asList(sequences)));
        if (additionalContext != null) {
            this.additionalContext = new String[additionalContext.length][];
            for (int i = 0; i < additionalContext.length; ++i) {
                // A null row is preserved as null instead of failing with a NullPointerException.
                String[] row = additionalContext[i];
                this.additionalContext[i] = row == null ? null : row.clone();
            }
        } else {
            this.additionalContext = null;
        }
        this.isClearAdaptiveData = clearAdaptiveData;
    }

    /**
     * Creates a sample without an identifier.
     *
     * @see #SequenceLabelSample(String, String[], Span[], String[][], boolean)
     */
    public SequenceLabelSample(String[] tokens, Span[] sequences, String[][] additionalContext, boolean clearAdaptiveData) {
        this(null, tokens, sequences, additionalContext, clearAdaptiveData);
    }

    /**
     * Creates a sample without an identifier and without additional context.
     *
     * @see #SequenceLabelSample(String, String[], Span[], String[][], boolean)
     */
    public SequenceLabelSample(String[] tokens, Span[] sequences, boolean clearAdaptiveData) {
        this(tokens, sequences, null, clearAdaptiveData);
    }

    /** @return the identifier given at construction, possibly null */
    public String getId() {
        return this.id;
    }

    /** @return a fresh array holding the tokens of this sample */
    public String[] getTokens() {
        return this.tokens.toArray(new String[this.tokens.size()]);
    }

    /** @return a fresh array holding the spans of this sample */
    public Span[] getSequences() {
        return this.sequences.toArray(new Span[this.sequences.size()]);
    }

    /**
     * @return the additional context held by this sample, or null if none was
     *         given; this is the internal array, not a copy, so callers must
     *         not modify it
     */
    public String[][] getAdditionalContext() {
        return this.additionalContext;
    }

    /** @return the clear-adaptive-data flag given at construction */
    public boolean isClearAdaptiveDataSet() {
        return this.isClearAdaptiveData;
    }

    /**
     * Two samples are equal when their tokens, spans, additional context
     * (compared element by element) and clear-adaptive-data flag are equal.
     * The identifier does not take part in the comparison.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj instanceof SequenceLabelSample) {
            SequenceLabelSample a = (SequenceLabelSample)obj;
            // deepEquals compares the nested String[] rows by content; a plain
            // Arrays.equals would compare the rows by reference only.
            return this.tokens.equals(a.tokens)
                && this.sequences.equals(a.sequences)
                && Arrays.deepEquals((Object[])this.additionalContext, (Object[])a.additionalContext)
                && this.isClearAdaptiveData == a.isClearAdaptiveData;
        }
        return false;
    }

    /**
     * Hash code built from the same fields that {@link #equals(Object)}
     * compares.
     */
    @Override
    public int hashCode() {
        // NOTE(review): relies on Span providing a hashCode consistent with its equals - confirm.
        int result = this.tokens.hashCode();
        result = 31 * result + this.sequences.hashCode();
        result = 31 * result + Arrays.deepHashCode((Object[])this.additionalContext);
        result = 31 * result + (this.isClearAdaptiveData ? 1231 : 1237);
        return result;
    }

    /**
     * Renders the sample in the tagged format: tokens separated by single
     * spaces, each span opened with {@code <START>} (null type) or
     * {@code <START:type>} before its first token and closed with
     * {@code <END>} before the token at its end index, or at the end of the
     * line when the span ends at the last token. A leading newline is emitted
     * when the clear-adaptive-data flag is set.
     */
    @Override
    public String toString() {
        StringBuilder result = new StringBuilder();
        if (this.isClearAdaptiveDataSet()) {
            result.append("\n");
        }
        for (int tokenIndex = 0; tokenIndex < this.tokens.size(); ++tokenIndex) {
            for (Span sequence : this.sequences) {
                if (sequence.getStart() == tokenIndex) {
                    if (sequence.getType() == null) {
                        result.append("<START>").append(' ');
                    } else {
                        result.append("<START:").append(sequence.getType()).append("> ");
                    }
                }
                if (sequence.getEnd() == tokenIndex) {
                    result.append("<END>").append(' ');
                }
            }
            result.append(this.tokens.get(tokenIndex)).append(' ');
        }
        // Every token appended a trailing space; drop the last one. This must also
        // happen for a single-token sample, otherwise the output ends in a stray space.
        if (!this.tokens.isEmpty()) {
            result.setLength(result.length() - 1);
        }
        // Spans that end after the last token are closed at the end of the line.
        for (Span sequence : this.sequences) {
            if (sequence.getEnd() == this.tokens.size()) {
                result.append(' ').append("<END>");
            }
        }
        return result.toString();
    }

    /**
     * Builds an error snippet showing the token at {@code index} surrounded by
     * {@code ###} markers, with up to two tokens of context on each side.
     */
    private static String errorTokenWithContext(String[] sentence, int index) {
        StringBuilder errorString = new StringBuilder();
        if (index > 1) {
            errorString.append(sentence[index - 2]).append(" ");
        }
        if (index > 0) {
            errorString.append(sentence[index - 1]).append(" ");
        }
        errorString.append("###");
        errorString.append(sentence[index]);
        errorString.append("###").append(" ");
        if (index + 1 < sentence.length) {
            errorString.append(sentence[index + 1]).append(" ");
        }
        if (index + 2 < sentence.length) {
            errorString.append(sentence[index + 2]);
        }
        return errorString.toString();
    }

    /**
     * Parses a tagged sentence using {@link #DEFAULT_TYPE} for untyped start tags.
     *
     * @see #parse(String, String, boolean)
     */
    public static SequenceLabelSample parse(String taggedTokens, boolean isClearAdaptiveData) throws IOException {
        return SequenceLabelSample.parse(taggedTokens, DEFAULT_TYPE, isClearAdaptiveData);
    }

    /**
     * Parses a whitespace tokenized sentence annotated with
     * {@code <START>}/{@code <START:type>} and {@code <END>} tags.
     *
     * <p>A start tag that is never followed by {@code <END>} produces no span
     * and no error.
     *
     * @param taggedTokens the annotated sentence
     * @param defaultType type given to spans whose start tag has no type
     * @param isClearAdaptiveData flag passed on to the created sample
     * @return the parsed sample, without identifier or additional context
     * @throws IOException if a start tag appears inside an open sequence, if
     *         {@code <END>} appears without an open sequence, or if a start
     *         tag has an empty type ({@code <START:>})
     */
    public static SequenceLabelSample parse(String taggedTokens, String defaultType, boolean isClearAdaptiveData) throws IOException {
        String[] parts = WhitespaceTokenizer.INSTANCE.tokenize(taggedTokens);
        List<String> tokenList = new ArrayList<String>(parts.length);
        List<Span> seqList = new ArrayList<Span>();
        String sequenceType = defaultType;
        int startIndex = -1;
        int wordIndex = 0;
        boolean catchingSequence = false;
        for (int pi = 0; pi < parts.length; ++pi) {
            Matcher startMatcher = START_TAG_PATTERN.matcher(parts[pi]);
            if (startMatcher.matches()) {
                if (catchingSequence) {
                    throw new IOException("Found unexpected annotation while handling a name sequence: " + SequenceLabelSample.errorTokenWithContext(parts, pi));
                }
                catchingSequence = true;
                startIndex = wordIndex;
                String sequenceTypeFromSample = startMatcher.group(2);
                if (sequenceTypeFromSample == null) {
                    // Untyped tag: fall back to the default instead of silently
                    // inheriting the type of an earlier typed sequence.
                    sequenceType = defaultType;
                } else if (sequenceTypeFromSample.length() == 0) {
                    throw new IOException("Missing a name type: " + SequenceLabelSample.errorTokenWithContext(parts, pi));
                } else {
                    sequenceType = sequenceTypeFromSample;
                }
                continue;
            }
            if (parts[pi].equals("<END>")) {
                if (!catchingSequence) {
                    throw new IOException("Found unexpected annotation: " + SequenceLabelSample.errorTokenWithContext(parts, pi));
                }
                catchingSequence = false;
                seqList.add(new Span(startIndex, wordIndex, sequenceType));
                continue;
            }
            // Anything that is not a tag is a regular token.
            tokenList.add(parts[pi]);
            ++wordIndex;
        }
        String[] tokens = tokenList.toArray(new String[tokenList.size()]);
        Span[] sequences = seqList.toArray(new Span[seqList.size()]);
        return new SequenceLabelSample(tokens, sequences, isClearAdaptiveData);
    }
}

