/*
 * Decompiled with CFR 0.152.
 */
package eus.ixa.ixa.pipe.tok;

import com.google.common.io.CharStreams;
import eus.ixa.ixa.pipe.seg.RuleBasedSegmenter;
import eus.ixa.ixa.pipe.tok.RuleBasedTokenizer;
import eus.ixa.ixa.pipe.tok.StringUtils;
import eus.ixa.ixa.pipe.tok.Token;
import eus.ixa.ixa.pipe.tok.Tokenizer;
import ixa.kaflib.KAFDocument;
import ixa.kaflib.WF;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.List;
import java.util.Properties;

/**
 * Runs sentence segmentation and tokenization over a text and renders the
 * resulting tokens as KAF word forms, CoNLL-style columns or plain text.
 *
 * <p>Tokens whose value equals {@link #PARAGRAPH_MARKER} are treated as
 * paragraph breaks rather than as ordinary tokens.
 */
public class Annotate {
    /** Token value that marks a paragraph break in the tokenizer output. */
    private static final String PARAGRAPH_MARKER = "\u00b6\u00b6";

    /** Text written to the CoNLL and plain-text outputs in place of a paragraph marker. */
    private static final String PARAGRAPH_TAG = "*<P>*";

    private final Tokenizer toker;
    private final RuleBasedSegmenter segmenter;

    /**
     * Reads the whole text from {@code breader} via {@code StringUtils.readText}
     * and builds a segmenter and a tokenizer over that same text.
     *
     * @param breader reader supplying the text to process
     * @param properties configuration handed unchanged to both the segmenter
     *                   and the tokenizer
     */
    public Annotate(BufferedReader breader, Properties properties) {
        String text = StringUtils.readText(breader);
        this.segmenter = new RuleBasedSegmenter(text, properties);
        this.toker = new RuleBasedTokenizer(text, properties);
    }

    /**
     * Segments and tokenizes the text and adds one word form per token to
     * {@code kaf}, carrying the token's start offset, length, sentence number
     * and paragraph number.
     *
     * <p>Sentence numbers start at 1 and advance once per tokenized sentence;
     * paragraph numbers start at 1 and advance once per paragraph marker.
     * Paragraph markers themselves are not added to the document.
     *
     * @param kaf document that receives the word forms
     * @throws IOException declared for callers; kept as part of the method's signature
     */
    public void tokenizeToKAF(KAFDocument kaf) throws IOException {
        int noSents = 0;
        int noParas = 1;
        for (List<Token> tokenizedSentence : segmentAndTokenize()) {
            ++noSents;
            for (Token token : tokenizedSentence) {
                if (token.getTokenValue().equals(PARAGRAPH_MARKER)) {
                    ++noParas;
                    // Keep the sentence counter at least as large as the paragraph
                    // counter. noParas grows by one at a time, so a single bump
                    // is always enough to catch up.
                    if (noSents < noParas) {
                        ++noSents;
                    }
                    continue;
                }
                WF wf = kaf.newWF(token.startOffset(), token.getTokenValue(), noSents);
                wf.setLength(token.tokenLength());
                wf.setPara(noParas);
            }
        }
    }

    /**
     * Segments and tokenizes the text and renders it with one token per line
     * and an empty line after every sentence. Paragraph markers are written
     * as {@code *<P>*}.
     *
     * @return the rendered tokens
     */
    public String tokenizeToCoNLL() {
        StringBuilder sb = new StringBuilder();
        for (List<Token> tokSentence : segmentAndTokenize()) {
            for (Token token : tokSentence) {
                sb.append(outputForm(token)).append("\n");
            }
            sb.append("\n");
        }
        return sb.toString();
    }

    /**
     * Same layout as {@link #tokenizeToCoNLL()}, but every line also carries
     * the token's start offset and length, separated by single spaces.
     *
     * @return the rendered tokens with their offsets
     */
    public String tokenizeToCoNLLOffsets() {
        StringBuilder sb = new StringBuilder();
        for (List<Token> tokSentence : segmentAndTokenize()) {
            for (Token token : tokSentence) {
                sb.append(outputForm(token))
                        .append(" ").append(token.startOffset())
                        .append(" ").append(token.tokenLength())
                        .append("\n");
            }
            sb.append("\n");
        }
        return sb.toString();
    }

    /**
     * Segments and tokenizes the text and renders one sentence per line with
     * tokens separated by spaces. A paragraph marker is written as
     * {@code *<P>*} followed by a line break. Progress messages are printed
     * to standard error before segmenting, before tokenizing and once both
     * steps are done.
     *
     * @return the rendered text with leading and trailing whitespace trimmed
     */
    public String tokenizeToText() {
        StringBuilder sb = new StringBuilder();
        // The two pipeline steps stay inline here so that each progress
        // message is printed immediately before the step it announces.
        System.err.println("-> Segmenting.....");
        String[] sentences = this.segmenter.segmentSentence();
        System.err.println("-> Tokenizing.....");
        List<List<Token>> tokens = this.toker.tokenize(sentences);
        System.err.println("-> [DONE]!");
        for (List<Token> tokSentence : tokens) {
            for (Token token : tokSentence) {
                String tokenValue = token.getTokenValue();
                if (tokenValue.equals(PARAGRAPH_MARKER)) {
                    sb.append(PARAGRAPH_TAG).append("\n");
                    continue;
                }
                sb.append(tokenValue.trim()).append(" ");
            }
            sb.append("\n");
        }
        return sb.toString().trim();
    }

    /**
     * Reads already tokenized text (one sentence per line, tokens separated by
     * a single space) and adds one word form per token to {@code kaf}.
     *
     * <p>Every input line advances the sentence counter, including blank
     * lines. Each paragraph marker advances the paragraph counter and is not
     * added to the document. Empty tokens, which {@code split(" ")} yields for
     * a blank line, a leading space or consecutive spaces, are skipped so
     * that no word form with empty text is created. All word forms are
     * created with offset 0 because the original offsets are not available
     * in this input format.
     *
     * @param breader reader supplying the tokenized text; it is not closed here
     * @param kaf document that receives the word forms
     * @throws IOException if reading from {@code breader} fails
     */
    public static void tokensToKAF(Reader breader, KAFDocument kaf) throws IOException {
        int noSents = 0;
        int noParas = 1;
        List<String> sentences = CharStreams.readLines(breader);
        for (String sentence : sentences) {
            ++noSents;
            String[] tokens = sentence.split(" ");
            for (String token : tokens) {
                if (token.isEmpty()) {
                    // Artefact of splitting on single spaces, not a real token.
                    continue;
                }
                if (token.equals(PARAGRAPH_MARKER)) {
                    ++noParas;
                    // Keep the sentence counter at least as large as the
                    // paragraph counter.
                    while (noParas > noSents) {
                        ++noSents;
                    }
                    continue;
                }
                WF wf = kaf.newWF(0, token, noSents);
                wf.setPara(noParas);
            }
        }
    }

    /** Runs the segmenter and feeds its sentences to the tokenizer. */
    private List<List<Token>> segmentAndTokenize() {
        String[] sentences = this.segmenter.segmentSentence();
        return this.toker.tokenize(sentences);
    }

    /** Returns the text printed for a token in the CoNLL outputs: the paragraph tag or the trimmed value. */
    private static String outputForm(Token token) {
        String tokenValue = token.getTokenValue();
        if (tokenValue.equals(PARAGRAPH_MARKER)) {
            return PARAGRAPH_TAG;
        }
        return tokenValue.trim();
    }
}

