/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.ae.jnet.utils;

import de.julielab.jcore.ae.jnet.utils.Utils;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.TreeSet;
import java.util.regex.Pattern;

/**
 * Converts token-per-line IOB data (plus any number of parallel meta data
 * files) into a "piped" format with one sentence per line, and splits such
 * piped data into init / gold / pool subsets.
 *
 * <p>In the piped format every token is written as
 * {@code word|meta1|...|metaN|label} followed by a single space.
 */
public class FormatConverter {
    /** A run of whitespace; every such run is collapsed to one tab before a line is split into columns. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("[\\s]+");

    /**
     * Command line entry point: reads the IOB file, the meta data files and the
     * optional tag list, converts them with {@link #makePipedFormat} and writes
     * the result to the output file.
     *
     * <p>Argument layout: {@code args[0]} is the IOB file, the last argument is
     * the tag list file (or {@code "0"} for "no tag list"), the second to last
     * argument is the output file, and everything in between is treated as a
     * meta data file. At least three arguments are required, so the list of
     * meta data files may be empty.
     *
     * <p>The process exits with status {@code -1} when too few arguments are
     * given or when any exception occurs, so that calling scripts can detect
     * the failure.
     *
     * @param args command line arguments as described above
     */
    public static void main(String[] args) {
        try {
            if (args.length < 3) {
                System.out.println("usage: java FormatConverter <iobFile> <posFile> [further meta data files] <outFile> <taglist (or 0 if not used)>");
                // A wrong invocation is a failure, not a success.
                System.exit(-1);
            }
            File iobFile = new File(args[0]);
            File outFile = new File(args[args.length - 2]);
            String tagListArg = args[args.length - 1];

            System.out.println("Reading iob and meta data files...");
            ArrayList<String> iobData = Utils.readFile(iobFile);
            ArrayList<ArrayList<String>> metaData = new ArrayList<ArrayList<String>>();
            // Everything between the IOB file and the output file is a meta data file.
            for (int a = 1; a < args.length - 2; a++) {
                metaData.add(Utils.readFile(new File(args[a])));
            }

            TreeSet<String> tagList = null;
            if (!tagListArg.equals("0")) {
                tagList = new TreeSet<String>(Utils.readFile(new File(tagListArg)));
            }

            System.out.println("Making piped format...");
            ArrayList<String> pipedData = FormatConverter.makePipedFormat(iobData, metaData, tagList);
            Utils.writeFile(outFile, pipedData);
        }
        catch (Exception e) {
            e.printStackTrace();
            // Report the failure through the exit status as well.
            System.exit(-1);
        }
    }

    /**
     * Merges IOB lines and the corresponding meta data lines into piped
     * sentences.
     *
     * <p>Each input line is expected to hold two whitespace separated columns
     * (word and label for the IOB data, word and meta value for meta data).
     * A blank IOB line ends the current sentence; a line equal to
     * {@code -DOCSTART- O} (after whitespace normalization) is skipped. A
     * sentence that is still open when the input ends is emitted as well, so
     * the input does not need to end with a blank line.
     *
     * <p>Lines with more than two columns, and meta lines whose word differs
     * from the IOB word, are reported on {@code System.err} and processing
     * continues with the first two columns. If a meta data list has a different
     * length than {@code iobData}, an error is printed and the JVM is
     * terminated with status {@code -1}.
     *
     * @param iobData  IOB lines, one token per element
     * @param metaData one list per meta data file, each parallel to {@code iobData}
     * @param tags     allowed labels; labels not contained are replaced by
     *                 {@code "O"}. {@code null} disables the check.
     * @return one element per sentence; every token is followed by a space
     * @throws IllegalArgumentException if a token line of the IOB data or of a
     *                                  meta data list has fewer than two columns
     */
    public static ArrayList<String> makePipedFormat(ArrayList<String> iobData, ArrayList<ArrayList<String>> metaData, TreeSet<String> tags) {
        int metaCount = metaData.size();
        for (int m = 0; m < metaCount; m++) {
            if (iobData.size() != metaData.get(m).size()) {
                System.err.println("Error: IOB file and " + (m + 1) + ". meta data file have different length!");
                System.exit(-1);
            }
        }

        ArrayList<String> pipedData = new ArrayList<String>();
        StringBuilder sentence = new StringBuilder();
        for (int lineNo = 0; lineNo < iobData.size(); lineNo++) {
            String iobLine = WHITESPACE_RUN.matcher(iobData.get(lineNo)).replaceAll("\t");
            if (iobLine.equals("-DOCSTART-\tO")) {
                continue;
            }
            if (iobLine.equals("") || iobLine.equals("\t")) {
                // Blank line: sentence boundary.
                flushSentence(sentence, pipedData);
                continue;
            }

            String[] iobCols = iobLine.split("\t");
            // Validate the IOB line once, independent of how many meta files there are.
            if (iobCols.length != 2) {
                System.err.println("Error: format error. Incorrect size of line.");
                System.err.println(iobLine + " - " + iobCols.length);
            }
            if (iobCols.length < 2) {
                throw new IllegalArgumentException("IOB line " + lineNo + " has no label column: '" + iobLine + "'");
            }

            String[] metaValues = new String[metaCount];
            for (int m = 0; m < metaCount; m++) {
                String metaLine = WHITESPACE_RUN.matcher(metaData.get(m).get(lineNo)).replaceAll("\t");
                String[] metaCols = metaLine.split("\t");
                if (metaCols.length != 2) {
                    System.err.println("Error: format error. Incorrect size of line.");
                    System.err.println(metaLine + " - " + metaCols.length);
                }
                if (metaCols.length < 2) {
                    // split() may even return an empty array here (e.g. for a blank meta line).
                    throw new IllegalArgumentException("Line " + lineNo + " of meta data file " + (m + 1)
                            + " has fewer than two columns: '" + metaLine + "' (IOB line: '" + iobLine + "')");
                }
                if (!metaCols[0].equals(iobCols[0])) {
                    System.err.println("error reading, word pos!=word iob");
                    System.err.println("IOB: " + iobCols[0]);
                    System.err.println("POS: " + metaCols[0]);
                    System.err.println(metaCols[0] + " - " + iobCols[0]);
                    System.err.println(iobLine + " -- " + metaLine);
                    System.err.println("line number: " + lineNo);
                }
                metaValues[m] = metaCols[1];
            }

            String label = iobCols[1];
            if (tags != null && !tags.contains(label)) {
                label = "O";
            }

            sentence.append(iobCols[0]);
            for (int m = 0; m < metaCount; m++) {
                sentence.append('|').append(metaValues[m]);
            }
            sentence.append('|').append(label).append(' ');
        }
        // Do not lose the last sentence when the input has no trailing blank line.
        flushSentence(sentence, pipedData);
        return pipedData;
    }

    /** Appends the buffered sentence (if any) to {@code pipedData} and empties the buffer. */
    private static void flushSentence(StringBuilder sentence, ArrayList<String> pipedData) {
        if (sentence.length() > 0) {
            pipedData.add(sentence.toString());
            sentence.setLength(0);
        }
    }

    /**
     * Splits the data into a pool and a gold set without an initial set.
     * Equivalent to the six-argument overload with {@code initSize == 0} and a
     * throw-away list for the initial set.
     *
     * @param fractionGold fraction of the sentences that go to {@code goldOut}
     * @param pipedData    sentences to distribute; not modified
     * @param poolOut      cleared, then receives the pool sentences
     * @param goldOut      cleared, then receives the gold sentences
     */
    public static void makeDataSplit(double fractionGold, ArrayList<String> pipedData, ArrayList<String> poolOut, ArrayList<String> goldOut) {
        ArrayList<String> unusedInit = new ArrayList<String>();
        FormatConverter.makeDataSplit(fractionGold, 0, pipedData, unusedInit, poolOut, goldOut);
    }

    /**
     * Randomly distributes the sentences over an initial set, a gold set and a
     * pool.
     *
     * <p>The three output lists are cleared first. The gold set gets
     * {@code (int) ((pipedData.size() - initSize) * fractionGold)} sentences,
     * the initial set {@code initSize} sentences and the pool the remainder.
     * The computed sizes are printed to {@code System.out}. If
     * {@code fractionGold} is outside {@code [0.01, 0.98]} or the gold set
     * would be empty, an error is printed and the JVM is terminated with
     * status {@code -1}.
     *
     * <p>The random order is produced on a copy, so {@code pipedData} itself
     * keeps its order.
     *
     * @param fractionGold fraction of the non-initial sentences used as gold data
     * @param initSize     number of sentences for the initial set
     * @param pipedData    sentences to distribute; not modified
     * @param initOut      cleared, then receives the initial sentences
     * @param poolOut      cleared, then receives the pool sentences
     * @param goldOut      cleared, then receives the gold sentences
     */
    public static void makeDataSplit(double fractionGold, int initSize, ArrayList<String> pipedData, ArrayList<String> initOut, ArrayList<String> poolOut, ArrayList<String> goldOut) {
        initOut.clear();
        poolOut.clear();
        goldOut.clear();

        int total = pipedData.size();
        int goldSize = (int)((double)(total - initSize) * fractionGold);
        // The pool is what remains after both the initial and the gold set are taken.
        int poolSize = total - initSize - goldSize;
        System.out.println("datasize: " + total);
        System.out.println("initSize: " + initSize);
        System.out.println("goldSize: " + goldSize);
        System.out.println("poolSize: " + poolSize);
        if (fractionGold < 0.01 || goldSize < 1 || fractionGold > 0.98) {
            System.err.println("Error: fractionGold too small/large! Must be between 0.01 and 0.98 and result in at least one sentence.");
            System.exit(-1);
        }

        // Shuffle a copy so the caller's list is left untouched.
        ArrayList<String> shuffled = new ArrayList<String>(pipedData);
        Collections.shuffle(shuffled);
        for (String sentence : shuffled) {
            if (initOut.size() < initSize) {
                initOut.add(sentence);
            } else if (goldOut.size() < goldSize) {
                goldOut.add(sentence);
            } else {
                poolOut.add(sentence);
            }
        }
    }
}

