/*
 * Decompiled with CFR 0.152.
 */
package edu.nyu.jet.chunk;

import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.StringTokenizer;

/**
 * Converts a three-column log file into one feature line per record.
 *
 * <p>Each non-blank input line is split on whitespace and its first three
 * fields are used as (token, part-of-speech, tag), following the variable
 * naming of the original code. A blank line resets the lookahead to empty
 * strings, so features never reach across it. For every record a line of
 * the form
 * <pre>
 * prevPOS=.. currPOS=.. nextPOS=.. POS012=.. prevTag=.. currWord=.. W-1W0=.. W0W1=.. TAG
 * </pre>
 * is written to the output.
 */
class ChunkBuildTrain {
    public static final String chunkDir = "C:/Documents and Settings/Ralph Grishman/My Documents/HMM/Chunk/";

    /** Indexes into the {@code String[3]} records used by the sliding window. */
    private static final int TOKEN = 0;
    private static final int POS = 1;
    private static final int TAG = 2;

    /** Record with all fields empty; stands for "no token here". Never mutated. */
    private static final String[] BOUNDARY = {"", "", ""};

    /** Number of records of right context needed before a record can be emitted. */
    private static final int LOOKAHEAD = 2;

    ChunkBuildTrain() {
    }

    /**
     * Reads the input log and writes the feature file.
     *
     * @param args optional: {@code args[0]} overrides the input path and
     *             {@code args[1]} the output path; when absent (or when
     *             {@code args} is {@code null}) the original hard-coded
     *             locations under {@link #chunkDir} are used
     */
    public static void main(String[] args) {
        String inFile = (args != null && args.length > 0)
                ? args[0] : chunkDir + "wsj_15_18_train.log";
        String featureFile = (args != null && args.length > 1)
                ? args[1] : chunkDir + "chunk features.txt";
        // try-with-resources guarantees both streams are closed, even on failure.
        try (BufferedReader reader = new BufferedReader(new FileReader(inFile));
             PrintStream writer = new PrintStream(new FileOutputStream(featureFile))) {
            writeFeatures(reader, writer);
            // PrintStream never throws on write failure; it must be asked.
            if (writer.checkError()) {
                System.out.println("Error:  could not write " + featureFile);
            }
        } catch (IOException e) {
            System.out.println(e);
        }
    }

    /**
     * Streams records from {@code reader} and prints one feature line per
     * record to {@code writer}.
     *
     * <p>A record can only be emitted once the two records after it are
     * known, so a four-slot window (previous, current, next, following) is
     * slid over the input. After end of input the window is drained with
     * empty records so that the final records are emitted as well.
     *
     * <p>A line with one or two fields is reported on standard output and
     * skipped; it does not enter the window.
     *
     * @param reader source of input lines; not closed by this method
     * @param writer destination of feature lines; not closed by this method
     * @throws IOException if reading from {@code reader} fails
     */
    static void writeFeatures(BufferedReader reader, PrintStream writer) throws IOException {
        String[] prev = BOUNDARY;
        String[] current = BOUNDARY;
        String[] next = BOUNDARY;

        String line;
        while ((line = reader.readLine()) != null) {
            String[] following = parseLine(line);
            if (following == null) {
                System.out.println("Error:  invalid input line: " + line);
                // Skip it: shifting the window here would re-insert the
                // previous record and emit it twice.
                continue;
            }
            emit(writer, prev, current, next, following);
            prev = current;
            current = next;
            next = following;
        }

        // Drain: the last LOOKAHEAD records are still waiting for right context.
        for (int i = 0; i < LOOKAHEAD; i++) {
            emit(writer, prev, current, next, BOUNDARY);
            prev = current;
            current = next;
            next = BOUNDARY;
        }
    }

    /**
     * Splits one input line into a (token, POS, tag) record.
     *
     * @return {@link #BOUNDARY} for a line with no fields, a new record built
     *         from the first three fields when at least three are present,
     *         or {@code null} when the line has only one or two fields
     */
    private static String[] parseLine(String line) {
        StringTokenizer st = new StringTokenizer(line);
        int count = st.countTokens();
        if (count == 0) {
            return BOUNDARY;
        }
        if (count < 3) {
            return null;
        }
        // Fields beyond the third are ignored.
        return new String[] {st.nextToken(), st.nextToken(), st.nextToken()};
    }

    /** Prints the feature line for {@code current} unless it is an empty record. */
    private static void emit(PrintStream writer, String[] prev, String[] current,
                             String[] next, String[] following) {
        if (!current[TOKEN].isEmpty()) {
            writer.println(formatFeatures(prev, current, next, following));
        }
    }

    /** Builds the space-separated feature string for {@code current}, ending with its tag. */
    private static String formatFeatures(String[] prev, String[] current,
                                         String[] next, String[] following) {
        StringBuilder features = new StringBuilder(200);
        features.append("prevPOS=").append(prev[POS]).append(' ');
        features.append("currPOS=").append(current[POS]).append(' ');
        features.append("nextPOS=").append(next[POS]).append(' ');
        features.append("POS012=").append(current[POS]).append(':');
        if (next[TOKEN].isEmpty()) {
            // No next token: leave both lookahead POS slots empty.
            features.append(": ");
        } else {
            features.append(next[POS]).append(':').append(following[POS]).append(' ');
        }
        features.append("prevTag=").append(prev[TAG]).append(' ');
        features.append("currWord=").append(current[TOKEN]).append(' ');
        features.append("W-1W0=").append(prev[TOKEN]).append(':').append(current[TOKEN]).append(' ');
        features.append("W0W1=").append(current[TOKEN]).append(':').append(next[TOKEN]).append(' ');
        features.append(current[TAG]);
        return features.toString();
    }
}

