/*
 * Decompiled with CFR 0.152.
 */
package edu.nyu.jet.zoner;

import edu.nyu.jet.aceJet.Ace;
import edu.nyu.jet.tipster.Annotation;
import edu.nyu.jet.tipster.Document;
import edu.nyu.jet.tipster.ExternalDocument;
import edu.nyu.jet.tipster.Span;
import edu.nyu.jet.zoner.SentenceSplitter;
import edu.nyu.jet.zoner.SpecialZoner;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Vector;

/**
 * Command-line driver that reads a list of document names, runs sentence
 * splitting on each document, and writes the resulting sentences out.
 *
 * <p>Three output forms are supported, selected by the optional fourth
 * command-line argument:
 * <ul>
 *   <li>no fourth argument: a plain {@code .sent} file with one line per
 *       sentence ({@code <start offset> <sentence text>});</li>
 *   <li>fourth argument equal to {@code "inline"}: the document itself is
 *       saved with its sentence annotations via {@code doc.saveAs};</li>
 *   <li>any other fourth argument: a {@code .sent} file containing an XML
 *       listing of sentence character offsets.</li>
 * </ul>
 */
public class SentenceWriter {
    /** Prefix prepended to each document name to form the input path. */
    static String dataDir;
    /** Prefix prepended to each document name to form the output path. */
    static String outputDir;
    /** Path of the file listing one document name per line. */
    static String fileList;
    /** True when a fourth command-line argument was supplied. */
    static boolean writeXML = false;
    /** True when the fourth command-line argument is exactly "inline". */
    static boolean inline = false;
    static final boolean debug = false;
    /** Separator used in progress and error messages. */
    static final String d = " : ";

    /**
     * Entry point.
     *
     * @param args {@code filelist dataDirectory outputDirectory [XMLflag]}
     * @throws IOException if the file list cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        if (args.length > 0) {
            if (args.length < 3 || args.length > 4) {
                SentenceWriter.printUsage();
                System.exit(1);
            }
            fileList = args[0];
            dataDir = args[1];
            outputDir = args[2];
            writeXML = args.length == 4;
            inline = writeXML && args[3].equals("inline");
        }
        // With no arguments the static configuration may still be unset;
        // report usage rather than failing later with a NullPointerException.
        if (fileList == null || dataDir == null || outputDir == null) {
            SentenceWriter.printUsage();
            System.exit(1);
        }
        SentenceWriter.processFileList(fileList);
    }

    /** Prints the command-line usage message to standard output. */
    private static void printUsage() {
        System.out.println("SentenceWriter must have 3 or 4 arguments:");
        System.out.println("  filelist  dataDirectory  outputDirectory [XMLflag]");
    }

    /**
     * Processes every document named in {@code fileList}, one name per line.
     * A failure on one document is reported to standard error and does not
     * stop processing of the remaining documents.
     *
     * @param fileList path of the file containing the document names
     * @throws IOException if the list itself cannot be opened or read
     */
    private static void processFileList(String fileList) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(fileList));
        try {
            String currentDoc;
            int docCount = 0;
            while ((currentDoc = reader.readLine()) != null) {
                ++docCount;
                try {
                    SentenceWriter.processFile(currentDoc, docCount);
                }
                catch (Exception e) {
                    System.err.println("Error : " + fileList + d + docCount + d + currentDoc + d + e);
                    e.printStackTrace();
                }
            }
        }
        finally {
            reader.close();
        }
    }

    /**
     * Opens one document, splits it into sentences, and writes the output in
     * the form selected by {@link #inline} and {@link #writeXML}.
     *
     * @param currentDoc document name, appended to {@link #dataDir} for input
     *                   and to {@link #outputDir} for output
     * @param docCount   1-based position of the document in the file list,
     *                   used only in the progress message
     * @throws IOException if the output file cannot be created or written
     */
    private static void processFile(String currentDoc, int docCount) throws IOException {
        System.out.println("\nProcessing document " + docCount + d + currentDoc);
        String textFileName = dataDir + currentDoc;
        ExternalDocument doc = new ExternalDocument("sgml", textFileName);
        doc.setAllTags(true);
        doc.open();
        SentenceWriter.split(doc, currentDoc);
        if (inline) {
            SentenceWriter.writeInline(doc, currentDoc);
            return;
        }
        String sentFileName = outputDir + currentDoc + ".sent";
        PrintWriter writer = new PrintWriter(new FileWriter(sentFileName));
        boolean writeFailed;
        try {
            SentenceWriter.writeSents(doc, currentDoc, writer);
            // PrintWriter never throws on write errors; checkError() flushes
            // and reports whether any write failed.
            writeFailed = writer.checkError();
        }
        finally {
            writer.close();
        }
        if (writeFailed) {
            throw new IOException("Error writing " + sentFileName);
        }
    }

    /**
     * Adds sentence annotations to each TEXT segment of {@code doc}, unless
     * the document already carries sentence annotations. If the document has
     * no TEXT annotations a message is printed and nothing is added.
     *
     * @param doc            document to annotate
     * @param currentDocPath document name, used only in the skip message
     */
    private static void split(Document doc, String currentDocPath) {
        SpecialZoner.findSpecialZones(doc);
        Vector<Annotation> textSegments = doc.annotationsOfType("TEXT");
        if (textSegments == null) {
            System.out.println("No <TEXT> in " + currentDocPath + ", skipped.");
            return;
        }
        Vector<Annotation> priorSentences = doc.annotationsOfType("sentence");
        if (priorSentences != null && priorSentences.size() > 0) {
            // Sentences are already present; leave them untouched.
            return;
        }
        for (Annotation ann : textSegments) {
            Span textSpan = ann.span();
            Ace.monocase = Ace.allLowerCase(doc);
            SentenceSplitter.split(doc, textSpan);
        }
    }

    /**
     * Writes the sentences of {@code doc} to {@code writer}.
     *
     * <p>In XML mode the output is a {@code source_file}/{@code document}
     * wrapper containing one {@code sentence} element per sentence with its
     * start offset and inclusive end offset. The wrapper is always closed,
     * even when the document has no sentence annotations. In plain mode each
     * sentence is written as {@code <start> <text>} on one line, with
     * newlines inside the sentence replaced by spaces.
     *
     * @param doc            document whose sentence annotations are written
     * @param currentDocPath document name; in XML mode its last path
     *                       component is used as the URI and, if the
     *                       document has no id, as the DOCID
     * @param writer         destination; not closed by this method
     */
    private static void writeSents(ExternalDocument doc, String currentDocPath, PrintWriter writer) {
        if (writeXML) {
            String currentDoc = currentDocPath;
            int lastSlash = currentDocPath.lastIndexOf('/');
            if (lastSlash >= 0) {
                currentDoc = currentDocPath.substring(lastSlash + 1);
            }
            writer.print("<source_file URI=\"" + currentDoc + "\"");
            writer.println(" SOURCE=\"newswire\" TYPE=\"text\" AUTHOR=\"NYU\">");
            String docId = Ace.getDocId(doc);
            if (docId == null) {
                docId = currentDoc.endsWith(".sgm") ? currentDoc.substring(0, currentDoc.length() - 4) : currentDoc;
            }
            writer.println("<document DOCID=\"" + docId + "\">");
        }
        Vector<Annotation> sentences = doc.annotationsOfType("sentence");
        if (sentences != null) {
            for (Annotation sentence : sentences) {
                Span sentenceSpan = sentence.span();
                if (writeXML) {
                    // NOTE(review): shrink presumably trims whitespace from the
                    // annotation's span before offsets are reported - confirm
                    // against Document.shrink.
                    doc.shrink(sentence);
                    writer.println("  <sentence><charseq START=\"" + sentenceSpan.start() + "\"" + " END=\"" + (sentenceSpan.end() - 1) + "\"></charseq>" + "</sentence>");
                } else {
                    String sentenceText = doc.text(sentenceSpan).trim().replace('\n', ' ');
                    writer.println(sentenceSpan.start() + " " + sentenceText);
                }
            }
        }
        if (writeXML) {
            // Close the wrapper opened above so the output stays well-formed
            // even for documents without sentences.
            writer.println("</document>");
            writer.println("</source_file>");
        }
    }

    /**
     * Numbers the sentence annotations ({@code SENT-1}, {@code SENT-2}, ...),
     * removes dateline and textBreak annotations, and saves the document
     * under {@link #outputDir}.
     *
     * @param doc        document to save
     * @param currentDoc document name passed to {@code doc.saveAs}
     */
    private static void writeInline(ExternalDocument doc, String currentDoc) {
        Vector<Annotation> sentences = doc.annotationsOfType("sentence");
        if (sentences != null) {
            int sentNo = 0;
            for (Annotation sentence : sentences) {
                sentence.put("ID", "SENT-" + ++sentNo);
            }
        }
        doc.removeAnnotationsOfType("dateline");
        doc.removeAnnotationsOfType("textBreak");
        doc.shrink("sentence");
        doc.setSGMLwrapMargin(0);
        doc.saveAs(outputDir, currentDoc);
    }
}

