/*
 * Decompiled with CFR 0.152.
 */
package org.maochen.nlp.parser.stanford.pcfg;

import edu.stanford.nlp.io.ExtensionFileFilter;
import edu.stanford.nlp.io.NumberRangeFileFilter;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.EnglishGrammaticalStructure;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.SemanticHeadFinder;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.Treebank;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Scanner;
import org.maochen.nlp.datastructure.DTree;
import org.maochen.nlp.parser.stanford.pcfg.StanfordTreeBuilder;

/**
 * Command-line utility that trains a Stanford lexicalized (PCFG) parser from
 * on-disk treebanks, serializes the model, and then offers a small interactive
 * prompt for parsing sentences with the freshly trained model.
 *
 * <p>All default locations below are absolute paths on the original author's
 * machine; callers on other machines should invoke
 * {@link #trainEngine(String, int, int, String, String, double, String, int, String)}
 * directly with their own paths.
 */
public class StanfordPCFGParserTrainer {
    public static final String wsj = "/Users/Maochen/Desktop/treebank_3/parsed/mrg/wsj/";
    public static final String extra = "/Users/Maochen/Desktop/extra/treebank_extra_data/";
    // Derived from `extra`; the resulting value (including the double slash) is unchanged.
    public static final String taggedFiles = extra + "/train-tech-english";
    public static final String modelOutputFolder = "/Users/Maochen/Desktop/";

    /** Inputs that terminate the interactive loop in {@link #main(String[])}. */
    private static final String QUIT_REGEX = "q|quit|exit";

    /**
     * Trains a parser from one or two treebanks and writes the serialized model
     * to {@code modelPath}. Any file already present at {@code modelPath} is
     * deleted first.
     *
     * @param trainDirPath           directory of the primary treebank
     * @param startRange             lower bound handed to the {@code NumberRangeFileFilter}
     *                               used to select primary treebank files
     * @param endRange               upper bound handed to the same filter
     * @param train2DirPath          directory of the secondary treebank, or {@code null}
     *                               to train without one
     * @param train2FileExtension    file extension used to select secondary treebank files
     * @param extraTrainingSetWeight weight passed to the trainer for the secondary treebank
     * @param modelPath              destination of the serialized model
     * @param maxLength              value passed for the {@code -maxLength} option
     * @param taggedFiles            path appended to the {@code -taggedFiles} option
     *                               (with {@code tagSeparator=_}), or {@code null} to omit it
     */
    public static void trainEngine(String trainDirPath, int startRange, int endRange, String train2DirPath, String train2FileExtension, double extraTrainingSetWeight, String modelPath, int maxLength, String taggedFiles) {
        File existingModel = new File(modelPath);
        if (existingModel.exists()) {
            System.out.println("Delete the existing model in " + existingModel.getAbsolutePath());
            // Deletion is best effort: report a failure instead of silently ignoring it.
            if (!existingModel.delete()) {
                System.err.println("Unable to delete the existing model in " + existingModel.getAbsolutePath());
            }
        }

        List<String> para = new ArrayList<String>();
        para.add("-goodPCFG");
        para.add("-maxLength");
        para.add(String.valueOf(maxLength));
        para.add("-trainingThreads");
        para.add(String.valueOf(Runtime.getRuntime().availableProcessors()));
        para.add("-wordFunction");
        para.add("edu.stanford.nlp.process.AmericanizeFunction");
        if (taggedFiles != null) {
            para.add("-taggedFiles");
            para.add("tagSeparator=_," + taggedFiles);
        }
        Options op = new Options();
        op.setOptions(para.toArray(new String[0]));

        // Primary treebank: files selected by number range.
        // NOTE(review): the trailing `true` is presumably the "recurse" flag - confirm.
        Treebank trainTreeBank = new DiskTreebank();
        FileFilter trainTreeBankFilter = new NumberRangeFileFilter(startRange, endRange, true);
        trainTreeBank.loadPath(trainDirPath, trainTreeBankFilter);

        // Optional secondary treebank: files selected by extension; stays null when not requested.
        Treebank extraTreeBank = null;
        if (train2DirPath != null) {
            extraTreeBank = new DiskTreebank();
            FileFilter extraTreeBankFilter = new ExtensionFileFilter(train2FileExtension, true);
            extraTreeBank.loadPath(train2DirPath, extraTreeBankFilter);
        }

        LexicalizedParser trained = LexicalizedParser.getParserFromTreebank(trainTreeBank, extraTreeBank, extraTrainingSetWeight, null, op, null, null);
        trained.saveParserToSerialized(modelPath);
    }

    /**
     * Parses {@code sentence} and prints, in order: the Penn-style tree, each
     * typed dependency, and the {@link DTree} built from the tagged tokens and
     * dependencies.
     *
     * @param parser   parser used to parse the sentence
     * @param sentence raw sentence text
     */
    public static void printParseTree(LexicalizedParser parser, String sentence) {
        Tree tree = parser.parse(sentence);
        System.out.println(tree.pennString());

        HeadFinder headFinder = new SemanticHeadFinder(false);
        // NOTE(review): the raw Collection/List below look like decompiler output. The token
        // lambda needs the element type (presumably CoreLabel - confirm) restored, together
        // with its import, before this method compiles as written.
        Collection dependencies = new EnglishGrammaticalStructure(tree, string -> true, headFinder).typedDependencies();
        dependencies.forEach(System.out::println);

        List tokens = tree.taggedLabeledYield();
        // Sequential on purpose: a sentence has few tokens and the lambda only mutates
        // each token, so a parallel stream added overhead without any benefit.
        tokens.forEach(x -> {
            x.setOriginalText(x.word());
            x.setLemma(x.word());
        });

        DTree dtree = StanfordTreeBuilder.generate(tokens, dependencies, null);
        System.out.println(dtree);
    }

    /**
     * Trains a model with the default locations declared on this class
     * (files 1..2502 of {@link #wsj}, {@code .mrg} files of {@link #extra} with
     * weight 1.0, max length 40, and {@link #taggedFiles}).
     *
     * @return the path the serialized model was written to
     * @throws IOException declared for callers; nothing in this method body throws it directly
     */
    public static String train() throws IOException {
        // Same value as before ("/Users/Maochen/Desktop//englishPCFG.ser.gz"), now derived from the constant.
        String modelPath = modelOutputFolder + "/englishPCFG.ser.gz";
        StanfordPCFGParserTrainer.trainEngine(wsj, 1, 2502, extra, ".mrg", 1.0, modelPath, 40, taggedFiles);
        return modelPath;
    }

    /**
     * Trains the default model, loads it, and then reads sentences from standard
     * input, printing the parse of each one. The loop ends when the user enters
     * {@code q}, {@code quit} or {@code exit}, or when standard input is exhausted.
     *
     * @param args unused
     * @throws IOException propagated from {@link #train()}
     */
    public static void main(String[] args) throws IOException {
        String modelPath = StanfordPCFGParserTrainer.train();
        LexicalizedParser parser = LexicalizedParser.loadModel(modelPath, new ArrayList<String>());
        Scanner scan = new Scanner(System.in);
        while (true) {
            System.out.println("Please enter sentence:");
            // Stop cleanly on end of input instead of letting nextLine() throw NoSuchElementException.
            if (!scan.hasNextLine()) {
                break;
            }
            String input = scan.nextLine();
            if (input.matches(QUIT_REGEX)) {
                break;
            }
            if (input.trim().isEmpty()) {
                continue;
            }
            StanfordPCFGParserTrainer.printParseTree(parser, input);
        }
    }
}

