/*
 * Decompiled with CFR 0.152.
 */
package edu.washington.cs.knowitall.extractor;

import edu.washington.cs.knowitall.extractor.SentenceExtractor;
import edu.washington.cs.knowitall.extractor.mapper.BracketsRemover;
import edu.washington.cs.knowitall.extractor.mapper.SentenceEndFilter;
import edu.washington.cs.knowitall.extractor.mapper.SentenceLengthFilter;
import edu.washington.cs.knowitall.extractor.mapper.SentenceStartFilter;
import edu.washington.cs.knowitall.util.HtmlUtils;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collection;
import opennlp.tools.sentdetect.SentenceDetector;

/**
 * A {@link SentenceExtractor} whose candidate sentences are obtained by first
 * stripping markup from the input with {@link HtmlUtils#removeHtml} and then
 * running the configured {@link SentenceDetector} over each resulting line.
 */
public class HtmlSentenceExtractor
extends SentenceExtractor {
    /**
     * Creates an extractor that uses the given sentence detector.
     *
     * @param detector the detector handed to the superclass constructor
     */
    public HtmlSentenceExtractor(SentenceDetector detector) {
        super(detector);
    }

    /**
     * Creates an extractor via the superclass no-argument constructor.
     * NOTE(review): presumably the superclass loads a default sentence
     * detector here - confirm against SentenceExtractor.
     *
     * @throws IOException declared for the implicit superclass constructor call
     */
    public HtmlSentenceExtractor() throws IOException {
    }

    /**
     * Removes HTML from the given block, splits the remaining text on newline
     * characters, and runs the sentence detector on each trimmed line.
     *
     * @param htmlBlock the raw HTML text to process
     * @return the detected sentences in encounter order, excluding any that
     *         are empty after trimming
     */
    @Override
    protected Collection<String> extractCandidates(String htmlBlock) {
        String content = HtmlUtils.removeHtml(htmlBlock);
        Collection<String> results = new ArrayList<String>();
        SentenceDetector detector = this.getSentenceDetector();
        for (String line : content.split("\n")) {
            for (String sent : detector.sentDetect(line.trim())) {
                // Skip whitespace-only detections; the stored sentence itself is not trimmed.
                if (sent.trim().isEmpty()) {
                    continue;
                }
                results.add(sent);
            }
        }
        return results;
    }

    /**
     * Command-line entry point: reads HTML from the file named by the single
     * argument (or from standard input when the argument count is not exactly
     * one), extracts sentences, and prints each one on its own line.
     *
     * @param args optionally a single path to the HTML file to read
     * @throws Exception if reading the input or constructing/running the extractor fails
     */
    public static void main(String[] args) throws Exception {
        BufferedReader in = args.length == 1 ? new BufferedReader(new FileReader(args[0])) : new BufferedReader(new InputStreamReader(System.in));
        StringBuilder sb = new StringBuilder();
        try {
            for (String line = in.readLine(); line != null; line = in.readLine()) {
                // readLine() drops the line terminator, so re-insert a separator;
                // otherwise the last token of one line fuses with the first token
                // of the next (words, or a tag name and its attribute).
                sb.append(line).append(' ');
            }
        } finally {
            // Release the file handle (or stdin wrapper) even if reading fails.
            in.close();
        }
        HtmlSentenceExtractor extractor = new HtmlSentenceExtractor();
        extractor.addMapper(new BracketsRemover());
        extractor.addMapper(new SentenceEndFilter());
        extractor.addMapper(new SentenceStartFilter());
        // NOTE(review): presumably a minimum sentence length of 4 - confirm the unit in SentenceLengthFilter.
        extractor.addMapper(SentenceLengthFilter.minFilter(4));
        // Typed (not raw) Iterable so the elements can be iterated as String.
        Iterable<String> sents = extractor.extract(sb.toString());
        for (String sent : sents) {
            System.out.println(sent);
        }
    }
}

