/*
 * Decompiled with CFR 0.152.
 */
package edu.washington.cs.knowitall.util;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringEscapeUtils;

public class HtmlUtils {
    private static HashSet<Pattern> removePatterns;
    private static HashSet<Pattern> breakPatterns;
    private static boolean initialized;
    private static final String[] breakTags;
    private static final String[] removeTags;
    private static Pattern tag;
    private static Pattern whiteSpace;
    private static Pattern multiSpace;
    private static Pattern multiBreaks;

    public static String removeHtml(String content) {
        if (!initialized) {
            HtmlUtils.initPatterns();
        }
        content = whiteSpace.matcher(content).replaceAll(" ");
        content = HtmlUtils.applyPatterns(removePatterns, content);
        content = HtmlUtils.applyPatterns(breakPatterns, content);
        content = tag.matcher(content).replaceAll("");
        content = StringEscapeUtils.unescapeCsv((String)content);
        content = multiSpace.matcher(content).replaceAll(" ");
        content = multiBreaks.matcher(content).replaceAll("\n");
        content = content.replace(';', '\n');
        return content;
    }

    public static void main(String[] args) throws Exception {
        BufferedReader in = args.length == 1 ? new BufferedReader(new FileReader(args[0])) : new BufferedReader(new InputStreamReader(System.in));
        StringBuffer sb = new StringBuffer();
        String line = in.readLine();
        while (line != null) {
            sb.append(line);
            line = in.readLine();
        }
        System.out.println(HtmlUtils.removeHtml(sb.toString()));
    }

    private static String applyPatterns(HashSet<Pattern> patterns, String s) {
        for (Pattern pat : patterns) {
            s = pat.matcher(s).replaceAll("\n");
        }
        return s;
    }

    private static void initPatterns() {
        Pattern p;
        int i;
        removePatterns = new HashSet();
        breakPatterns = new HashSet();
        for (i = 0; i < removeTags.length; ++i) {
            p = Pattern.compile("(?is)<" + removeTags[i] + "[^<]*?>.*?</" + removeTags[i] + ">");
            removePatterns.add(p);
            p = Pattern.compile("(?i)</?" + removeTags[i] + "[^<]*?>");
            breakPatterns.add(p);
        }
        for (i = 0; i < breakTags.length; ++i) {
            p = Pattern.compile("(?i)</?" + breakTags[i] + "[^<]*?>");
            breakPatterns.add(p);
        }
        initialized = true;
    }

    static {
        initialized = false;
        breakTags = new String[]{"blockquote", "br", "center", "dd", "div", "dt", "fieldset", "h\\d", "hr", "img", "input", "isindex", "li", "noframes", "noscript", "p", "pre", "q", "table", "td", "textarea", "th", "xmp"};
        removeTags = new String[]{"applet", "form", "head", "iframe", "legend", "map", "object", "script", "select", "style", "title"};
        tag = Pattern.compile("<[^<]*?>");
        whiteSpace = Pattern.compile("\\s+");
        multiSpace = Pattern.compile("  +");
        multiBreaks = Pattern.compile("\n\n+");
    }
}

