/*
 * Decompiled with CFR 0.152.
 */
package org.maochen.nlp.utils;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Counts token occurrences in text files and turns the counts into relative
 * frequencies, which {@link #main(String[])} writes out in descending order.
 *
 * <p>Counts are kept in a {@link ConcurrentHashMap} and the running total in an
 * {@link AtomicLong} because {@link #main(String[])} processes the files of a
 * directory through a parallel stream that shares one generator instance.
 */
public class StopwordsGenerator {
    /** One or more punctuation characters at the very end of the input. */
    private static final Pattern TRAILING_PUNCTUATION = Pattern.compile("\\p{Punct}+$");
    /** Punctuation marks that are dropped wherever they occur. */
    private static final Pattern SENTENCE_MARKS = Pattern.compile("[?:;!]");
    /** Any run of whitespace, collapsed to a single space. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /** Token to count; after {@link #normalize()} the values are count / total. */
    private final Map<String, Double> wordCount = new ConcurrentHashMap<String, Double>();
    /** Number of counted units (tokens for the wiki counter, sentences for the document counter). */
    private final AtomicLong totalCount = new AtomicLong(0L);

    /**
     * Normalizes a raw token or sentence: lower-cases it, removes double quotes,
     * turns commas into spaces, strips trailing punctuation, removes
     * {@code ? : ; !} and {@code --}, expands the contractions {@code i've},
     * {@code we'll} and {@code he's}, replaces remaining apostrophes with a
     * space, collapses whitespace runs and trims the result.
     *
     * <p>Lower-casing happens first so that the lowercase contraction rules also
     * match capitalized input such as {@code "I've"}; {@link Locale#ROOT} keeps
     * the result independent of the JVM's default locale.
     *
     * @param str text to normalize; must not be {@code null}
     * @return the normalized text, possibly empty
     */
    public String stringNormalize(String str) {
        String text = str.toLowerCase(Locale.ROOT)
                .replace("\"", "")
                .replace(",", " ");
        text = TRAILING_PUNCTUATION.matcher(text).replaceAll("");
        text = SENTENCE_MARKS.matcher(text).replaceAll("");
        text = text.replace("--", "")
                .replace("i've", "i have")
                .replace("we'll", "we will")
                .replace("he's", "he has")
                .replace("'", " ");
        return WHITESPACE_RUN.matcher(text).replaceAll(" ").trim();
    }

    /**
     * Divides every stored count by the current total, in place.
     *
     * <p>Not idempotent: each call divides the stored values again. Does nothing
     * while the total is still zero, which avoids NaN/Infinity values.
     */
    public void normalize() {
        final double total = this.totalCount.doubleValue();
        if (total == 0.0) {
            return;
        }
        this.wordCount.replaceAll((token, count) -> count / total);
    }

    /**
     * Returns the live token map (not a copy). Values are raw counts until
     * {@link #normalize()} has been called.
     *
     * @return the internal token map
     */
    public Map<String, Double> getProbability() {
        return this.wordCount;
    }

    /**
     * Writes one {@code key=value} line per entry to the given file.
     *
     * <p>I/O failures are reported on stderr and not rethrown, as before; the
     * writer is closed in all cases.
     *
     * @param fileName path of the output file (overwritten if it exists)
     * @param result   entries to write, in the order given
     */
    public void writeFile(String fileName, List<Map.Entry<String, Double>> result) {
        // try-with-resources closes (and flushes) the writer even when a write fails.
        try (BufferedWriter output = new BufferedWriter(new FileWriter(new File(fileName)))) {
            for (Map.Entry<String, Double> entry : result) {
                output.write(entry.toString() + System.lineSeparator());
            }
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is an input file or a directory of input
     *             files, {@code args[1]} is the output file location
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("Please specify dir or filename | output file location");
            return;
        }
        StopwordsGenerator g = new StopwordsGenerator();
        File file = new File(args[0]);
        if (file.isFile()) {
            WikiSingleWordCount.generateFromFile(file, g);
        } else {
            File[] files = file.listFiles();
            if (files == null) {
                // listFiles() returns null when the path is not a directory or cannot be read.
                System.err.println("Cannot read input path: " + file);
                return;
            }
            Arrays.stream(files)
                    .parallel()
                    .filter(File::isFile)
                    .forEach(f -> WikiSingleWordCount.generateFromFile(f, g));
        }
        g.normalize();
        // Highest relative frequency first.
        List<Map.Entry<String, Double>> result = g.getProbability().entrySet().stream()
                .sorted((o1, o2) -> o2.getValue().compareTo(o1.getValue()))
                .collect(Collectors.toList());
        g.writeFile(args[1], result);
    }

    /**
     * Counts individual tokens of a file, where tokens are separated by the
     * space character only.
     */
    static class WikiSingleWordCount {
        /** Upper bound on counted tokens; a value that is not positive disables the limit. */
        private static final int MAX_TOKENS = -1;
        /** A progress line is printed each time the total reaches a multiple of this. */
        private static final long PROGRESS_INTERVAL = 10000000L;

        WikiSingleWordCount() {
        }

        /**
         * Reads the file character by character and counts every
         * space-delimited token after normalizing it.
         *
         * <p>A token that ends at end-of-file without a trailing space is
         * counted as well, unless it normalizes to the empty string. Read
         * errors are reported on stderr and not rethrown.
         *
         * @param file               file to read
         * @param stopwordsGenerator generator whose counts are updated
         */
        public static void generateFromFile(File file, StopwordsGenerator stopwordsGenerator) {
            StringBuilder wordBuilder = new StringBuilder();
            try (BufferedReader br = new BufferedReader(new FileReader(file))) {
                int c;
                while ((c = br.read()) != -1) {
                    if (MAX_TOKENS > 0 && stopwordsGenerator.totalCount.get() > (long) MAX_TOKENS) {
                        // Limit reached: stop without counting the partial token.
                        return;
                    }
                    if (c != ' ') {
                        wordBuilder.append((char) c);
                        continue;
                    }
                    String token = stopwordsGenerator.stringNormalize(wordBuilder.toString());
                    wordBuilder.setLength(0);
                    countToken(token, stopwordsGenerator);
                }
                if (wordBuilder.length() > 0) {
                    // Flush the last token; skip it when only whitespace/punctuation was pending.
                    String lastToken = stopwordsGenerator.stringNormalize(wordBuilder.toString());
                    if (!lastToken.isEmpty()) {
                        countToken(lastToken, stopwordsGenerator);
                    }
                }
            }
            catch (IOException e) {
                e.printStackTrace();
            }
        }

        /** Adds one occurrence of the token and prints progress at every interval. */
        private static void countToken(String token, StopwordsGenerator stopwordsGenerator) {
            // merge() is an atomic read-modify-write on ConcurrentHashMap, so
            // files processed in parallel cannot overwrite each other's counts.
            stopwordsGenerator.wordCount.merge(token, 1.0, Double::sum);
            long processed = stopwordsGenerator.totalCount.incrementAndGet();
            if (processed % PROGRESS_INTERVAL == 0L) {
                if (MAX_TOKENS > 0) {
                    System.out.println("Processed tokens: " + (double) processed / (double) MAX_TOKENS * 100.0 + "%");
                } else {
                    System.out.println("Processed tokens: " + processed);
                }
            }
        }
    }

    /**
     * Counts, per token, the number of sentences that contain it; the total is
     * the number of non-empty sentences.
     */
    static class DocumentCount {
        DocumentCount() {
        }

        /**
         * Counts one sentence: increments the total and adds one to every
         * distinct token of the normalized sentence. Sentences that normalize
         * to the empty string are ignored.
         */
        private static void addCount(String sentence, StopwordsGenerator stopwordsGenerator) {
            String normalized = stopwordsGenerator.stringNormalize(sentence);
            if (normalized.isEmpty()) {
                return;
            }
            stopwordsGenerator.totalCount.incrementAndGet();
            // A set, so that a token repeated within the sentence counts once.
            Set<String> tokens = Arrays.stream(normalized.split("\\s")).collect(Collectors.toSet());
            for (String token : tokens) {
                stopwordsGenerator.wordCount.merge(token, 1.0, Double::sum);
            }
        }

        /** Splits the buffered paragraph on '.' and counts each sentence, then clears the buffer. */
        private static void flushParagraph(StringBuilder paragraph, StopwordsGenerator stopwordsGenerator) {
            for (String sentence : paragraph.toString().split("\\.")) {
                addCount(sentence, stopwordsGenerator);
            }
            paragraph.setLength(0);
        }

        /**
         * Reads the file line by line, treating blank lines as paragraph
         * separators, and counts the sentences of each paragraph.
         *
         * <p>The last paragraph is counted even when the file does not end with
         * a blank line. Read errors are reported on stderr and not rethrown.
         *
         * @param file               file to read
         * @param stopwordsGenerator generator whose counts are updated
         */
        public static void generateFromFile(File file, StopwordsGenerator stopwordsGenerator) {
            try (BufferedReader br = new BufferedReader(new FileReader(file))) {
                StringBuilder sb = new StringBuilder();
                String line;
                while ((line = br.readLine()) != null) {
                    if (line.trim().isEmpty()) {
                        flushParagraph(sb, stopwordsGenerator);
                    } else {
                        sb.append(line).append(" ");
                    }
                }
                if (sb.length() > 0) {
                    flushParagraph(sb, stopwordsGenerator);
                }
            }
            catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

