/*
 * Decompiled with CFR 0.152.
 */
package com.hazelcast.jet.examples.tfidf;

import com.hazelcast.jet.Util;
import com.hazelcast.jet.examples.tfidf.TfIdfJdkStreams;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Command-line utility that derives a stopword list from the document set
 * used by {@link TfIdfJdkStreams}.
 *
 * <p>A word is treated as a stopword when it occurs in <em>every</em> document
 * returned by {@code TfIdfJdkStreams.buildDocumentInventory()}. The resulting
 * words are written, one per line and in natural (lexicographic) order, to
 * {@code stopwords.txt} in the current working directory, encoded as UTF-8.
 */
public class BuildStopwords {
    /**
     * Builds the word-to-documents index and writes the stopwords file.
     *
     * @param args ignored
     * @throws IOException if the stopwords file cannot be opened or if writing
     *                     to it fails
     */
    public static void main(String[] args) throws IOException {
        Set<String> docIds = TfIdfJdkStreams.buildDocumentInventory();
        long docCount = docIds.size();
        System.out.println("Analyzing documents");

        // word -> set of ids of the documents in which the word occurs
        Map<String, Set<String>> wordDocs = docIds.parallelStream()
                .flatMap(TfIdfJdkStreams::docLines)
                .flatMap(BuildStopwords::tokenize)
                .collect(Collectors.groupingBy(
                        Map.Entry::getValue,
                        Collectors.mapping(Map.Entry::getKey, Collectors.toSet())));

        File stopwordsFile = new File("stopwords.txt");
        System.out.println("Writing the stopwords file " + stopwordsFile.getAbsolutePath());
        try (PrintWriter w = new PrintWriter(
                new OutputStreamWriter(new FileOutputStream(stopwordsFile), StandardCharsets.UTF_8))) {
            wordDocs.entrySet().stream()
                    // keep only the words that occur in every document
                    .filter(e -> e.getValue().size() == docCount)
                    .map(Map.Entry::getKey)
                    // map keys are unique, so natural order equals ordering by key
                    .sorted()
                    .forEach(w::println);
            // PrintWriter swallows I/O errors; surface them instead of silently
            // leaving a truncated file behind.
            if (w.checkError()) {
                throw new IOException("Failed to write the stopwords file " + stopwordsFile.getAbsolutePath());
            }
        }
    }

    /**
     * Splits one line of a document into its non-empty tokens.
     *
     * @param docLine entry whose key is the document id and whose value is a
     *                line of text from that document
     * @return a stream of {@code (document id, token)} entries, one per
     *         non-empty token produced by {@code TfIdfJdkStreams.DELIMITER}
     */
    private static Stream<Map.Entry<String, String>> tokenize(Map.Entry<String, String> docLine) {
        String docId = docLine.getKey();
        return Arrays.stream(TfIdfJdkStreams.DELIMITER.split(docLine.getValue()))
                .filter(token -> !token.isEmpty())
                .map(word -> Util.entry(docId, word));
    }
}

