/*
 * Decompiled with CFR 0.152.
 */
package com.hazelcast.jet.examples.tfidf;

import com.hazelcast.function.BiFunctionEx;
import com.hazelcast.function.FunctionEx;
import com.hazelcast.function.Functions;
import com.hazelcast.jet.Jet;
import com.hazelcast.jet.JetInstance;
import com.hazelcast.jet.Job;
import com.hazelcast.jet.Traversers;
import com.hazelcast.jet.Util;
import com.hazelcast.jet.aggregate.AggregateOperations;
import com.hazelcast.jet.examples.tfidf.SearchGui;
import com.hazelcast.jet.pipeline.BatchStage;
import com.hazelcast.jet.pipeline.JoinClause;
import com.hazelcast.jet.pipeline.Pipeline;
import com.hazelcast.jet.pipeline.Sinks;
import com.hazelcast.jet.pipeline.Sources;
import java.io.IOException;
import java.io.Serializable;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Builds a TF-IDF inverted index over the text files found in the {@code /books}
 * classpath directory using a Jet batch pipeline, stores it in the Jet map
 * {@value #INVERTED_INDEX} and then opens a {@link SearchGui} on top of that map.
 *
 * <p>Each entry of the resulting map has a word as its key and, as its value, a list
 * of {@code (filename, score)} pairs where {@code score = tf * (log(docCount) - log(df))}.
 */
public class TfIdf {
    /** Splits a line of text into words on runs of non-word characters. */
    private static final Pattern DELIMITER = Pattern.compile("\\W+");
    /** Name of the Jet map the pipeline writes the index to. */
    private static final String INVERTED_INDEX = "inverted-index";
    /**
     * Key returned for every item on both sides of the hash join, so that every
     * term-frequency item is matched with the log-document-count item.
     */
    private static final String CONSTANT_KEY = "constant";
    /** Classpath directory holding the documents to index. */
    private static final String BOOKS_RESOURCE = "/books";
    /** Classpath file holding the stop words. */
    private static final String STOPWORDS_RESOURCE = "/stopwords.txt";

    /** Jet instance, assigned in {@link #go()}. */
    private JetInstance jet;

    /**
     * Assembles the indexing pipeline.
     *
     * @return a pipeline that reads the book files, computes per-word TF-IDF scores
     *         and writes them to the {@value #INVERTED_INDEX} map
     */
    private static Pipeline buildPipeline() {
        Path bookDirectory = getClasspathDirectory(BOOKS_RESOURCE);
        // Captured by the word-splitting lambda below, so it becomes part of the
        // pipeline definition.
        Set<String> stopwords = loadStopwords();
        Pipeline p = Pipeline.create();

        // One (filename, line) entry per line of every file in the book directory.
        BatchStage<Map.Entry<String, String>> bookLines = p.readFrom(
                Sources.filesBuilder(bookDirectory.toString()).build(Util::entry));

        // log(number of distinct filenames): keep one item per filename, count, take the log.
        BatchStage<Double> logDocCountStage = bookLines
                .groupingKey(Map.Entry::getKey)
                .distinct()
                .aggregate(AggregateOperations.counting())
                .map(Math::log);

        // One (filename, lower-cased word) entry per non-stop-word occurrence.
        // NOTE(review): DELIMITER.split yields a leading "" token when a line starts
        // with a non-word character; such tokens are indexed unless "" is a stop word.
        BatchStage<Map.Entry<String, String>> bookWords = bookLines.flatMap(entry -> {
            String filename = entry.getKey();
            return Traversers.traverseArray(DELIMITER.split(entry.getValue()))
                    .map(rawWord -> {
                        String word = rawWord.toLowerCase();
                        // Returning null drops the item (see Traverser.map).
                        return stopwords.contains(word) ? null : Util.entry(filename, word);
                    });
        });

        // word -> (filename -> number of occurrences of the word in that file).
        BatchStage<Map.Entry<String, Map<String, Long>>> tf = bookWords
                .groupingKey(Functions.entryValue())
                .aggregate(AggregateOperations.toMap(Functions.entryKey(), e -> 1L, Long::sum));

        // Attach the log-document-count to every word and turn the counts into scores.
        BatchStage<Map.Entry<String, List<Map.Entry<String, Double>>>> idf = tf.hashJoin(
                logDocCountStage,
                JoinClause.onKeys(x -> CONSTANT_KEY, x -> CONSTANT_KEY),
                (tfPair, logDocCount) -> Util.entry(
                        tfPair.getKey(),
                        docScores(logDocCount, tfPair.getValue().entrySet())));

        idf.writeTo(Sinks.map(INVERTED_INDEX));
        return p;
    }

    /**
     * Converts the per-document term frequencies of one word into TF-IDF scores.
     *
     * @param logDocCount    natural log of the total number of documents
     * @param filenameAndTFs one {@code (filename, term frequency)} entry per document
     *                       containing the word; its size is used as the document frequency
     * @return one {@code (filename, tf-idf score)} entry per input entry
     */
    private static List<Map.Entry<String, Double>> docScores(
            double logDocCount, Collection<Map.Entry<String, Long>> filenameAndTFs) {
        double logDf = Math.log(filenameAndTFs.size());
        return filenameAndTFs.stream()
                .map(tfe -> tfidfEntry(logDocCount, logDf, tfe))
                .collect(Collectors.toList());
    }

    /**
     * Computes the TF-IDF score of one word in one document.
     *
     * @param logDocCount natural log of the total number of documents
     * @param logDf       natural log of the number of documents containing the word
     * @param docIdTf     {@code (document id, term frequency)} entry
     * @return {@code (document id, tf * (logDocCount - logDf))}
     */
    private static Map.Entry<String, Double> tfidfEntry(
            double logDocCount, double logDf, Map.Entry<String, Long> docIdTf) {
        String docId = docIdTf.getKey();
        double tf = docIdTf.getValue();
        double idf = logDocCount - logDf;
        return Util.entry(docId, tf * idf);
    }

    /**
     * Program entry point. On any failure all Jet instances are shut down before the
     * failure is rethrown; on success nothing is shut down here.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            new TfIdf().go();
        } catch (Throwable t) {
            Jet.shutdownAll();
            throw t;
        }
    }

    /** Obtains the Jet instance, builds the index, prints its size and opens the search GUI. */
    private void go() {
        System.out.println("Creating Jet instance 1");
        jet = Jet.bootstrappedInstance();
        buildInvertedIndex();
        // A typed local lets the compiler infer getMap's type arguments; no cast is needed.
        Map<String, List<Map.Entry<String, Double>>> invertedIndex = jet.getMap(INVERTED_INDEX);
        System.out.println("size=" + invertedIndex.size());
        new SearchGui(invertedIndex, loadStopwords());
    }

    /** Submits the indexing pipeline as a job, waits for it to finish and prints the elapsed time. */
    private void buildInvertedIndex() {
        Job job = jet.newJob(buildPipeline());
        long start = System.nanoTime();
        job.join();
        long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
        System.out.println("Indexing took " + elapsedMillis + " milliseconds.");
    }

    /**
     * Resolves a classpath directory to a file-system path.
     *
     * @param name classpath resource name, resolved relative to this class
     * @return the path of the resource
     * @throws IllegalArgumentException if the resource does not exist
     * @throws RuntimeException         if the resource URL is not a valid URI
     */
    private static Path getClasspathDirectory(String name) {
        return resourcePath(name);
    }

    /**
     * Returns the lines of a classpath text file, decoded as UTF-8. The file is read
     * completely and closed before this method returns, so the returned stream holds
     * no open file handle and need not be closed by the caller.
     *
     * @param name classpath resource name, resolved relative to this class
     * @return a stream over the lines of the resource
     * @throws IllegalArgumentException if the resource does not exist
     * @throws RuntimeException         if the resource cannot be located or read
     */
    private static Stream<String> docLines(String name) {
        try {
            return Files.readAllLines(resourcePath(name)).stream();
        } catch (IOException e) {
            throw new RuntimeException("Failed to read classpath resource " + name, e);
        }
    }

    /** Loads the stop-word file into a set, one word per line. */
    private static Set<String> loadStopwords() {
        return docLines(STOPWORDS_RESOURCE).collect(Collectors.toSet());
    }

    /**
     * Locates a classpath resource and converts its URL to a {@link Path}.
     * NOTE(review): {@code Paths.get(URI)} presumably only works while the resource is
     * a plain file or directory (not packed inside a JAR) - confirm for the deployment.
     */
    private static Path resourcePath(String name) {
        java.net.URL resource = TfIdf.class.getResource(name);
        if (resource == null) {
            throw new IllegalArgumentException("Classpath resource not found: " + name);
        }
        try {
            return Paths.get(resource.toURI());
        } catch (URISyntaxException e) {
            throw new RuntimeException(e);
        }
    }
}

