/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.dev.archive;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.regex.Pattern;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

/**
 * Command-line tool that sums the {@code count} values of all terms in the
 * {@code ngram} field of a Lucene index that start with the {@code _START_}
 * marker. Terms ending in one of the tag suffixes matched by
 * {@link #TAGGED_TERM} (for example {@code _NOUN}) are skipped.
 *
 * <p>Progress is printed to standard output after every
 * {@value #PROGRESS_INTERVAL} counted terms, followed by the final total.
 */
final class StartTokenCounter {
    /** Index directory that is read when none is given on the command line. */
    private static final String DEFAULT_INDEX_DIR = "/data/google-ngram-index/en/2grams";
    /** Name of the indexed field holding the n-gram text. */
    private static final String NGRAM_FIELD = "ngram";
    /** Name of the stored field holding the occurrence count of an n-gram. */
    private static final String COUNT_FIELD = "count";
    /** Prefix a term must have in order to be counted. */
    private static final String START_MARKER = "_START_";
    /** Number of counted terms between two progress lines. */
    private static final int PROGRESS_INTERVAL = 10000;
    /**
     * Matches terms that end in a tag suffix; such terms are not counted.
     * Compiled once because it is applied to every candidate term of the index.
     */
    private static final Pattern TAGGED_TERM =
            Pattern.compile(".*_(ADJ|ADV|NUM|VERB|ADP|NOUN|PRON|CONJ|DET|PRT)$");

    private StartTokenCounter() {
    }

    /**
     * Runs the count and prints the result.
     *
     * @param args optional; {@code args[0]} is the index directory to read. When
     *             absent, {@value #DEFAULT_INDEX_DIR} is used.
     * @throws IOException if the index cannot be opened or read
     * @throws RuntimeException if the index has no {@code ngram} field, or if a
     *             term does not map to exactly one document
     */
    public static void main(String[] args) throws IOException {
        boolean hasDirArgument = args != null && args.length > 0;
        File dir = new File(hasDirArgument ? args[0] : DEFAULT_INDEX_DIR);
        long totalCount = 0L;
        try (FSDirectory directory = FSDirectory.open(dir.toPath());
             DirectoryReader reader = DirectoryReader.open(directory)) {
            Fields fields = MultiFields.getFields(reader);
            // Both lookups may yield null (e.g. missing field); fail with a
            // clear message instead of a bare NullPointerException.
            Terms ngrams = fields == null ? null : fields.terms(NGRAM_FIELD);
            if (ngrams == null) {
                throw new RuntimeException("No '" + NGRAM_FIELD + "' field found in index " + dir);
            }
            IndexSearcher searcher = new IndexSearcher(reader);
            TermsEnum iterator = ngrams.iterator();
            int counted = 0;
            BytesRef next;
            while ((next = iterator.next()) != null) {
                String term = next.utf8ToString();
                if (!isUntaggedStartTerm(term)) {
                    continue;
                }
                totalCount += lookupCount(searcher, reader, term);
                if (++counted % PROGRESS_INTERVAL == 0) {
                    System.out.println(counted + " ... " + totalCount);
                }
            }
        }
        System.out.println("==> " + totalCount);
    }

    /** Returns whether {@code term} starts with the start marker and carries no tag suffix. */
    private static boolean isUntaggedStartTerm(String term) {
        return term.startsWith(START_MARKER) && !TAGGED_TERM.matcher(term).matches();
    }

    /**
     * Returns the value of the {@code count} field of the single document
     * indexed under {@code term}.
     *
     * @throws RuntimeException if the term matches no document or more than one
     */
    private static long lookupCount(IndexSearcher searcher, IndexReader reader, String term) throws IOException {
        TopDocs topDocs = searcher.search(new TermQuery(new Term(NGRAM_FIELD, term)), 3);
        if (topDocs.totalHits == 0) {
            throw new RuntimeException("No hits for " + term + ": " + topDocs.totalHits);
        }
        if (topDocs.totalHits != 1) {
            throw new RuntimeException("More hits than expected for " + term + ": " + topDocs.totalHits);
        }
        Document document = reader.document(topDocs.scoreDocs[0].doc);
        return Long.parseLong(document.get(COUNT_FIELD));
    }
}

