/*
 * Decompiled with CFR 0.152.
 */
package edu.usc.irds.agepredictor.spark.authorage;

import edu.usc.irds.agepredictor.spark.authorage.AgeClassifyContextGeneratorWrapper;
import edu.usc.irds.agepredictor.spark.authorage.CreateEvents;
import edu.usc.irds.agepredictor.spark.authorage.EventStreamUtil;
import edu.usc.irds.agepredictor.spark.authorage.EventWrapper;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import opennlp.tools.authorage.AgeClassifyFactory;
import opennlp.tools.authorage.AgeClassifyModel;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.ml.EventTrainer;
import opennlp.tools.ml.authorage.AgeClassifyTrainerFactory;
import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.featuregen.FeatureGenerator;
import opennlp.tools.util.model.BaseModel;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

public class AgeClassifySparkTrainer {
    public static AgeClassifyModel createModel(String languageCode, String dataIn, String tokenizer, String featureGenerators, TrainingParameters trainParams) throws IOException {
        SparkConf conf = new SparkConf().setAppName("AgeClassifySparkTrainer");
        JavaSparkContext sc = new JavaSparkContext(conf);
        AgeClassifyContextGeneratorWrapper wrapper = new AgeClassifyContextGeneratorWrapper(tokenizer, featureGenerators);
        JavaRDD data = sc.textFile(dataIn, 8).cache();
        JavaRDD samples = data.map((Function)new CreateEvents(wrapper)).cache();
        JavaRDD validSamples = samples.filter((Function)new Function<EventWrapper, Boolean>(){

            public Boolean call(EventWrapper s) {
                return s != null;
            }
        }).cache();
        ObjectStream<Event> eventStream = EventStreamUtil.createEventStream(validSamples.collect());
        HashMap entries = new HashMap();
        EventTrainer trainer = AgeClassifyTrainerFactory.getEventTrainer((Map)trainParams.getSettings(), entries);
        MaxentModel ageModel = trainer.train(eventStream);
        samples.unpersist();
        data.unpersist();
        sc.stop();
        HashMap manifestInfoEntries = new HashMap();
        AgeClassifyFactory factory = AgeClassifyFactory.create((String)"AgeClassifyFactory", (Tokenizer)wrapper.getTokenizer(), (FeatureGenerator[])wrapper.getFeatureGenerators());
        return new AgeClassifyModel(languageCode, ageModel, manifestInfoEntries, factory);
    }

    public static void main(String[] args) {
        AgeClassifyModel model;
        if (args.length < 2) {
            System.out.println("usage: <input> <output>\n");
            System.exit(0);
        }
        String input = args[0];
        String output = args[1];
        TrainingParameters params = new TrainingParameters();
        params.put("Cutoff", Integer.toString(0));
        params.put("Iterations", Integer.toString(100));
        try {
            model = AgeClassifySparkTrainer.createModel("en", input, "opennlp.tools.tokenize.SentenceTokenizer", "opennlp.tools.tokenize.BagOfWordsTokenizer", params);
        }
        catch (IOException e) {
            throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.getMessage(), (Throwable)e);
        }
        CmdLineUtil.writeModel((String)"age classifier", (File)new File(output), (BaseModel)model);
    }
}

