/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.classification.corpus;

import com.hankcs.hanlp.classification.corpus.Catalog;
import com.hankcs.hanlp.classification.corpus.Document;
import com.hankcs.hanlp.classification.corpus.IDataSet;
import com.hankcs.hanlp.classification.corpus.Lexicon;
import com.hankcs.hanlp.classification.models.AbstractModel;
import com.hankcs.hanlp.classification.tokenizers.HanLPTokenizer;
import com.hankcs.hanlp.classification.tokenizers.ITokenizer;
import com.hankcs.hanlp.classification.utilities.MathUtility;
import com.hankcs.hanlp.classification.utilities.TextProcessUtility;
import com.hankcs.hanlp.classification.utilities.io.ConsoleLogger;
import java.io.File;
import java.io.IOException;
import java.util.Map;

public abstract class AbstractDataSet
implements IDataSet {
    protected ITokenizer tokenizer;
    protected Catalog catalog;
    protected Lexicon lexicon;
    protected boolean testingDataSet;

    public AbstractDataSet(AbstractModel model) {
        this.lexicon = new Lexicon(model.wordIdTrie);
        this.tokenizer = model.tokenizer;
        this.catalog = new Catalog(model.catalog);
        this.testingDataSet = true;
    }

    public AbstractDataSet() {
        this.tokenizer = new HanLPTokenizer();
        this.catalog = new Catalog();
        this.lexicon = new Lexicon();
    }

    @Override
    public IDataSet setTokenizer(ITokenizer tokenizer) {
        this.tokenizer = tokenizer;
        return this;
    }

    @Override
    public Document convert(String category, String text) {
        String[] tokenArray = this.tokenizer.segment(text);
        return this.testingDataSet ? new Document(this.catalog.categoryId, this.lexicon.wordId, category, tokenArray) : new Document(this.catalog, this.lexicon, category, tokenArray);
    }

    @Override
    public ITokenizer getTokenizer() {
        return this.tokenizer;
    }

    @Override
    public Catalog getCatalog() {
        return this.catalog;
    }

    @Override
    public Lexicon getLexicon() {
        return this.lexicon;
    }

    @Override
    public IDataSet load(String folderPath, String charsetName) throws IllegalArgumentException, IOException {
        return this.load(folderPath, charsetName, 1.0);
    }

    @Override
    public IDataSet load(String folderPath) throws IllegalArgumentException, IOException {
        return this.load(folderPath, "UTF-8");
    }

    @Override
    public boolean isTestingDataSet() {
        return this.testingDataSet;
    }

    @Override
    public IDataSet load(String folderPath, String charsetName, double percentage) throws IllegalArgumentException, IOException {
        if (folderPath == null) {
            throw new IllegalArgumentException("\u53c2\u6570 folderPath == null");
        }
        File root = new File(folderPath);
        if (!root.exists()) {
            throw new IllegalArgumentException(String.format("\u76ee\u5f55 %s \u4e0d\u5b58\u5728", root.getAbsolutePath()));
        }
        if (!root.isDirectory()) {
            throw new IllegalArgumentException(String.format("\u76ee\u5f55 %s \u4e0d\u662f\u4e00\u4e2a\u76ee\u5f55", root.getAbsolutePath()));
        }
        if (percentage > 1.0 || percentage < -1.0) {
            throw new IllegalArgumentException("percentage \u7684\u7edd\u5bf9\u503c\u5fc5\u987b\u4ecb\u4e8e[0, 1]\u4e4b\u95f4");
        }
        File[] folders = root.listFiles();
        if (folders == null) {
            return null;
        }
        ConsoleLogger.logger.start("\u6a21\u5f0f:%s\n\u6587\u672c\u7f16\u7801:%s\n\u6839\u76ee\u5f55:%s\n\u52a0\u8f7d\u4e2d...\n", this.testingDataSet ? "\u6d4b\u8bd5\u96c6" : "\u8bad\u7ec3\u96c6", charsetName, folderPath);
        for (File folder : folders) {
            int e;
            int b;
            File[] files;
            if (folder.isFile() || (files = folder.listFiles()) == null) continue;
            String category = folder.getName();
            ConsoleLogger.logger.out("[%s]...", category);
            if (percentage > 0.0) {
                b = 0;
                e = (int)((double)files.length * percentage);
            } else {
                b = (int)((double)files.length * (1.0 + percentage));
                e = files.length;
            }
            int logEvery = (int)Math.ceil((float)(e - b) / 10000.0f);
            for (int i = b; i < e; ++i) {
                this.add(folder.getName(), TextProcessUtility.readTxt(files[i], charsetName));
                if (i % logEvery != 0) continue;
                ConsoleLogger.logger.out("%c[%s]...%.2f%%", 13, category, MathUtility.percentage(i - b + 1, e - b));
            }
            ConsoleLogger.logger.out(" %d \u7bc7\u6587\u6863\n", e - b);
        }
        ConsoleLogger.logger.finish(" \u52a0\u8f7d\u4e86 %d \u4e2a\u7c7b\u76ee,\u5171 %d \u7bc7\u6587\u6863\n", this.getCatalog().size(), this.size());
        return this;
    }

    @Override
    public IDataSet load(String folderPath, double rate) throws IllegalArgumentException, IOException {
        return null;
    }

    @Override
    public IDataSet add(Map<String, String[]> testingDataSet) {
        for (Map.Entry<String, String[]> entry : testingDataSet.entrySet()) {
            for (String document : entry.getValue()) {
                this.add(entry.getKey(), document);
            }
        }
        return this;
    }
}

