/*
 * Decompiled with CFR 0.152.
 */
package org.deeplearning4j.models.embeddings.loader;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.zip.GZIPInputStream;
import org.apache.commons.compress.compressors.gzip.GzipUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.deeplearning4j.berkeley.Pair;
import org.deeplearning4j.models.embeddings.WeightLookupTable;
import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable;
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors;
import org.deeplearning4j.models.embeddings.wordvectors.WordVectorsImpl;
import org.deeplearning4j.models.glove.Glove;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.Word2Vec;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.models.word2vec.wordstore.inmemory.InMemoryLookupCache;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.ops.transforms.Transforms;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helpers for loading and saving word vectors.
 *
 * <p>Covers three families of operations:
 * <ul>
 *   <li>reading "Google model" files, in text or binary form, optionally gzip-compressed
 *       (decided from the file name);</li>
 *   <li>writing and reading plain-text vector files with one {@code word v1 v2 ...} line
 *       per word;</li>
 *   <li>writing t-SNE coordinates together with their words as CSV.</li>
 * </ul>
 *
 * <p>Text files are read and written with the platform default charset (no charset is passed
 * to {@link FileReader}, {@link FileWriter} or {@link InputStreamReader}); tokens in binary
 * models are decoded as UTF-8.
 */
public class WordVectorSerializer {
    /** Value of the {@code lineBreaks} flag used when the caller does not supply one. */
    private static final boolean DEFAULT_LINEBREAKS = false;
    /** Initial capacity, in bytes, of the buffer that collects one token in {@link #readString}. */
    private static final int MAX_SIZE = 50;
    private static final Logger log = LoggerFactory.getLogger(WordVectorSerializer.class);

    /**
     * Loads a Google model file, assuming binary vectors are not followed by an extra byte.
     *
     * @param modelFile the model file; gzip is assumed when the file name looks compressed
     * @param binary    {@code true} to read the binary layout, {@code false} for the text layout
     * @return the loaded model
     * @throws IOException if the file cannot be read or ends prematurely
     */
    public static Word2Vec loadGoogleModel(File modelFile, boolean binary) throws IOException {
        return loadGoogleModel(modelFile, binary, DEFAULT_LINEBREAKS);
    }

    /**
     * Loads a Google model file.
     *
     * @param modelFile  the model file; gzip is assumed when the file name looks compressed
     * @param binary     {@code true} to read the binary layout, {@code false} for the text layout
     * @param lineBreaks only used for binary models: when {@code true}, one extra byte is
     *                   skipped after every vector
     * @return the loaded model
     * @throws IOException if the file cannot be read or ends prematurely
     */
    public static Word2Vec loadGoogleModel(File modelFile, boolean binary, boolean lineBreaks) throws IOException {
        if (binary) {
            return readBinaryModel(modelFile, lineBreaks);
        }
        return readTextModel(modelFile);
    }

    /**
     * Opens the model file, wrapping it in a {@link GZIPInputStream} when
     * {@link GzipUtils#isCompressedFilename(String)} accepts its name.
     */
    private static InputStream openModelStream(File modelFile) throws IOException {
        boolean compressed = GzipUtils.isCompressedFilename(modelFile.getName());
        FileInputStream fileStream = new FileInputStream(modelFile);
        if (!compressed) {
            return fileStream;
        }
        try {
            return new GZIPInputStream(fileStream);
        } catch (IOException e) {
            // The gzip header could not be read: do not leak the underlying file handle.
            fileStream.close();
            throw e;
        }
    }

    /** Records {@code word} in the cache under the next free index with a frequency of 1.0. */
    private static void registerWord(InMemoryLookupCache cache, String word) {
        cache.addWordToIndex(cache.numWords(), word);
        cache.addToken(new VocabWord(1.0, word));
        cache.putVocabWord(word);
    }

    /**
     * Reads the text layout: a header line {@code "<words> <layerSize>"} followed by one
     * {@code "word v1 v2 ..."} line per word, all separated by single spaces. Each vector is
     * passed through {@link Transforms#unitVec(INDArray)} before being stored.
     *
     * @throws IOException           if the file cannot be read or is empty
     * @throws NumberFormatException if the header or a vector component is not numeric
     */
    private static Word2Vec readTextModel(File modelFile) throws IOException, NumberFormatException {
        Word2Vec ret = new Word2Vec();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(openModelStream(modelFile)))) {
            String header = reader.readLine();
            if (header == null) {
                throw new IOException("Model file " + modelFile + " is empty; expected a header line");
            }
            String[] initial = header.split(" ");
            int words = Integer.parseInt(initial[0]);
            int layerSize = Integer.parseInt(initial[1]);
            INDArray syn0 = Nd4j.create(words, layerSize);
            InMemoryLookupCache cache = new InMemoryLookupCache(false);
            int currLine = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                String[] split = line.split(" ");
                assert (split.length == layerSize + 1);
                String word = split[0];
                float[] vector = new float[split.length - 1];
                for (int i = 1; i < split.length; ++i) {
                    vector[i - 1] = Float.parseFloat(split[i]);
                }
                syn0.putRow(currLine, Transforms.unitVec(Nd4j.create(vector)));
                registerWord(cache, word);
                ++currLine;
            }
            InMemoryLookupTable lookupTable = (InMemoryLookupTable) new InMemoryLookupTable.Builder()
                    .cache(cache).vectorLength(layerSize).build();
            lookupTable.setSyn0(syn0);
            ret.setVocab(cache);
            ret.setLookupTable(lookupTable);
        }
        return ret;
    }

    /**
     * Reads the binary layout: two tokens (word count, vector size) read with
     * {@link #readString(DataInputStream)}, then for every word one token followed by
     * {@code size} floats read with {@link #readFloat(InputStream)}. Each vector is passed
     * through {@link Transforms#unitVec(INDArray)} before being stored.
     *
     * @param linebreaks when {@code true}, one extra byte is consumed after every vector
     * @throws IOException           if the file cannot be read or ends prematurely
     * @throws NumberFormatException if the word count or vector size is not an integer
     */
    private static Word2Vec readBinaryModel(File modelFile, boolean linebreaks) throws NumberFormatException, IOException {
        InMemoryLookupTable lookupTable;
        InMemoryLookupCache cache;
        INDArray syn0;
        try (DataInputStream dis = new DataInputStream(new BufferedInputStream(openModelStream(modelFile)))) {
            int words = Integer.parseInt(readString(dis));
            int size = Integer.parseInt(readString(dis));
            syn0 = Nd4j.create(words, size);
            cache = new InMemoryLookupCache(false);
            lookupTable = (InMemoryLookupTable) new InMemoryLookupTable.Builder()
                    .cache(cache).vectorLength(size).build();
            for (int i = 0; i < words; ++i) {
                String word = readString(dis);
                log.trace("Loading {} with word {}", word, i);
                float[] vector = new float[size];
                for (int j = 0; j < size; ++j) {
                    vector[j] = readFloat(dis);
                }
                syn0.putRow(i, Transforms.unitVec(Nd4j.create(vector)));
                registerWord(cache, word);
                if (linebreaks) {
                    // Skip the single separator byte that follows each vector.
                    dis.readByte();
                }
            }
        }
        Word2Vec ret = new Word2Vec();
        lookupTable.setSyn0(syn0);
        ret.setVocab(cache);
        ret.setLookupTable(lookupTable);
        return ret;
    }

    /**
     * Reads exactly four bytes from the stream and decodes them with {@link #getFloat(byte[])}.
     *
     * @param is the stream to read from
     * @return the decoded float
     * @throws EOFException if the stream ends before four bytes were read
     * @throws IOException  if reading fails
     */
    public static float readFloat(InputStream is) throws IOException {
        byte[] bytes = new byte[4];
        int filled = 0;
        // A single read() may legally return fewer bytes than requested, so keep reading
        // until the buffer is full instead of decoding a partially filled array.
        while (filled < bytes.length) {
            int count = is.read(bytes, filled, bytes.length - filled);
            if (count < 0) {
                throw new EOFException("Expected 4 bytes for a float but the stream ended after " + filled);
            }
            filled += count;
        }
        return getFloat(bytes);
    }

    /**
     * Assembles a float from the first four bytes of {@code b}, least-significant byte first.
     *
     * @param b at least four bytes
     * @return the float whose IEEE 754 bit pattern is formed by those bytes
     */
    public static float getFloat(byte[] b) {
        int bits = (b[0] & 0xFF)
                | (b[1] & 0xFF) << 8
                | (b[2] & 0xFF) << 16
                | (b[3] & 0xFF) << 24;
        return Float.intBitsToFloat(bits);
    }

    /**
     * Reads bytes up to the next space (32) or line feed (10) and returns them as a string.
     * The delimiter is consumed but not included in the result.
     *
     * @param dis the stream to read from
     * @return the token preceding the delimiter, decoded as UTF-8; empty if the delimiter
     *         is the first byte read
     * @throws IOException if reading fails or the stream ends before a delimiter is found
     */
    public static String readString(DataInputStream dis) throws IOException {
        // Collect the whole token before decoding: decoding in fixed-size chunks would break
        // any multi-byte character that happens to straddle a chunk boundary.
        ByteArrayOutputStream token = new ByteArrayOutputStream(MAX_SIZE);
        for (byte b = dis.readByte(); b != ' ' && b != '\n'; b = dis.readByte()) {
            token.write(b);
        }
        return new String(token.toByteArray(), StandardCharsets.UTF_8);
    }

    /** Joins the components of {@code vector}, rendered as doubles, with {@code separator}. */
    private static String joinComponents(INDArray vector, String separator) {
        StringBuilder sb = new StringBuilder();
        for (int j = 0; j < vector.length(); ++j) {
            if (j > 0) {
                sb.append(separator);
            }
            sb.append(vector.getDouble(j));
        }
        return sb.toString();
    }

    /** Writes one {@code "word v1 v2 ...\n"} line; spaces inside the word become underscores. */
    private static void writeVectorLine(BufferedWriter out, String word, INDArray vector) throws IOException {
        out.write(word.replace(' ', '_') + " " + joinComponents(vector, " ") + "\n");
    }

    /**
     * Writes one line per row index of the lookup table's syn0 matrix, in the form
     * {@code "word v1 v2 ..."}. Indices for which the cache has no word are skipped.
     * An existing file at {@code path} is overwritten.
     *
     * @param lookupTable source of the vectors
     * @param cache       maps row indices to words
     * @param path        destination file path
     * @throws IOException if the file cannot be written
     */
    public static void writeWordVectors(InMemoryLookupTable lookupTable, InMemoryLookupCache cache, String path) throws IOException {
        // try-with-resources closes (and thereby flushes) the writer even if a write fails.
        try (BufferedWriter write = new BufferedWriter(new FileWriter(new File(path), false))) {
            for (int i = 0; i < lookupTable.getSyn0().rows(); ++i) {
                String word = cache.wordAtIndex(i);
                if (word == null) {
                    continue;
                }
                writeVectorLine(write, word, lookupTable.vector(word));
            }
        }
    }

    /**
     * Writes one {@code "word v1 v2 ..."} line for every non-null word in the model's
     * vocabulary and logs how many lines were written. An existing file at {@code path}
     * is overwritten.
     *
     * @param vec  the model whose vectors are written
     * @param path destination file path
     * @throws IOException if the file cannot be written
     */
    public static void writeWordVectors(Word2Vec vec, String path) throws IOException {
        int words = 0;
        // try-with-resources closes (and thereby flushes) the writer even if a write fails.
        try (BufferedWriter write = new BufferedWriter(new FileWriter(new File(path), false))) {
            for (String word : vec.vocab().words()) {
                if (word == null) {
                    continue;
                }
                writeVectorLine(write, word, vec.getWordVectorMatrix(word));
                ++words;
            }
        }
        log.info("Wrote {} with size of {}", words, vec.lookupTable().layerSize());
    }

    /**
     * Wraps a lookup table and a vocabulary in a {@link WordVectors} instance.
     *
     * @param table the lookup table to expose
     * @param vocab the vocabulary to expose
     * @return a new {@link WordVectorsImpl} backed by both arguments
     */
    public static WordVectors fromTableAndVocab(WeightLookupTable table, VocabCache vocab) {
        WordVectorsImpl vectors = new WordVectorsImpl();
        vectors.setLookupTable(table);
        vectors.setVocab(vocab);
        return vectors;
    }

    /**
     * Wraps a (lookup table, vocabulary) pair in a {@link WordVectors} instance.
     *
     * @param pair first element is the lookup table, second the vocabulary
     * @return a new {@link WordVectorsImpl} backed by both elements
     */
    public static WordVectors fromPair(Pair<InMemoryLookupTable, VocabCache> pair) {
        WordVectorsImpl vectors = new WordVectorsImpl();
        vectors.setLookupTable((WeightLookupTable) pair.getFirst());
        vectors.setVocab((VocabCache) pair.getSecond());
        return vectors;
    }

    /**
     * Loads a plain-text vector file via {@link #loadTxt(File)} and wraps the result.
     *
     * @param vectorsFile file with one {@code "word v1 v2 ..."} line per word
     * @return word vectors backed by the loaded table and vocabulary
     * @throws FileNotFoundException    if the file does not exist or cannot be opened
     * @throws IllegalArgumentException if the file contains no lines
     */
    public static WordVectors loadTxtVectors(File vectorsFile) throws FileNotFoundException {
        return fromPair(loadTxt(vectorsFile));
    }

    /**
     * Loads a plain-text vector file with one {@code "word v1 v2 ..."} line per word, fields
     * separated by single spaces. The vector length is taken from the first line. NaNs in the
     * resulting matrix are cleared with {@link Nd4j#clearNans(INDArray)}.
     *
     * @param vectorsFile the file to read
     * @return the lookup table holding the vectors, paired with the vocabulary
     * @throws FileNotFoundException    if the file does not exist or cannot be opened
     * @throws IllegalArgumentException if the file contains no lines
     */
    public static Pair<InMemoryLookupTable, VocabCache> loadTxt(File vectorsFile) throws FileNotFoundException {
        BufferedReader reader = new BufferedReader(new FileReader(vectorsFile));
        LineIterator iter = IOUtils.lineIterator(reader);
        InMemoryLookupCache cache;
        ArrayList<INDArray> arrays = new ArrayList<INDArray>();
        try {
            cache = new InMemoryLookupCache();
            while (iter.hasNext()) {
                String[] split = iter.nextLine().split(" ");
                String word = split[0];
                VocabWord word1 = new VocabWord(1.0, word);
                cache.addToken(word1);
                cache.addWordToIndex(cache.numWords(), word);
                // NOTE(review): the index is read after addWordToIndex; whether numWords() has
                // already advanced at this point depends on InMemoryLookupCache - confirm this
                // is not off by one.
                word1.setIndex(cache.numWords());
                cache.putVocabWord(word);
                INDArray row = Nd4j.create((DataBuffer) Nd4j.createBuffer(split.length - 1));
                for (int i = 1; i < split.length; ++i) {
                    row.putScalar(i - 1, Float.parseFloat(split[i]));
                }
                arrays.add(row);
            }
        } finally {
            // Closes the underlying reader as well, also when a line fails to parse.
            iter.close();
        }
        if (arrays.isEmpty()) {
            throw new IllegalArgumentException("Vectors file " + vectorsFile + " contains no word vectors");
        }
        int vectorLength = arrays.get(0).columns();
        INDArray syn = Nd4j.create(new int[]{arrays.size(), vectorLength});
        for (int i = 0; i < syn.rows(); ++i) {
            syn.putRow(i, arrays.get(i));
        }
        InMemoryLookupTable lookupTable = (InMemoryLookupTable) new InMemoryLookupTable.Builder()
                .vectorLength(vectorLength).useAdaGrad(false).cache(cache).build();
        Nd4j.clearNans(syn);
        lookupTable.setSyn0(syn);
        return new Pair<InMemoryLookupTable, VocabCache>(lookupTable, cache);
    }

    /**
     * Writes one CSV line {@code "c1,c2,...,word \n"} for every non-null word in the
     * vocabulary, where the coordinates come from the row of {@code tsne} at the word's index.
     *
     * @param vocab must be an {@link InMemoryLookupCache}
     * @return the number of lines written
     */
    private static int writeTsneRows(VocabCache vocab, INDArray tsne, File csv) throws IOException {
        InMemoryLookupCache cache = (InMemoryLookupCache) vocab;
        int written = 0;
        // try-with-resources closes (and thereby flushes) the writer even if a write fails.
        try (BufferedWriter write = new BufferedWriter(new FileWriter(csv))) {
            for (String word : vocab.words()) {
                if (word == null) {
                    continue;
                }
                INDArray coordinates = tsne.getRow(cache.wordFor(word).getIndex());
                write.write(joinComponents(coordinates, ",") + "," + word + " " + "\n");
                ++written;
            }
        }
        return written;
    }

    /**
     * Writes t-SNE coordinates for a GloVe model's vocabulary as CSV, one
     * {@code "c1,c2,...,word"} line per word, and logs how many lines were written.
     *
     * @param vec  the model supplying the vocabulary; its vocab must be an
     *             {@link InMemoryLookupCache}
     * @param tsne matrix whose row at a word's index holds that word's coordinates
     * @param csv  destination file, overwritten if present
     * @throws Exception if the file cannot be written
     */
    public static void writeTsneFormat(Glove vec, INDArray tsne, File csv) throws Exception {
        int words = writeTsneRows(vec.vocab(), tsne, csv);
        log.info("Wrote {} with size of {}", words, vec.lookupTable().getVectorLength());
    }

    /**
     * Writes t-SNE coordinates for a Word2Vec model's vocabulary as CSV, one
     * {@code "c1,c2,...,word"} line per word, and logs how many lines were written.
     *
     * @param vec  the model supplying the vocabulary; its vocab must be an
     *             {@link InMemoryLookupCache}
     * @param tsne matrix whose row at a word's index holds that word's coordinates
     * @param csv  destination file, overwritten if present
     * @throws Exception if the file cannot be written
     */
    public static void writeTsneFormat(Word2Vec vec, INDArray tsne, File csv) throws Exception {
        int words = writeTsneRows(vec.vocab(), tsne, csv);
        log.info("Wrote {} with size of {}", words, vec.lookupTable().layerSize());
    }
}

