/*
 * Decompiled with CFR 0.152.
 */
package org.deeplearning4j.models.word2vec.loader;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.Word2Vec;
import org.deeplearning4j.models.word2vec.wordstore.inmemory.InMemoryLookupCache;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.buffer.FloatBuffer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

/**
 * Static helpers for reading and writing word2vec models and their word vectors.
 *
 * <p>Every method opens the file it is given, processes it completely and closes it
 * again before returning, including when parsing fails part-way through.
 */
public class Word2VecLoader {
    /** Initial capacity, in bytes, of the buffer that accumulates one token in {@link #readString}. */
    private static final int MAX_SIZE = 50;

    /**
     * Loads a word2vec model from {@code path}.
     *
     * <p>Binary layout as parsed here: two tokens (word count, vector size), each terminated by
     * a space or a newline, followed for every word by its token, {@code size} little-endian
     * 4-byte floats and one separator byte. Each vector is scaled to unit Euclidean length.
     *
     * <p>Text layout as parsed here: a first line {@code "<words> <layerSize>"}, then one line
     * per word of the form {@code "<word> <v1> <v2> ..."}, all separated by single spaces.
     * Vectors are stored as read, without normalisation.
     *
     * @param path   location of the model file
     * @param binary {@code true} to parse the binary layout, {@code false} for the text layout
     * @return a {@link Word2Vec} whose cache holds one vector per word that was read
     * @throws IOException           if the file cannot be opened or read, ends prematurely, or
     *                               contains more rows/components than its header declares
     * @throws NumberFormatException if a header field or a text-format component is not a number
     */
    public static Word2Vec loadGoogleModel(String path, boolean binary) throws IOException {
        return binary ? loadBinaryModel(path) : loadTextModel(path);
    }

    /** Parses the binary layout described on {@link #loadGoogleModel(String, boolean)}. */
    private static Word2Vec loadBinaryModel(String path) throws IOException {
        // try-with-resources: nothing is closed through a null reference when the open itself
        // fails, so the original FileNotFoundException reaches the caller unmasked.
        try (InputStream file = new FileInputStream(path);
             DataInputStream dis = new DataInputStream(new BufferedInputStream(file))) {
            int words = Integer.parseInt(readString(dis));
            int size = Integer.parseInt(readString(dis));
            checkDimensions(words, size, path);

            // Rows are assigned one by one below, so only the outer array is allocated up front.
            float[][] data = new float[words][];
            InMemoryLookupCache cache = new InMemoryLookupCache.Builder().vectorLength(size).build();
            for (int i = 0; i < words; ++i) {
                String word = readString(dis);
                float[] vector = new float[size];
                double sumOfSquares = 0.0;
                for (int j = 0; j < size; ++j) {
                    float value = readFloat(dis);
                    sumOfSquares += (double) (value * value);
                    vector[j] = value;
                }
                double norm = Math.sqrt(sumOfSquares);
                // An all-zero vector has norm 0; dividing would turn every component into NaN.
                if (norm > 0.0) {
                    for (int j = 0; j < size; ++j) {
                        vector[j] = (float) ((double) vector[j] / norm);
                    }
                }
                data[i] = vector;
                registerWord(cache, word);
                // Skip the single separator byte that follows each vector
                // (presumably the '\n' emitted by the reference word2vec tool).
                dis.read();
            }
            return buildModel(cache, data, words, size);
        }
    }

    /** Parses the text layout described on {@link #loadGoogleModel(String, boolean)}. */
    private static Word2Vec loadTextModel(String path) throws IOException {
        // NOTE(review): FileReader decodes with the platform default charset, mirroring the
        // FileWriter used by writeWordVectors; switch both together if UTF-8 is required.
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(path)))) {
            String header = reader.readLine();
            if (header == null) {
                throw new IOException("Empty word vector file: " + path);
            }
            String[] initial = header.split(" ");
            if (initial.length < 2) {
                throw new IOException("Malformed header '" + header + "' in " + path
                        + "; expected '<words> <layerSize>'");
            }
            int words = Integer.parseInt(initial[0]);
            int layerSize = Integer.parseInt(initial[1]);
            checkDimensions(words, layerSize, path);

            InMemoryLookupCache cache = new InMemoryLookupCache.Builder().vectorLength(layerSize).build();
            float[][] data = new float[words][];
            int count = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    // A blank line carries no word; registering "" would pollute the vocabulary.
                    continue;
                }
                String[] split = line.split(" ");
                if (count >= words) {
                    throw new IOException("File " + path + " contains more than the " + words
                            + " rows declared in its header");
                }
                if (split.length - 1 > layerSize) {
                    throw new IOException("Row for '" + split[0] + "' in " + path + " has "
                            + (split.length - 1) + " components; header declares " + layerSize);
                }
                // Rows shorter than layerSize stay zero-padded at the tail.
                float[] buffer = new float[layerSize];
                for (int i = 1; i < split.length; ++i) {
                    buffer[i - 1] = Float.parseFloat(split[i]);
                }
                data[count++] = buffer;
                registerWord(cache, split[0]);
            }
            // Only rows actually read are stored, even when the header promised more.
            return buildModel(cache, data, count, layerSize);
        }
    }

    /** Rejects negative header values before they surface as a NegativeArraySizeException. */
    private static void checkDimensions(int words, int layerSize, String path) throws IOException {
        if (words < 0 || layerSize < 0) {
            throw new IOException("Invalid header in " + path + ": words=" + words
                    + ", layerSize=" + layerSize);
        }
    }

    /** Adds {@code word} to the cache's index, token store and vocabulary, in that order. */
    private static void registerWord(InMemoryLookupCache cache, String word) {
        cache.addWordToIndex(cache.numWords(), word);
        cache.addToken(new VocabWord(1.0, word));
        cache.putVocabWord(word);
    }

    /** Resets the cache weights, stores the first {@code count} rows of {@code data} and wraps the cache. */
    private static Word2Vec buildModel(InMemoryLookupCache cache, float[][] data, int count, int layerSize) {
        cache.resetWeights();
        for (int i = 0; i < count; ++i) {
            cache.putVector(cache.wordAtIndex(i), Nd4j.create((DataBuffer) new FloatBuffer(data[i])));
        }
        Word2Vec ret = new Word2Vec();
        ret.setCache(cache);
        ret.setLayerSize(layerSize);
        return ret;
    }

    /**
     * Reads bytes up to (and consuming) the next space or newline and returns them as a string.
     *
     * <p>The whole token is collected before decoding so that a multi-byte character is never
     * split across two decode calls; the bytes are interpreted as UTF-8.
     *
     * @param dis stream positioned at the first byte of the token
     * @return the decoded token, without its terminator; empty if the terminator comes first
     * @throws IOException if the stream ends before a terminator is found, or on a read error
     */
    private static String readString(DataInputStream dis) throws IOException {
        ByteArrayOutputStream token = new ByteArrayOutputStream(MAX_SIZE);
        byte b = dis.readByte();
        while (b != ' ' && b != '\n') {
            token.write(b);
            b = dis.readByte();
        }
        return new String(token.toByteArray(), StandardCharsets.UTF_8);
    }

    /**
     * Reads four bytes from {@code is} and decodes them as a little-endian IEEE-754 float.
     *
     * @param is stream to read from
     * @return the decoded value
     * @throws EOFException if the stream ends before four bytes are available
     * @throws IOException  on any other read failure
     */
    public static float readFloat(InputStream is) throws IOException {
        byte[] bytes = new byte[4];
        int filled = 0;
        // A single read() may legally deliver fewer bytes than requested; keep going until full.
        while (filled < bytes.length) {
            int n = is.read(bytes, filled, bytes.length - filled);
            if (n < 0) {
                throw new EOFException("Stream ended after " + filled + " of 4 bytes of a float");
            }
            filled += n;
        }
        return Word2VecLoader.getFloat(bytes);
    }

    /**
     * Assembles a float from the first four bytes of {@code b}, least significant byte first.
     *
     * @param b array holding at least four bytes
     * @return the float whose bit pattern is {@code b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24}
     */
    public static float getFloat(byte[] b) {
        int bits = (b[0] & 0xFF)
                | (b[1] & 0xFF) << 8
                | (b[2] & 0xFF) << 16
                | (b[3] & 0xFF) << 24;
        return Float.intBitsToFloat(bits);
    }

    /** Appends every component of {@code vector} to {@code sb}, placing {@code separator} between them. */
    private static void appendVector(StringBuilder sb, INDArray vector, String separator) {
        for (int j = 0; j < vector.length(); ++j) {
            if (j > 0) {
                sb.append(separator);
            }
            sb.append(vector.getDouble(j));
        }
    }

    /**
     * Writes one line per indexed word of the cache in the form {@code "<word> <v1> <v2> ..."}.
     *
     * <p>Indices run over the rows of {@code l.getSyn0()}; indices with no word are skipped.
     * No header line is written. An existing file at {@code path} is overwritten.
     *
     * @param l    cache supplying words and vectors
     * @param path destination file
     * @throws IOException if the file cannot be created or written
     */
    public static void writeWordVectors(InMemoryLookupCache l, String path) throws IOException {
        try (BufferedWriter write = new BufferedWriter(new FileWriter(new File(path), false))) {
            for (int i = 0; i < l.getSyn0().rows(); ++i) {
                String word = l.wordAtIndex(i);
                if (word == null) {
                    continue;
                }
                StringBuilder sb = new StringBuilder();
                sb.append(word);
                sb.append(" ");
                appendVector(sb, l.vector(word), " ");
                sb.append("\n");
                write.write(sb.toString());
            }
            write.flush();
        }
    }

    /**
     * Writes one line per word of the model's cache in the form {@code "<word> <v1> <v2> ..."}
     * and prints the number of words written to standard output.
     *
     * <p>No header line is written. An existing file at {@code path} is overwritten.
     *
     * @param vec  model supplying words and vectors
     * @param path destination file
     * @throws IOException if the file cannot be created or written
     */
    public static void writeWordVectors(Word2Vec vec, String path) throws IOException {
        int words = 0;
        try (BufferedWriter write = new BufferedWriter(new FileWriter(new File(path), false))) {
            for (String word : vec.getCache().words()) {
                if (word == null) {
                    continue;
                }
                StringBuilder sb = new StringBuilder();
                sb.append(word);
                sb.append(" ");
                appendVector(sb, vec.getWordVectorMatrix(word), " ");
                sb.append("\n");
                write.write(sb.toString());
                // Count what was actually written so the summary below is truthful.
                ++words;
            }
            write.flush();
        }
        System.out.println("Wrote " + words + " with size of " + vec.getLayerSize());
    }

    /**
     * Reads a header-less text file of {@code "<word> <v1> <v2> ..."} lines into a new cache.
     *
     * <p>Blank lines are ignored. The parsed rows are stacked into a matrix whose column count
     * is taken from the first row, and that matrix is installed with {@code setSyn0}.
     * NOTE(review): the cache is built with {@code vectorLength(100)} regardless of the width
     * found in the file; confirm whether anything besides syn0 depends on that setting.
     *
     * @param path file to read
     * @return a cache holding every word of the file and the matrix of their vectors
     * @throws FileNotFoundException    if {@code path} cannot be opened
     * @throws IllegalArgumentException if the file contains no word vectors
     * @throws NumberFormatException    if a vector component is not a number
     */
    public static InMemoryLookupCache loadTxt(File path) throws FileNotFoundException {
        BufferedReader write = new BufferedReader(new FileReader(path));
        LineIterator iter = IOUtils.lineIterator((Reader) write);
        try {
            InMemoryLookupCache l = new InMemoryLookupCache.Builder().vectorLength(100).useAdaGrad(false).build();
            List<INDArray> arrays = new ArrayList<INDArray>();
            while (iter.hasNext()) {
                String line = iter.nextLine();
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] split = line.split(" ");
                String word = split[0];
                // Registration order (token first, then index) is kept exactly as this method
                // has always done it; it differs from loadGoogleModel.
                VocabWord word1 = new VocabWord(1.0, word);
                l.addToken(word1);
                l.addWordToIndex(l.numWords(), word);
                l.putVocabWord(word);
                INDArray row = Nd4j.create((DataBuffer) new FloatBuffer(split.length - 1));
                for (int i = 1; i < split.length; ++i) {
                    row.putScalar(i - 1, Float.parseFloat(split[i]));
                }
                arrays.add(row);
            }
            if (arrays.isEmpty()) {
                throw new IllegalArgumentException("No word vectors found in " + path);
            }
            INDArray syn = Nd4j.create((int[]) new int[]{arrays.size(), arrays.get(0).columns()});
            for (int i = 0; i < syn.rows(); ++i) {
                syn.putRow(i, arrays.get(i));
            }
            l.setSyn0(syn);
            return l;
        } finally {
            // Closes the iterator and the reader it wraps, also when parsing fails.
            LineIterator.closeQuietly(iter);
        }
    }

    /**
     * Writes one CSV line per word: the components of the word's row in {@code tsne}, comma
     * separated, followed by a comma, the word itself and a trailing space. The number of
     * words written is printed to standard output.
     *
     * @param vec  model whose cache supplies the words and their row indices
     * @param tsne matrix holding one row per word index
     * @param csv  destination file; overwritten if it exists
     * @throws Exception if the file cannot be created or written
     */
    public static void writeTsneFormat(Word2Vec vec, INDArray tsne, File csv) throws Exception {
        int words = 0;
        InMemoryLookupCache l = (InMemoryLookupCache) vec.getCache();
        try (BufferedWriter write = new BufferedWriter(new FileWriter(csv))) {
            for (String word : vec.getCache().words()) {
                if (word == null) {
                    continue;
                }
                StringBuilder sb = new StringBuilder();
                appendVector(sb, tsne.getRow(l.wordFor(word).getIndex()), ",");
                sb.append(",");
                sb.append(word);
                sb.append(" ");
                sb.append("\n");
                write.write(sb.toString());
                // Count what was actually written so the summary below is truthful.
                ++words;
            }
            write.flush();
        }
        System.out.println("Wrote " + words + " with size of " + vec.getLayerSize());
    }
}

