/*
 * Decompiled with CFR 0.152.
 */
package edu.unh.cs.treccar_v2.playground;

import co.nstant.in.cbor.CborException;
import edu.unh.cs.treccar_v2.Data;
import edu.unh.cs.treccar_v2.read_data.DeserializeData;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;

public class SimpleCarTrainData_old {
    /**
     * Lower-case section headings that disqualify a paragraph: {@code getInstances}
     * skips every paragraph whose section path contains one of these headings.
     */
    public Set<String> forbiddenHeadings = new HashSet<String>();

    /**
     * Creates an extractor whose {@link #forbiddenHeadings} set is pre-filled with
     * the default list of excluded headings.
     */
    public SimpleCarTrainData_old() {
        Collections.addAll(
                this.forbiddenHeadings,
                "see also",
                "references",
                "external links",
                "notes",
                "bibliography",
                "gallery",
                "publications",
                "further reading",
                "track listing",
                "sources",
                "cast",
                "discography",
                "awards");
    }

    /**
     * Builds training instances, each consisting of one positive paragraph plus
     * four negative paragraphs drawn at random from the same page.
     *
     * <p>For every page yielded by {@code DeserializeData.iterableAnnotations} the
     * usable instances are collected via {@code getInstances}. Pages for which that
     * call raises {@code NotEnoughNegativesException} are skipped with a message on
     * stderr. An instance for which fewer than four negatives could be drawn is
     * reported and left out of the result.
     *
     * @param fileInputStream stream to read the pages from; it is always closed
     *                        before this method returns, also when an exception escapes
     * @return the instances that received exactly four negatives, in page order
     * @throws CborException declared for the deserialization step
     * @throws IOException   if closing the stream fails, or declared for the deserialization step
     */
    public List<InstanceWithNegatives> extractTrainData(FileInputStream fileInputStream) throws CborException, IOException {
        final int negativesPerInstance = 4;
        ArrayList<InstanceWithNegatives> megaresult = new ArrayList<InstanceWithNegatives>();
        try {
            for (Data.Page page : DeserializeData.iterableAnnotations(fileInputStream)) {
                try {
                    List<Instance> result = this.getInstances(page);
                    for (Instance positive : result) {
                        InstanceWithNegatives instanceWithNegatives = new InstanceWithNegatives(positive);
                        Set<Instance> paras = SimpleCarTrainData_old.drawRandomParagraphs(result, negativesPerInstance, positive.query.getSectionPath());
                        if (paras.size() != negativesPerInstance) {
                            System.err.println("Could not draw " + negativesPerInstance + " elements from page " + page.getPageName());
                            System.out.println("instanceWithNegatives = " + instanceWithNegatives);
                            continue;
                        }
                        for (Instance negative : paras) {
                            instanceWithNegatives.addNegativeParagraph(negative.paragraphContent);
                        }
                        megaresult.add(instanceWithNegatives);
                    }
                }
                catch (NotEnoughNegativesException ex) {
                    System.err.println("Not enough negatives for sectionpathlist " + ex.getNotSectionPathPrefix() + " in page " + page.getPageName());
                }
            }
        }
        finally {
            // Close on every path; previously an exception during iteration left the stream open.
            fileInputStream.close();
        }
        return megaresult;
    }

    /**
     * Builds judged test instances: for every usable instance of a page, four
     * random paragraphs from other sections of the same page are emitted with
     * judgment {@code SameArticleWrongSection}, followed by the instance itself
     * with judgment {@code Relevant}.
     *
     * <p>Instances for which fewer than four negatives could be drawn are skipped
     * silently. Pages for which {@code getInstances} raises
     * {@code NotEnoughNegativesException} are skipped with a message on stderr.
     *
     * @param fileInputStream stream to read the pages from; it is always closed
     *                        before this method returns, also when an exception escapes
     * @return the judged instances, negatives preceding their positive
     * @throws CborException declared for the deserialization step
     * @throws IOException   if closing the stream fails, or declared for the deserialization step
     */
    public List<JudgedInstance> extractTestData(FileInputStream fileInputStream) throws CborException, IOException {
        final int negativesPerInstance = 4;
        ArrayList<JudgedInstance> megaresult = new ArrayList<JudgedInstance>();
        try {
            for (Data.Page page : DeserializeData.iterableAnnotations(fileInputStream)) {
                try {
                    List<Instance> result = this.getInstances(page);
                    for (Instance instance : result) {
                        Set<Instance> paras = SimpleCarTrainData_old.drawRandomParagraphs(result, negativesPerInstance, instance.query.getSectionPath());
                        if (paras.size() != negativesPerInstance) {
                            continue;
                        }
                        for (Instance negInstance : paras) {
                            // The negative keeps its own paragraph but is paired with the positive's query.
                            megaresult.add(new JudgedInstance(instance.query, negInstance.paragraphId, negInstance.paragraphContent, JudgedInstance.Judgment.SameArticleWrongSection));
                        }
                        megaresult.add(new JudgedInstance(instance, JudgedInstance.Judgment.Relevant));
                    }
                }
                catch (NotEnoughNegativesException ex) {
                    System.err.println("Not enough negatives for sectionpathlist " + ex.getNotSectionPathPrefix() + " in page " + page.getPageName());
                }
            }
        }
        finally {
            // Close on every path; previously an exception during iteration left the stream open.
            fileInputStream.close();
        }
        return megaresult;
    }

    /**
     * Collects one {@link Query} per flat section path of every page, keyed by
     * its query id. A later query with the same id replaces an earlier one.
     *
     * @param fileInputStream stream to read the pages from; like the other
     *                        {@code extract*} methods, this method closes it before returning
     * @return map from query id to query
     * @throws CborException declared for the deserialization step
     * @throws IOException   if closing the stream fails, or declared for the deserialization step
     */
    public Map<String, Query> extractQueries(FileInputStream fileInputStream) throws CborException, IOException {
        HashMap<String, Query> queryMap = new HashMap<String, Query>();
        try {
            for (Data.Page page : DeserializeData.iterableAnnotations(fileInputStream)) {
                for (List<Data.Section> sectionPath : page.flatSectionPaths()) {
                    Query q = new Query(page.getPageName(), Data.sectionPathHeadings(sectionPath), Data.sectionPathId(page.getPageId(), sectionPath));
                    queryMap.put(q.getQueryId(), q);
                }
            }
        }
        finally {
            // The sibling extract* methods close their stream; this one used to leave it open.
            fileInputStream.close();
        }
        return queryMap;
    }

    /**
     * Collects the usable instances of every page into one flat list.
     *
     * <p>Pages for which {@code getInstances} raises
     * {@code NotEnoughNegativesException} are skipped with a message on stderr.
     *
     * @param fileInputStream stream to read the pages from; it is always closed
     *                        before this method returns, also when an exception escapes
     * @return all instances of all accepted pages, in page order
     * @throws CborException declared for the deserialization step
     * @throws IOException   if closing the stream fails, or declared for the deserialization step
     */
    public List<Instance> extractClusteringData(FileInputStream fileInputStream) throws CborException, IOException {
        ArrayList<Instance> megaresult = new ArrayList<Instance>();
        try {
            for (Data.Page page : DeserializeData.iterableAnnotations(fileInputStream)) {
                try {
                    megaresult.addAll(this.getInstances(page));
                }
                catch (NotEnoughNegativesException ex) {
                    System.err.println("Not enough negatives for sectionpathlist " + ex.getNotSectionPathPrefix() + " in page " + page.getPageName());
                }
            }
        }
        finally {
            // Close on every path; previously an exception during iteration left the stream open.
            fileInputStream.close();
        }
        return megaresult;
    }

    /**
     * Turns the paragraphs of one page into instances, leaving out paragraphs that
     * <ul>
     *   <li>have an empty section path,</li>
     *   <li>sit under a section whose lower-cased heading is in {@link #forbiddenHeadings}, or</li>
     *   <li>have fewer than 10 characters of text.</li>
     * </ul>
     * The surviving list is then checked by {@code filterInstancesWithFewNegatives}
     * with a minimum of 4 negatives.
     *
     * @param page page to extract from
     * @return the accepted instances of the page, in paragraph order
     * @throws NotEnoughNegativesException if the negatives check rejects the page
     */
    private List<Instance> getInstances(Data.Page page) throws NotEnoughNegativesException {
        final int minParagraphLength = 10;
        final int minNegatives = 4;
        ArrayList<Instance> result = new ArrayList<Instance>();
        for (Data.Page.SectionPathParagraphs sectpara : page.flatSectionPathsParagraphs()) {
            List<Data.Section> sectionpathList = sectpara.getSectionPath();
            if (sectionpathList.isEmpty() || this.hasForbiddenHeading(sectionpathList)) {
                continue;
            }
            String paratext = sectpara.getParagraph().getTextOnly();
            if (paratext.length() < minParagraphLength) {
                continue;
            }
            String paraId = sectpara.getParagraph().getParaId();
            Query query = new Query(page.getPageName(), Data.sectionPathHeadings(sectionpathList), Data.sectionPathId(page.getPageId(), sectionpathList));
            result.add(new Instance(query, paraId, paratext));
        }
        return this.filterInstancesWithFewNegatives(result, minNegatives);
    }

    /** Returns whether any section on the path has a heading listed in {@link #forbiddenHeadings}. */
    private boolean hasForbiddenHeading(List<Data.Section> sectionPath) {
        for (Data.Section section : sectionPath) {
            // Locale.ROOT keeps the match independent of the JVM's default locale
            // (a Turkish default locale would lower-case 'I' to a dotless 'ı').
            if (this.forbiddenHeadings.contains(section.getHeading().toLowerCase(Locale.ROOT))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Verifies that every section path occurring in {@code result} leaves at least
     * {@code minNegs} instances outside of it. "Outside" means: the instance's
     * section path string does not start with the section path string in question.
     *
     * <p>Despite the name, nothing is removed: the list is either returned
     * unchanged or the whole call fails.
     *
     * @param result  instances of one page
     * @param minNegs minimum number of instances required outside each section path
     * @return {@code result}, unchanged
     * @throws NotEnoughNegativesException for the first section path encountered
     *         that leaves fewer than {@code minNegs} instances outside of it
     */
    private List<Instance> filterInstancesWithFewNegatives(List<Instance> result, int minNegs) throws NotEnoughNegativesException {
        // Number of instances per exact section path string.
        HashMap<String, Integer> perSection = new HashMap<String, Integer>();
        for (Instance candidate : result) {
            String key = candidate.query.getSectionPath();
            Integer seenSoFar = perSection.get(key);
            perSection.put(key, seenSoFar == null ? 1 : seenSoFar + 1);
        }

        int total = result.size();
        for (String prefix : perSection.keySet()) {
            // Instances whose section path starts with this one cannot serve as negatives for it.
            int sharingPrefix = 0;
            for (Map.Entry<String, Integer> entry : perSection.entrySet()) {
                if (entry.getKey().startsWith(prefix)) {
                    sharingPrefix += entry.getValue();
                }
            }
            if (total - sharingPrefix < minNegs) {
                throw new NotEnoughNegativesException(prefix);
            }
        }
        return result;
    }

    /**
     * Draws up to {@code draws} distinct instances at random from {@code lines},
     * considering only instances whose section path string does not start with
     * {@code notSectionPathPrefix}.
     *
     * <p>If fewer than {@code draws} candidates exist, a note is printed to stdout
     * and all candidates are returned; the caller has to check the size of the result.
     *
     * @param lines                candidate instances
     * @param draws                number of instances wanted
     * @param notSectionPathPrefix section path prefix that excludes an instance from the draw
     * @return the sampled instances; may hold fewer than {@code draws} elements
     * @throws NotEnoughNegativesException declared, but not raised by this implementation
     */
    private static Set<Instance> drawRandomParagraphs(List<Instance> lines, int draws, String notSectionPathPrefix) throws NotEnoughNegativesException {
        ArrayList<Instance> candidates = new ArrayList<Instance>();
        HashSet<Instance> alreadyQueued = new HashSet<Instance>();
        for (Instance candidate : lines) {
            boolean underExcludedPrefix = candidate.query.getSectionPath().startsWith(notSectionPathPrefix);
            // Set.add returns false for an element that is already present, which filters duplicates.
            if (!underExcludedPrefix && alreadyQueued.add(candidate)) {
                candidates.add(candidate);
            }
        }

        Collections.shuffle(candidates);
        int available = candidates.size();
        if (available < draws) {
            System.out.println("negatives.size() < draws; negatives.size()=" + available);
        }

        int take = available < draws ? available : draws;
        HashSet<Instance> samples = new HashSet<Instance>();
        samples.addAll(candidates.subList(0, take));
        return samples;
    }

    /**
     * Command-line entry point. Reads the CBOR article file five times, once per
     * step, and writes four output files:
     * <ol>
     *   <li>query pass (result currently unused),</li>
     *   <li>training data as TSV,</li>
     *   <li>judged test data as TSV,</li>
     *   <li>clustering data as TSV,</li>
     *   <li>qrels lines for the clustering data.</li>
     * </ol>
     *
     * @param args {@code cborArticles trainingOut testOut clusterOut qrelsOut}
     * @throws IOException   if reading the input or writing an output fails
     * @throws CborException declared by the extraction methods
     */
    public static void main(String[] args) throws IOException, CborException {
        if (args.length < 5) {
            System.err.println("Usage: SimpleCarTrainData_old <cborArticles> <trainingOut> <testOut> <clusterOut> <qrelsOut>");
            System.exit(1);
        }
        // Kept from the original. Setting this property at runtime does not change the
        // JVM's already-initialised default charset, so the writers below request
        // UTF-8 explicitly instead of relying on it.
        System.setProperty("file.encoding", "UTF-8");
        String cborArticleInputFile = args[0];
        String trainingOutputFile = args[1];
        String testOutputFile = args[2];
        String clusterOutputFile = args[3];
        String qrelsOutputFile = args[4];

        System.out.println("hashing query ids");
        // The resulting map is not used below; the pass is kept so that an unreadable
        // input still fails before any output file is created.
        FileInputStream queryInput = new FileInputStream(new File(cborArticleInputFile));
        try {
            new SimpleCarTrainData_old().extractQueries(queryInput);
        }
        finally {
            queryInput.close();
        }

        System.out.println("training");
        List<InstanceWithNegatives> trainData;
        FileInputStream trainInput = new FileInputStream(new File(cborArticleInputFile));
        try {
            trainData = new SimpleCarTrainData_old().extractTrainData(trainInput);
        }
        finally {
            trainInput.close();
        }
        SimpleCarTrainData_old.writeLines(trainingOutputFile, trainData, false);

        System.out.println("testing");
        List<JudgedInstance> testData;
        FileInputStream testInput = new FileInputStream(new File(cborArticleInputFile));
        try {
            testData = new SimpleCarTrainData_old().extractTestData(testInput);
        }
        finally {
            testInput.close();
        }
        SimpleCarTrainData_old.writeLines(testOutputFile, testData, false);

        System.out.println("cluster");
        List<Instance> clusterData;
        FileInputStream clusterInput = new FileInputStream(new File(cborArticleInputFile));
        try {
            clusterData = new SimpleCarTrainData_old().extractClusteringData(clusterInput);
        }
        finally {
            clusterInput.close();
        }
        SimpleCarTrainData_old.writeLines(clusterOutputFile, clusterData, false);

        System.out.println("qrels");
        List<Instance> qrelsData;
        FileInputStream qrelsInput = new FileInputStream(new File(cborArticleInputFile));
        try {
            qrelsData = new SimpleCarTrainData_old().extractClusteringData(qrelsInput);
        }
        finally {
            qrelsInput.close();
        }
        SimpleCarTrainData_old.writeLines(qrelsOutputFile, qrelsData, true);
    }

    /**
     * Writes one line per instance to {@code outputFile} in UTF-8, using
     * {@code toQrelsLine()} when {@code asQrels} is set and {@code toTsvLine()}
     * otherwise. The writer is closed on every path.
     */
    private static void writeLines(String outputFile, List<? extends Instance> instances, boolean asQrels) throws IOException {
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(outputFile)), "UTF-8"));
        try {
            for (Instance instance : instances) {
                writer.write(asQrels ? instance.toQrelsLine() : instance.toTsvLine());
                writer.newLine();
            }
        }
        finally {
            writer.close();
        }
    }

    /**
     * Raised by {@code filterInstancesWithFewNegatives} when a page has too few
     * instances outside a given section path to serve as negatives for it.
     *
     * <p>Extends {@link Exception} rather than {@link Throwable} directly, which
     * is the conventional base for checked, recoverable conditions.
     */
    private static class NotEnoughNegativesException
    extends Exception {
        private static final long serialVersionUID = 1L;

        /** Section path string for which not enough negatives were available. */
        private final String notSectionPathPrefix;

        /**
         * @param notSectionPathPrefix section path string for which not enough negatives were available
         */
        public NotEnoughNegativesException(String notSectionPathPrefix) {
            super("Not enough negatives for section path prefix: " + notSectionPathPrefix);
            this.notSectionPathPrefix = notSectionPathPrefix;
        }

        /** @return the section path string passed to the constructor */
        public String getNotSectionPathPrefix() {
            return this.notSectionPathPrefix;
        }
    }

    /**
     * A query made of a page name, the list of headings leading to a section, and
     * a query id. Two queries are equal when their query ids are equal.
     */
    static class Query {
        private final String pagename;
        private final List<String> sectionpathlist;
        /** The headings of {@link #sectionpathlist} joined with single spaces. */
        private final String sectionpath;
        private String queryId;

        /**
         * @param pagename        name of the page the section belongs to
         * @param sectionpathlist headings on the path to the section; stored as given, not copied
         * @param queryId         identifier used for equality and hashing
         */
        public Query(String pagename, List<String> sectionpathlist, String queryId) {
            this.queryId = queryId;
            this.pagename = pagename;
            this.sectionpathlist = sectionpathlist;
            this.sectionpath = StringUtils.join(sectionpathlist, " ");
        }

        /** @return the section headings joined with single spaces */
        public String getSectionPath() {
            return this.sectionpath;
        }

        /** @return the page name */
        public String getPagename() {
            return this.pagename;
        }

        /** @return the heading list exactly as passed to the constructor */
        public List<String> getSectionPathList() {
            return this.sectionpathlist;
        }

        /** @return the query id */
        public String getQueryId() {
            return this.queryId;
        }

        /** Queries are equal when both query ids are equal, or both are {@code null}. */
        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (!(o instanceof Query)) {
                return false;
            }
            String mine = this.getQueryId();
            String theirs = ((Query)o).getQueryId();
            if (mine == null) {
                return theirs == null;
            }
            return mine.equals(theirs);
        }

        /** Hash of the query id, or 0 when the id is {@code null}. */
        @Override
        public int hashCode() {
            String id = this.getQueryId();
            return id == null ? 0 : id.hashCode();
        }
    }

    /**
     * An {@link Instance} that additionally carries the text of negative
     * paragraphs. Its TSV form lists query id, page name, section path and the
     * positive paragraph text, followed by one column per negative paragraph;
     * unlike {@link Instance}, it has no paragraph id column.
     */
    public static class InstanceWithNegatives
    extends Instance {
        /** Negative paragraph texts, each with newlines, tabs and carriage returns replaced by spaces. */
        protected List<String> negativeParagraphs = new ArrayList<String>();

        public InstanceWithNegatives(Query query, String paragraphId, String paragraphContent) {
            super(query, paragraphId, paragraphContent);
        }

        /** Copies query, paragraph id and paragraph content from {@code instance}; starts without negatives. */
        public InstanceWithNegatives(Instance instance) {
            this(instance.query, instance.paragraphId, instance.paragraphContent);
        }

        /**
         * Appends a negative paragraph after replacing every newline, tab and
         * carriage return by a space, so the text fits into a single TSV cell.
         */
        public void addNegativeParagraph(String paragraphContent) {
            String singleLine = paragraphContent.replaceAll("[\n\t\r]", " ");
            this.negativeParagraphs.add(singleLine);
        }

        /** @return query id, page name, section path, positive paragraph text, then all negatives */
        @Override
        public List<String> toTsvSeqments() {
            Query q = this.query;
            ArrayList<String> columns = new ArrayList<String>();
            columns.add(q.getQueryId());
            columns.add(q.getPagename());
            columns.add(q.getSectionPath());
            columns.add(this.paragraphContent);
            for (String negative : this.negativeParagraphs) {
                columns.add(negative);
            }
            return columns;
        }

        /** @return the columns of {@link #toTsvSeqments()} joined by tab characters */
        @Override
        public String toTsvLine() {
            List<String> columns = this.toTsvSeqments();
            return StringUtils.join(columns, "\t");
        }
    }

    /**
     * One paragraph together with the query (page and section path) it belongs to.
     * Equality and hashing are based on the paragraph id only, and only objects of
     * exactly the same class can be equal.
     */
    public static class Instance {
        protected final Query query;
        protected String paragraphId;
        /** Paragraph text with newlines, tabs and carriage returns replaced by spaces. */
        protected String paragraphContent;

        /**
         * @param query            query this paragraph belongs to
         * @param paragraphId      paragraph identifier
         * @param paragraphContent paragraph text; must not be {@code null}
         */
        public Instance(Query query, String paragraphId, String paragraphContent) {
            this.query = query;
            this.paragraphId = paragraphId;
            // Keep the sanitised text. The raw text used to be assigned right after this
            // line, so embedded tabs and newlines ended up in the TSV output.
            this.paragraphContent = paragraphContent.replaceAll("[\n\t\r]", " ");
        }

        /** @return query id, page name, section path, paragraph id and paragraph text */
        public List<String> toTsvSeqments() {
            ArrayList<String> result = new ArrayList<String>();
            result.add(this.query.getQueryId());
            result.add(this.query.getPagename());
            result.add(this.query.getSectionPath());
            result.add(this.paragraphId);
            result.add(this.paragraphContent);
            return result;
        }

        /** @return the columns of {@link #toTsvSeqments()} joined by tab characters */
        public String toTsvLine() {
            return StringUtils.join(this.toTsvSeqments(), "\t");
        }

        /** @return the space-separated line {@code <queryId> 0 <paragraphId> 1} */
        public String toQrelsLine() {
            ArrayList<String> result = new ArrayList<String>();
            result.add(this.query.getQueryId());
            result.add("0");
            result.add(this.paragraphId);
            result.add("1");
            return StringUtils.join(result, " ");
        }

        /** Instances of the same class are equal when their paragraph ids are equal. */
        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || this.getClass() != o.getClass()) {
                return false;
            }
            Instance instance = (Instance)o;
            return this.paragraphId != null ? this.paragraphId.equals(instance.paragraphId) : instance.paragraphId == null;
        }

        /** Hash of the paragraph id, or 0 when the id is {@code null}. */
        @Override
        public int hashCode() {
            return this.paragraphId != null ? this.paragraphId.hashCode() : 0;
        }
    }

    /**
     * An {@link Instance} with a relevance {@link Judgment} attached. Its TSV form
     * is the one of {@link Instance} plus a trailing judgment column.
     */
    public static class JudgedInstance
    extends Instance {
        /** Judgment of this instance; the initial value is replaced by every constructor. */
        protected Judgment judgment = Judgment.WrongArticle;

        public JudgedInstance(Query query, String paragraphId, String paragraphContent, Judgment judgment) {
            super(query, paragraphId, paragraphContent);
            this.judgment = judgment;
        }

        /** Copies query, paragraph id and paragraph content from {@code instance} and attaches {@code judgment}. */
        public JudgedInstance(Instance instance, Judgment judgment) {
            this(instance.query, instance.paragraphId, instance.paragraphContent, judgment);
        }

        /** @return the judgment of this instance */
        public Judgment getJudgment() {
            return this.judgment;
        }

        /** @return query id, page name, section path, paragraph id, paragraph text and judgment name */
        @Override
        public List<String> toTsvSeqments() {
            ArrayList<String> result = new ArrayList<String>();
            result.add(this.query.getQueryId());
            result.add(this.query.getPagename());
            result.add(this.query.getSectionPath());
            result.add(this.paragraphId);
            result.add(this.paragraphContent);
            result.add(this.judgment.toString());
            return result;
        }

        /** @return the columns of {@link #toTsvSeqments()} joined by tab characters */
        @Override
        public String toTsvLine() {
            return StringUtils.join(this.toTsvSeqments(), "\t");
        }

        /**
         * Judged instances are equal when paragraph id, judgment and query all match,
         * which are the same components {@link #hashCode()} is built from.
         */
        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (!(o instanceof JudgedInstance)) {
                return false;
            }
            // Covers the exact-class check and the paragraph id comparison.
            if (!super.equals(o)) {
                return false;
            }
            JudgedInstance that = (JudgedInstance)o;
            if (this.judgment != that.judgment) {
                return false;
            }
            // Compare against the other object's query; comparing this.query with itself
            // made the check a no-op.
            return this.query != null ? this.query.equals(that.query) : that.query == null;
        }

        @Override
        public int hashCode() {
            int result = super.hashCode();
            result = 31 * result + (this.judgment != null ? this.judgment.hashCode() : 0);
            result = 31 * result + (this.query != null ? this.query.hashCode() : 0);
            result = 31 * result + (this.paragraphId != null ? this.paragraphId.hashCode() : 0);
            return result;
        }

        /**
         * Relevance labels. In this file {@code Relevant} is given to a paragraph
         * paired with its own query and {@code SameArticleWrongSection} to a paragraph
         * drawn from a different section of the same page; {@code WrongArticle} is
         * only used as the field's initial value.
         */
        public static enum Judgment {
            Relevant,
            SameArticleWrongSection,
            WrongArticle;

        }
    }
}

