/*
 * Decompiled with CFR 0.152.
 */
package edu.unh.cs.treccar_v2.playground;

import co.nstant.in.cbor.CborException;
import edu.unh.cs.treccar_v2.Data;
import edu.unh.cs.treccar_v2.read_data.DeserializeData;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;

/**
 * Command-line tool that reads pages from a CBOR article file and writes one
 * tab-separated line per entity link found in the pages' paragraphs.
 *
 * <p>Usage: {@code <cborArticleInputFile> <linkOutputFile> [keyword ...]}. With the
 * flags currently hard-coded in {@link #main(String[])}, only paragraphs whose text
 * contains at least one of the keywords (case-insensitively) contribute links.
 */
public class LinksWithContextKeywords_ {
    /**
     * Reports whether the paragraph's plain text contains at least one keyword.
     *
     * @param para     paragraph whose text is searched
     * @param keywords keywords to look for; expected to be lower-cased already, as
     *                 {@link #main(String[])} does
     * @return {@code true} as soon as one keyword occurs as a substring of the
     *         lower-cased text, {@code false} if none does (including an empty list)
     */
    private static boolean paragraphTextContainsKeyword(Data.Paragraph para, List<String> keywords) {
        // Locale.ROOT keeps case folding independent of the JVM's default locale
        // (e.g. a Turkish locale would lower-case "I" to a dotless i and miss matches).
        String normtext = para.getTextOnly().toLowerCase(Locale.ROOT);
        for (String keyword : keywords) {
            if (normtext.contains(keyword)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Collects one {@link LinkInstance} per entity link in the paragraphs of a page.
     *
     * @param page            page whose flattened section-path/paragraph pairs are scanned
     * @param keywords        lower-cased keywords used when {@code filterByKeyword} is set
     * @param addParagraph    if {@code true}, each instance carries the paragraph text;
     *                        otherwise the text is the empty string
     * @param filterByKeyword if {@code true}, paragraphs containing none of the keywords
     *                        are skipped
     * @return the link instances in iteration order; never {@code null}
     */
    private List<LinkInstance> getInstances(Data.Page page, List<String> keywords, boolean addParagraph, boolean filterByKeyword) {
        List<LinkInstance> result = new ArrayList<>();
        for (Data.Page.SectionPathParagraphs sectparas : page.flatSectionPathsParagraphs()) {
            Data.Paragraph paragraph = sectparas.getParagraph();
            if (filterByKeyword && !paragraphTextContainsKeyword(paragraph, keywords)) {
                continue;
            }
            // Text and section path are the same for every link of this paragraph,
            // so they are built once per paragraph rather than once per link.
            String text = addParagraph ? paragraph.getTextOnly() : "";
            String sectPath = StringUtils.join(Data.sectionPathHeadings(sectparas.getSectionPath()), " ");
            for (String toPage : paragraph.getEntitiesOnly()) {
                result.add(new LinkInstance(page.getPageName(), sectPath, toPage, text));
            }
        }
        return result;
    }

    /**
     * Entry point: extracts links from the input file and writes them as TSV lines.
     *
     * @param args {@code args[0]} is the CBOR article input file, {@code args[1]} the
     *             output file, and any further arguments are keywords (trimmed and
     *             lower-cased before use)
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws IOException              if reading the input or writing the output fails
     * @throws CborException            if the input cannot be decoded
     */
    public static void main(String[] args) throws IOException, CborException {
        if (args.length < 2) {
            throw new IllegalArgumentException(
                    "usage: LinksWithContextKeywords_ <cborArticleInputFile> <linkOutputFile> [keyword ...]");
        }
        // Kept for compatibility with anything that reads this property. It does not
        // change the JVM's default charset after startup, so the writer below is given
        // UTF-8 explicitly.
        System.setProperty("file.encoding", "UTF-8");
        String cborArticleInputFile = args[0];
        String linkOutputFile = args[1];
        List<String> keywords = new ArrayList<>();
        for (int i = 2; i < args.length; ++i) {
            keywords.add(args[i].trim().toLowerCase(Locale.ROOT));
        }
        boolean addParagraph = false;
        boolean filterByKeyword = true;
        if (filterByKeyword) {
            System.out.println("extract links with keyword " + keywords + " from file " + cborArticleInputFile);
        } else {
            System.out.println("extract all links from " + cborArticleInputFile);
        }
        LinksWithContextKeywords_ extract = new LinksWithContextKeywords_();
        // try-with-resources closes both streams even when decoding or writing throws.
        try (FileInputStream fileInputStream = new FileInputStream(new File(cborArticleInputFile));
             BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                     new FileOutputStream(new File(linkOutputFile)), StandardCharsets.UTF_8))) {
            for (Data.Page page : DeserializeData.iterableAnnotations(fileInputStream)) {
                for (LinkInstance line : extract.getInstances(page, keywords, addParagraph, filterByKeyword)) {
                    writer.write(line.toTsvLine());
                    writer.newLine();
                }
            }
        }
    }

    /**
     * A single link from one page to another, together with the section path it was
     * found under and (optionally) the text of the containing paragraph.
     *
     * <p>Equality and hash code are based on {@code fromPage}, {@code sectionpath} and
     * {@code toPage} only; {@code paragraphContent} is not part of the identity.
     */
    public static class LinkInstance {
        /** Characters that would break the one-record-per-line, tab-separated layout. */
        private static final Pattern TSV_BREAKING_CHARS = Pattern.compile("[\n\t\r]");

        protected String fromPage;
        protected String sectionpath;
        protected String toPage;
        protected String paragraphContent;

        /**
         * Creates a link instance.
         *
         * @param fromPage         name of the page containing the link
         * @param sectionpath      section path of the paragraph containing the link
         * @param toPage           link target
         * @param paragraphContent paragraph text; newlines, tabs and carriage returns are
         *                         each replaced by a space, and {@code null} is stored as
         *                         the empty string
         */
        public LinkInstance(String fromPage, String sectionpath, String toPage, String paragraphContent) {
            this.fromPage = fromPage;
            this.sectionpath = sectionpath;
            this.toPage = toPage;
            this.paragraphContent = paragraphContent == null
                    ? ""
                    : TSV_BREAKING_CHARS.matcher(paragraphContent).replaceAll(" ");
        }

        /**
         * Returns the TSV columns of this instance.
         *
         * @return a new mutable list holding, in order: from-page, to-page, section path,
         *         paragraph content
         */
        public List<String> toTsvSeqments() {
            List<String> result = new ArrayList<>();
            result.add(this.fromPage);
            result.add(this.toPage);
            result.add(this.sectionpath);
            result.add(this.paragraphContent);
            return result;
        }

        /**
         * Renders this instance as one tab-separated line (without a line terminator).
         *
         * @return the columns of {@link #toTsvSeqments()} joined by tab characters
         */
        public String toTsvLine() {
            return StringUtils.join(this.toTsvSeqments(), "\t");
        }

        /**
         * Compares {@code fromPage}, {@code sectionpath} and {@code toPage}, treating
         * {@code null} fields as equal to each other.
         */
        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (!(o instanceof LinkInstance)) {
                return false;
            }
            LinkInstance other = (LinkInstance) o;
            return Objects.equals(this.fromPage, other.fromPage)
                    && Objects.equals(this.sectionpath, other.sectionpath)
                    && Objects.equals(this.toPage, other.toPage);
        }

        /** Hashes the same three fields that {@link #equals(Object)} compares. */
        @Override
        public int hashCode() {
            // Objects.hashCode yields 0 for null, matching the original formula exactly.
            int result = Objects.hashCode(this.fromPage);
            result = 31 * result + Objects.hashCode(this.sectionpath);
            result = 31 * result + Objects.hashCode(this.toPage);
            return result;
        }
    }
}

