/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.benchmark.byTask.feeds;

import java.io.IOException;
import java.nio.file.Path;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.lucene.benchmark.byTask.feeds.DocData;
import org.apache.lucene.benchmark.byTask.feeds.TrecContentSource;
import org.apache.lucene.benchmark.byTask.feeds.TrecFBISParser;
import org.apache.lucene.benchmark.byTask.feeds.TrecFR94Parser;
import org.apache.lucene.benchmark.byTask.feeds.TrecFTParser;
import org.apache.lucene.benchmark.byTask.feeds.TrecGov2Parser;
import org.apache.lucene.benchmark.byTask.feeds.TrecLATimesParser;

/**
 * Abstract base for TREC document parsers.
 *
 * <p>Besides the abstract {@link #parse} contract, this class holds a registry that maps each
 * {@link ParsePathType} to a parser instance, a lookup that derives a path type from the names
 * of a file's path components, and static text helpers for stripping markup tags and extracting
 * the text between two tags.
 */
public abstract class TrecDocParser {
    /** Path type returned by {@link #pathType(Path)} when no path component names a known type. */
    public static final ParsePathType DEFAULT_PATH_TYPE = ParsePathType.GOV2;

    /**
     * Matches a single markup tag: {@code <}, any run of characters other than {@code >}, then
     * {@code >}. Compiled once instead of on every {@link #stripTags(String, int)} call.
     */
    private static final Pattern TAG_PATTERN = Pattern.compile("<[^>]*>");

    /**
     * Bound on the path walk in {@link #pathType(Path)}: the walk stops once the running
     * component count reaches this value, so at most {@code MAX_PATH_LENGTH - 1} components
     * are examined.
     */
    private static final int MAX_PATH_LENGTH = 10;

    /** Unmodifiable registry holding one parser instance per path type. */
    static final Map<ParsePathType, TrecDocParser> pathType2Parser =
        Collections.unmodifiableMap(
            new EnumMap<ParsePathType, TrecDocParser>(
                Map.of(
                    ParsePathType.GOV2, new TrecGov2Parser(),
                    ParsePathType.FBIS, new TrecFBISParser(),
                    ParsePathType.FR94, new TrecFR94Parser(),
                    ParsePathType.FT, new TrecFTParser(),
                    ParsePathType.LATIMES, new TrecLATimesParser())));

    /** Unmodifiable lookup from the upper-cased (root locale) enum constant name to its path type. */
    static final Map<String, ParsePathType> pathName2Type;

    static {
        Map<String, ParsePathType> name2Type = new HashMap<>();
        for (ParsePathType ppt : ParsePathType.values()) {
            name2Type.put(ppt.name().toUpperCase(Locale.ROOT), ppt);
        }
        pathName2Type = Collections.unmodifiableMap(name2Type);
    }

    /**
     * Determines the path type of a file from the names of its path components.
     *
     * <p>Starting at {@code f} itself and moving up through its parents, each component's file
     * name is upper-cased (root locale) and looked up in {@link #pathName2Type}; the first match
     * wins. The walk ends when the path or its file name is {@code null}, or when the bound
     * given by {@code MAX_PATH_LENGTH} is reached.
     *
     * @param f path to classify; may be {@code null}
     * @return the matching path type, or {@link #DEFAULT_PATH_TYPE} if no examined component
     *     matches
     */
    public static ParsePathType pathType(Path f) {
        int pathLength = 0;
        while (f != null && f.getFileName() != null && ++pathLength < MAX_PATH_LENGTH) {
            String componentName = f.getFileName().toString().toUpperCase(Locale.ROOT);
            ParsePathType ppt = pathName2Type.get(componentName);
            if (ppt != null) {
                return ppt;
            }
            f = f.getParent();
        }
        return DEFAULT_PATH_TYPE;
    }

    /**
     * Parses a document into a {@link DocData}.
     *
     * <p>NOTE(review): the roles of the parameters are not visible in this file; the names below
     * are inferred from the parameter types and should be confirmed against the concrete parsers.
     *
     * @param docData doc data instance handed to the parser (presumably the object to populate)
     * @param name string handed to the parser (presumably the document name)
     * @param trecSrc the TREC content source passed to the parser
     * @param docBuf buffer passed to the parser (presumably the raw document text)
     * @param pathType path type passed to the parser
     * @return the resulting doc data
     * @throws IOException declared by the contract for implementations to throw
     */
    public abstract DocData parse(DocData docData, String name, TrecContentSource trecSrc, StringBuilder docBuf, ParsePathType pathType) throws IOException;

    /**
     * Replaces every markup tag in {@code buf}, from index {@code start} to the end, with a
     * single space.
     *
     * @param buf text to clean; not modified
     * @param start index of the first character to keep
     * @return the text from {@code start} onward with each tag replaced by a space
     */
    public static String stripTags(StringBuilder buf, int start) {
        return TrecDocParser.stripTags(buf.substring(start), 0);
    }

    /**
     * Replaces every markup tag in {@code buf}, from index {@code start} to the end, with a
     * single space. A tag is {@code <}, any run of characters other than {@code >}, then
     * {@code >}.
     *
     * @param buf text to clean
     * @param start index of the first character to keep; values {@code <= 0} keep the whole
     *     string
     * @return the retained text with each tag replaced by a space
     */
    public static String stripTags(String buf, int start) {
        String tail = start > 0 ? buf.substring(start) : buf;
        return TAG_PATTERN.matcher(tail).replaceAll(" ");
    }

    /**
     * Extracts the trimmed text located between the first occurrence of {@code startTag} and
     * the first following occurrence of {@code endTag}.
     *
     * <p>When {@code noisePrefixes} is given, each prefix is searched for, in array order, from
     * the current start of the extracted region; a prefix that lies entirely before the end tag
     * moves the start of the region to just past that prefix.
     *
     * @param buf text to search; not modified
     * @param startTag tag that opens the region
     * @param endTag tag that closes the region
     * @param maxPos if {@code >= 0}, both the start tag and the end tag must begin before this
     *     index; a negative value disables the limit
     * @param noisePrefixes prefixes to skip inside the region, or {@code null} for none
     * @return the trimmed text between the tags, or {@code null} if either tag is missing or
     *     begins at or beyond {@code maxPos}
     */
    public static String extract(StringBuilder buf, String startTag, String endTag, int maxPos, String[] noisePrefixes) {
        boolean limited = maxPos >= 0;

        int tagStart = buf.indexOf(startTag);
        if (tagStart < 0 || (limited && tagStart >= maxPos)) {
            return null;
        }

        int from = tagStart + startTag.length();
        int to = buf.indexOf(endTag, from);
        if (to < 0 || (limited && to >= maxPos)) {
            return null;
        }

        if (noisePrefixes != null) {
            for (String noise : noisePrefixes) {
                int noiseStart = buf.indexOf(noise, from);
                if (noiseStart < 0) {
                    continue;
                }
                int noiseEnd = noiseStart + noise.length();
                // Only skip a prefix that ends at or before the end tag; one that overlaps the
                // end tag would push the start past the end and make substring() throw.
                if (noiseEnd <= to) {
                    from = noiseEnd;
                }
            }
        }
        return buf.substring(from, to).trim();
    }

    /**
     * Path types known to this parser family. {@link #pathType(Path)} matches these constant
     * names against path component names, ignoring case.
     */
    public enum ParsePathType {
        GOV2,
        FBIS,
        FT,
        FR94,
        LATIMES;
    }
}

