/*
 * Decompiled with CFR 0.152.
 */
package com.embabel.agent.rag.ingestion;

import com.embabel.agent.rag.ingestion.ContentFormatParser;
import com.embabel.agent.rag.ingestion.ContentFormatParserUtils;
import com.embabel.agent.rag.ingestion.PlainTextContentParser;
import com.embabel.agent.rag.model.LeafSection;
import com.embabel.agent.rag.model.MaterializedDocument;
import com.embabel.agent.rag.model.NavigableSection;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.collections.IntIterator;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.ranges.RangesKt;
import kotlin.sequences.Sequence;
import kotlin.sequences.SequencesKt;
import kotlin.text.MatchResult;
import kotlin.text.Regex;
import kotlin.text.RegexOption;
import kotlin.text.StringsKt;
import org.apache.tika.metadata.TikaCoreProperties;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;

@Metadata(mv={2, 1, 0}, k=1, xi=48, d1={"\u0000,\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u000e\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\b\u0000\u0018\u00002\u00020\u0001B\u0017\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0005\u00a2\u0006\u0004\b\u0006\u0010\u0007J \u0010\b\u001a\u00020\t2\u0006\u0010\n\u001a\u00020\u000b2\u0006\u0010\f\u001a\u00020\r2\u0006\u0010\u000e\u001a\u00020\u000bH\u0016R\u000e\u0010\u0002\u001a\u00020\u0003X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0004\u001a\u00020\u0005X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u000f"}, d2={"Lcom/embabel/agent/rag/ingestion/HtmlContentParser;", "Lcom/embabel/agent/rag/ingestion/ContentFormatParser;", "logger", "Lorg/slf4j/Logger;", "plainTextParser", "Lcom/embabel/agent/rag/ingestion/PlainTextContentParser;", "<init>", "(Lorg/slf4j/Logger;Lcom/embabel/agent/rag/ingestion/PlainTextContentParser;)V", "parse", "Lcom/embabel/agent/rag/model/MaterializedDocument;", "content", "", "metadata", "Lorg/apache/tika/metadata/Metadata;", "uri", "embabel-agent-rag-tika"})
@SourceDebugExtension(value={"SMAP\nHtmlContentParser.kt\nKotlin\n*S Kotlin\n*F\n+ 1 HtmlContentParser.kt\ncom/embabel/agent/rag/ingestion/HtmlContentParser\n+ 2 fake.kt\nkotlin/jvm/internal/FakeKt\n+ 3 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n*L\n1#1,129:1\n1#2:130\n774#3:131\n865#3,2:132\n1863#3,2:134\n*S KotlinDebug\n*F\n+ 1 HtmlContentParser.kt\ncom/embabel/agent/rag/ingestion/HtmlContentParser\n*L\n95#1:131\n95#1:132,2\n95#1:134,2\n*E\n"})
public final class HtmlContentParser
implements ContentFormatParser {
    @NotNull
    private final Logger logger;
    @NotNull
    private final PlainTextContentParser plainTextParser;

    public HtmlContentParser(@NotNull Logger logger, @NotNull PlainTextContentParser plainTextParser) {
        Intrinsics.checkNotNullParameter((Object)logger, (String)"logger");
        Intrinsics.checkNotNullParameter((Object)plainTextParser, (String)"plainTextParser");
        this.logger = logger;
        this.plainTextParser = plainTextParser;
    }

    /*
     * WARNING - void declaration
     */
    @Override
    @NotNull
    public MaterializedDocument parse(@NotNull String content, @NotNull org.apache.tika.metadata.Metadata metadata, @NotNull String uri) {
        Intrinsics.checkNotNullParameter((Object)content, (String)"content");
        Intrinsics.checkNotNullParameter((Object)metadata, (String)"metadata");
        Intrinsics.checkNotNullParameter((Object)uri, (String)"uri");
        Regex headingPattern = new Regex("<h([1-6])[^>]*>(.*?)</h\\1>", RegexOption.IGNORE_CASE);
        List headingMatches = SequencesKt.toList((Sequence)Regex.findAll$default((Regex)headingPattern, (CharSequence)content, (int)0, (int)2, null));
        if (headingMatches.isEmpty()) {
            CharSequence charSequence = content;
            Regex regex = new Regex("<[^>]+>");
            String string = " ";
            charSequence = regex.replace(charSequence, string);
            regex = new Regex("\\s+");
            string = " ";
            String cleanContent = ((Object)StringsKt.trim((CharSequence)regex.replace(charSequence, string))).toString();
            return this.plainTextParser.parse(cleanContent, metadata, uri);
        }
        List leafSections = new ArrayList();
        String string = UUID.randomUUID().toString();
        Intrinsics.checkNotNullExpressionValue((Object)string, (String)"toString(...)");
        String rootId = string;
        Map sectionStack = new LinkedHashMap();
        int n = ((Collection)headingMatches).size();
        for (int i = 0; i < n; ++i) {
            void $this$forEach$iv;
            void $this$filterTo$iv$iv;
            String sectionId;
            Iterator iterator;
            String string2;
            String string3;
            int endIdx;
            MatchResult match = (MatchResult)headingMatches.get(i);
            int level = Integer.parseInt((String)match.getGroupValues().get(1));
            CharSequence charSequence = (CharSequence)match.getGroupValues().get(2);
            Regex regex = new Regex("<[^>]+>");
            String string4 = "";
            charSequence = regex.replace(charSequence, string4);
            regex = new Regex("\\s+");
            string4 = " ";
            String title = ((Object)StringsKt.trim((CharSequence)regex.replace(charSequence, string4))).toString();
            int startIdx = match.getRange().getLast() + 1;
            int n2 = endIdx = i + 1 < headingMatches.size() ? ((MatchResult)headingMatches.get(i + 1)).getRange().getFirst() : content.length();
            if (startIdx < endIdx) {
                String string5 = content.substring(startIdx, endIdx);
                string3 = string5;
                Intrinsics.checkNotNullExpressionValue((Object)string5, (String)"substring(...)");
            } else {
                string3 = "";
            }
            String rawContent = string3;
            CharSequence charSequence2 = rawContent;
            Regex regex2 = new Regex("<[^>]+>");
            String string6 = " ";
            charSequence2 = regex2.replace(charSequence2, string6);
            regex2 = new Regex("\\s+");
            string6 = " ";
            String cleanContent = ((Object)StringsKt.trim((CharSequence)regex2.replace(charSequence2, string6))).toString();
            Intrinsics.checkNotNullExpressionValue((Object)UUID.randomUUID().toString(), (String)"toString(...)");
            if (level == 1) {
                string2 = rootId;
            } else if (level > 1) {
                String string7;
                block13: {
                    iterator = ((Iterable)RangesKt.downTo((int)(level - 1), (int)1)).iterator();
                    while (iterator.hasNext()) {
                        int it22 = ((IntIterator)iterator).nextInt();
                        boolean bl = false;
                        String it22 = (String)sectionStack.get(it22);
                        if (it22 == null) continue;
                        string7 = it22;
                        break block13;
                    }
                    string7 = string2 = null;
                }
                if (string7 == null) {
                    string2 = rootId;
                }
            } else {
                string2 = rootId;
            }
            String parentId = string2;
            Integer n3 = level;
            sectionStack.put(n3, sectionId);
            Iterable $this$filter$iv = sectionStack.keySet();
            boolean $i$f$filter = false;
            iterator = $this$filter$iv;
            Collection destination$iv$iv = new ArrayList();
            boolean $i$f$filterTo = false;
            for (Object element$iv$iv : $this$filterTo$iv$iv) {
                int it = ((Number)element$iv$iv).intValue();
                boolean bl = false;
                if (!(it > level)) continue;
                destination$iv$iv.add(element$iv$iv);
            }
            $this$filter$iv = (List)destination$iv$iv;
            boolean $i$f$forEach = false;
            for (Object element$iv : $this$forEach$iv) {
                int it = ((Number)element$iv).intValue();
                boolean bl = false;
                sectionStack.remove(it);
            }
            leafSections.add(ContentFormatParserUtils.INSTANCE.createLeafSection(sectionId, title, cleanContent, parentId, uri, metadata, rootId));
        }
        this.logger.debug("Created {} leaf sections from HTML content", (Object)leafSections.size());
        String string8 = metadata.get(TikaCoreProperties.TITLE);
        if (string8 == null && (string8 = metadata.get("resourceName")) == null) {
            string8 = !((Collection)leafSections).isEmpty() ? ((LeafSection)CollectionsKt.first((List)leafSections)).getTitle() : "Document";
        }
        String documentTitle = string8;
        List<NavigableSection> hierarchicalSections = ContentFormatParserUtils.INSTANCE.buildHierarchy(leafSections, rootId);
        Instant instant = Instant.now();
        Intrinsics.checkNotNullExpressionValue((Object)instant, (String)"now(...)");
        return new MaterializedDocument(rootId, uri, documentTitle, instant, hierarchicalSections, ContentFormatParserUtils.INSTANCE.extractMetadataMap(metadata));
    }
}

