/*
 * Decompiled with CFR 0.152.
 */
package com.alibaba.cloud.ai.parser.bshtml;

import com.alibaba.cloud.ai.document.DocumentParser;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.parser.Parser;
import org.springframework.ai.document.Document;

/**
 * {@link DocumentParser} that reads HTML from an {@link InputStream} with jsoup and
 * produces a single Spring AI {@link Document} holding the page's visible text.
 *
 * <p>The resulting document's metadata carries three entries: {@code "title"} (the HTML
 * title), {@code "source"} (the configured base URI) and {@code "originalDocument"} (the
 * parsed jsoup document itself).
 */
public class BsHtmlDocumentParser implements DocumentParser {
    private static final String DEFAULT_CHARSET_NAME = "UTF-8";
    private static final String DEFAULT_BASE_URI = "";

    private final String charsetName;
    private final String baseUri;
    private final Parser parser;

    /**
     * Creates a parser that uses the default charset ({@code "UTF-8"}) and an empty base URI.
     *
     * @param parser the jsoup parser to use; when {@code null}, jsoup's HTML parser is used
     */
    public BsHtmlDocumentParser(Parser parser) {
        this(DEFAULT_CHARSET_NAME, DEFAULT_BASE_URI, parser);
    }

    /**
     * Creates a parser that uses jsoup's HTML parser with the given charset and base URI.
     *
     * @param charsetName the charset name handed to {@code Jsoup.parse}
     * @param baseUri the base URI handed to {@code Jsoup.parse}; also stored as the
     *     {@code "source"} metadata entry of every parsed document
     */
    public BsHtmlDocumentParser(String charsetName, String baseUri) {
        // Previously this forwarded a null parser, which made every parse() call fail.
        this(charsetName, baseUri, Parser.htmlParser());
    }

    /**
     * Creates a parser that uses the default charset ({@code "UTF-8"}), an empty base URI
     * and a fresh instance of jsoup's HTML parser.
     */
    public BsHtmlDocumentParser() {
        this(DEFAULT_CHARSET_NAME, DEFAULT_BASE_URI, Parser.htmlParser().newInstance());
    }

    /**
     * Creates a fully configured parser.
     *
     * @param charsetName the charset name handed to {@code Jsoup.parse}
     * @param baseUri the base URI handed to {@code Jsoup.parse}; also stored as the
     *     {@code "source"} metadata entry of every parsed document
     * @param parser the jsoup parser to use; when {@code null}, jsoup's HTML parser is used
     */
    public BsHtmlDocumentParser(String charsetName, String baseUri, Parser parser) {
        this.charsetName = charsetName;
        this.baseUri = baseUri;
        // Guard against a null parser so that parse() always has a usable parser.
        this.parser = parser != null ? parser : Parser.htmlParser();
    }

    /**
     * Parses the given stream as HTML and wraps its text in a single {@link Document}.
     *
     * @param inputStream the stream containing the markup to parse
     * @return a one-element immutable list containing the parsed document
     * @throws RuntimeException wrapping any exception raised while reading, parsing or
     *     building the document
     */
    public List<Document> parse(InputStream inputStream) {
        try {
            org.jsoup.nodes.Document doc =
                    Jsoup.parse(inputStream, this.charsetName, this.baseUri, this.parser);

            Document document = new Document(doc.text());
            Map<String, Object> metadata = document.getMetadata();
            metadata.put("title", doc.title());
            metadata.put("source", this.baseUri);
            metadata.put("originalDocument", doc);
            return List.of(document);
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}

