package com.liferay.asset.auto.tagger.opennlp.internal;

import com.liferay.asset.auto.tagger.opennlp.internal.configuration.OpenNLPDocumentAssetAutoTaggerCompanyConfiguration;
import com.liferay.petra.concurrent.DCLSingleton;
import com.liferay.petra.reflect.ReflectionUtil;
import com.liferay.portal.kernel.module.configuration.ConfigurationProvider;
import com.liferay.portal.kernel.util.LocaleUtil;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import java.util.function.Supplier;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.Span;
import org.osgi.framework.Bundle;
import org.osgi.framework.BundleContext;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;

/**
 * Extracts tag names from document text using OpenNLP sentence detection,
 * tokenization and named-entity recognition.
 *
 * <p>
 * Only the content types listed in {@code _supportedContentTypes} are
 * processed, and only when the supplied locale is {@code null} or its
 * language equals the language of {@code LocaleUtil.ENGLISH}. The models are
 * read from resources of the bundle captured in
 * {@link #activate(BundleContext)}.
 * </p>
 */
@Component(service = {OpenNLPDocumentAssetAutoTagger.class})
public class OpenNLPDocumentAssetAutoTagger {

    /** Content types for which tag extraction is attempted; never modified. */
    private static final Set<String> _supportedContentTypes = Collections.unmodifiableSet(
        new HashSet<>(
            Arrays.asList(
                "application/epub+zip", "application/vnd.apple.pages.13",
                "application/vnd.google-apps.document",
                "application/vnd.oasis.opendocument.text",
                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                "application/msword", "application/pdf", "application/text",
                "text", "text/plain", "text/html",
                "text/html; charset=UTF-8")));

    /** Bundle whose resources contain the OpenNLP model files. */
    private Bundle _bundle;

    @Reference
    private ConfigurationProvider _configurationProvider;

    // Each holder is asked for its model with a creation callback; how the
    // holder caches the result is defined by DCLSingleton, not by this class.
    private final DCLSingleton<SentenceModel> _sentenceModelDCLSingleton = new DCLSingleton<>();
    private final DCLSingleton<TokenizerModel> _tokenizerModelDCLSingleton = new DCLSingleton<>();
    private final DCLSingleton<List<TokenNameFinderModel>> _tokenNameFinderModelsDCLSingleton = new DCLSingleton<>();

    /**
     * Returns the tag names found in the given text.
     *
     * @param  companyId the ID passed to the company configuration lookup
     * @param  content the text to analyze
     * @param  locale the locale of the text, or {@code null} to skip the
     *         language check
     * @param  mimeType the content type of the text
     * @return the distinct tag names, or an empty collection if the locale or
     *         content type is not supported or there is no text
     * @throws Exception if the configuration or the models cannot be obtained
     */
    public Collection<String> getTagNames(long companyId, String content, Locale locale, String mimeType)
        throws Exception {

        return getTagNames(companyId, () -> content, locale, mimeType);
    }

    /**
     * Returns the tag names found in the given text without applying any
     * locale restriction.
     *
     * @param  companyId the ID passed to the company configuration lookup
     * @param  content the text to analyze
     * @param  mimeType the content type of the text
     * @return the distinct tag names, or an empty collection if the content
     *         type is not supported or there is no text
     * @throws Exception if the configuration or the models cannot be obtained
     */
    public Collection<String> getTagNames(long companyId, String content, String mimeType) throws Exception {
        return getTagNames(companyId, content, (Locale) null, mimeType);
    }

    /**
     * Returns the tag names found in the text produced by the supplier.
     *
     * <p>
     * The supplier is only invoked after the locale and content type checks
     * have passed.
     * </p>
     *
     * @param  companyId the ID passed to the company configuration lookup
     * @param  textSupplier supplies the text to analyze
     * @param  locale the locale of the text, or {@code null} to skip the
     *         language check
     * @param  mimeType the content type of the text
     * @return the distinct tag names, or an empty collection if the locale or
     *         content type is not supported or the supplied text is
     *         {@code null} or empty
     * @throws Exception if the configuration or the models cannot be obtained
     */
    public Collection<String> getTagNames(
            long companyId, Supplier<String> textSupplier, Locale locale, String mimeType)
        throws Exception {

        if (Objects.nonNull(locale) &&
            !Objects.equals(locale.getLanguage(), LocaleUtil.ENGLISH.getLanguage())) {

            return Collections.emptyList();
        }

        if (!_supportedContentTypes.contains(mimeType)) {
            return Collections.emptyList();
        }

        String text = textSupplier.get();

        // Nothing to analyze; avoid loading models and passing null to the
        // sentence detector.
        if ((text == null) || text.isEmpty()) {
            return Collections.emptyList();
        }

        SentenceDetectorME sentenceDetectorME = new SentenceDetectorME(
            _sentenceModelDCLSingleton.getSingleton(this::_createSentenceModel));
        TokenizerME tokenizerME = new TokenizerME(
            _tokenizerModelDCLSingleton.getSingleton(this::_createTokenizerModel));
        List<TokenNameFinderModel> tokenNameFinderModels =
            _tokenNameFinderModelsDCLSingleton.getSingleton(this::_createTokenNameFinderModels);

        OpenNLPDocumentAssetAutoTaggerCompanyConfiguration configuration =
            _configurationProvider.getCompanyConfiguration(
                OpenNLPDocumentAssetAutoTaggerCompanyConfiguration.class, companyId);

        // The threshold does not change between sentences, so read it once.
        double confidenceThreshold = configuration.confidenceThreshold();

        Set<String> tagNames = new HashSet<>();

        for (String sentence : sentenceDetectorME.sentDetect(text)) {
            Collections.addAll(
                tagNames,
                _getTagNames(tokenNameFinderModels, tokenizerME.tokenize(sentence), confidenceThreshold));
        }

        return tagNames;
    }

    /**
     * Captures the bundle whose resources hold the model files.
     *
     * @param  bundleContext the context of the activating bundle
     * @throws IOException declared for compatibility; not thrown by this
     *         implementation
     */
    @Activate
    protected void activate(BundleContext bundleContext) throws IOException {
        _bundle = bundleContext.getBundle();
    }

    /**
     * Builds the sentence model from the bundle resource
     * {@code org.apache.opennlp.model.en.sent.bin}. An {@code IOException} is
     * handed to {@code ReflectionUtil.throwException}.
     */
    private SentenceModel _createSentenceModel() {
        try {
            return new SentenceModel(_bundle.getResource("org.apache.opennlp.model.en.sent.bin"));
        }
        catch (IOException ioException) {
            return ReflectionUtil.throwException(ioException);
        }
    }

    /**
     * Builds the tokenizer model from the bundle resource
     * {@code org.apache.opennlp.model.en.token.bin}. An {@code IOException}
     * is handed to {@code ReflectionUtil.throwException}.
     */
    private TokenizerModel _createTokenizerModel() {
        try {
            return new TokenizerModel(_bundle.getResource("org.apache.opennlp.model.en.token.bin"));
        }
        catch (IOException ioException) {
            return ReflectionUtil.throwException(ioException);
        }
    }

    /**
     * Builds the location, organization and person name finder models, in
     * that order, from their bundle resources. An {@code IOException} is
     * handed to {@code ReflectionUtil.throwException}.
     */
    private List<TokenNameFinderModel> _createTokenNameFinderModels() {
        try {
            return Arrays.asList(
                new TokenNameFinderModel(_bundle.getResource("org.apache.opennlp.model.en.ner.location.bin")),
                new TokenNameFinderModel(_bundle.getResource("org.apache.opennlp.model.en.ner.organization.bin")),
                new TokenNameFinderModel(_bundle.getResource("org.apache.opennlp.model.en.ner.person.bin")));
        }
        catch (IOException ioException) {
            return ReflectionUtil.throwException(ioException);
        }
    }

    /**
     * Runs every name finder model over one tokenized sentence and returns
     * the text of each span whose probability is strictly greater than the
     * threshold.
     *
     * @param  tokenNameFinderModels the models to apply
     * @param  tokens the tokens of a single sentence
     * @param  confidenceThreshold the exclusive lower bound on span
     *         probability
     * @return the text of the accepted spans, in the order they were found
     */
    private String[] _getTagNames(
        List<TokenNameFinderModel> tokenNameFinderModels, String[] tokens, double confidenceThreshold) {

        List<Span> acceptedSpans = new ArrayList<>();

        for (TokenNameFinderModel tokenNameFinderModel : tokenNameFinderModels) {

            // A fresh finder is created per model and sentence, as before.
            NameFinderME nameFinderME = new NameFinderME(tokenNameFinderModel);

            for (Span span : nameFinderME.find(tokens)) {
                if (span.getProb() > confidenceThreshold) {
                    acceptedSpans.add(span);
                }
            }
        }

        return Span.spansToStrings(acceptedSpans.toArray(new Span[0]), tokens);
    }
}
