/*
 * Decompiled with CFR 0.152.
 */
package com.atlassian.confluence.extra.officeconnector.index.word;

import com.atlassian.bonnie.search.Extractor;
import com.atlassian.bonnie.search.SearchableAttachment;
import com.atlassian.bonnie.search.extractor.BaseAttachmentContentExtractor;
import com.atlassian.bonnie.search.extractor.ExtractorException;
import com.atlassian.confluence.extra.office.OfficeFile;
import com.atlassian.confluence.extra.officeconnector.index.util.AttachmentTextExtractorAdapter;
import com.atlassian.confluence.index.attachment.AttachmentTextExtractor;
import com.atlassian.confluence.pages.Attachment;
import com.atlassian.confluence.util.io.InputStreamSource;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

/**
 * Extracts plain text from Word attachments so that it can be indexed.
 *
 * <p>The class plugs into two extraction APIs at once: it extends
 * {@link BaseAttachmentContentExtractor} and implements {@link AttachmentTextExtractor}.
 * The latter is served by wrapping this instance in an
 * {@link AttachmentTextExtractorAdapter}.
 *
 * <p>The supported MIME types and file extensions are whatever {@link OfficeFile} reports for
 * {@code OfficeFile.Type.Word} / {@code OfficeFile.Version.V97}.
 */
public class WordTextExtractor
extends BaseAttachmentContentExtractor
implements AttachmentTextExtractor {
    /** MIME types reported by {@link OfficeFile} for Word / V97. */
    private static final String[] CONTENT_TYPES = OfficeFile.getMimeTypesFor(OfficeFile.Type.Word, OfficeFile.Version.V97).toArray(new String[0]);
    /** File extensions reported by {@link OfficeFile} for Word / V97. */
    private static final String[] EXTENSIONS = OfficeFile.getExtensionsFor(OfficeFile.Type.Word, OfficeFile.Version.V97).toArray(new String[0]);
    /** File-name suffix that routes an attachment to the HWPF (binary Word) parser. */
    private static final String LEGACY_SUFFIX = "doc";

    /**
     * @return the supported file extensions as a fixed-size list view
     */
    public List<String> getFileExtensions() {
        return Arrays.asList(EXTENSIONS);
    }

    /**
     * @return the supported MIME types as a fixed-size list view
     */
    public List<String> getMimeTypes() {
        return Arrays.asList(CONTENT_TYPES);
    }

    /**
     * Extracts the text of {@code attachment} by delegating to an
     * {@link AttachmentTextExtractorAdapter} wrapped around this extractor.
     *
     * @param attachment the attachment to read
     * @return whatever the adapter produces for the attachment
     */
    public Optional<InputStreamSource> extract(Attachment attachment) {
        // The explicit Extractor cast is kept so the same adapter constructor is selected as before.
        return new AttachmentTextExtractorAdapter((Extractor)this).apply(attachment);
    }

    /**
     * @return the MIME types this extractor matches
     */
    protected String[] getMatchingContentTypes() {
        return CONTENT_TYPES;
    }

    /**
     * @return the file extensions this extractor matches
     */
    protected String[] getMatchingFileExtensions() {
        return EXTENSIONS;
    }

    /**
     * Reads the text of a Word document from {@code inputStream}.
     *
     * <p>The parser is chosen from the attachment's file name: a name ending in {@code "doc"}
     * (compared case-insensitively, so {@code REPORT.DOC} is handled too) is read with the HWPF
     * classes; every other name, including a {@code null} one, is read with the XWPF classes.
     *
     * @param inputStream the raw attachment content
     * @param attachment  the attachment being indexed; only its file name is consulted
     * @return the extracted text
     * @throws ExtractorException if anything goes wrong while parsing the document or releasing
     *                            the parser's resources; the original failure is the cause
     */
    protected String extractText(InputStream inputStream, SearchableAttachment attachment) throws ExtractorException {
        try {
            if (WordTextExtractor.hasLegacySuffix(attachment.getFileName())) {
                return WordTextExtractor.readLegacyDocument(inputStream);
            }
            return WordTextExtractor.readOoxmlDocument(inputStream);
        }
        catch (Exception e) {
            throw new ExtractorException("Error reading content of Word binary document: " + e.getMessage(), (Throwable)e);
        }
    }

    /** Tells whether {@code fileName} ends in {@code "doc"}, ignoring case; {@code null} never matches. */
    private static boolean hasLegacySuffix(String fileName) {
        if (fileName == null) {
            return false;
        }
        int suffixLength = LEGACY_SUFFIX.length();
        // regionMatches returns false for a negative offset, so names shorter than the suffix are safe.
        return fileName.regionMatches(true, fileName.length() - suffixLength, LEGACY_SUFFIX, 0, suffixLength);
    }

    /** Parses {@code inputStream} with the HWPF classes and releases the parser objects afterwards. */
    private static String readLegacyDocument(InputStream inputStream) throws Exception {
        try (POIFSFileSystem fileSystem = new POIFSFileSystem(inputStream);
             HWPFDocument document = new HWPFDocument(fileSystem);
             WordExtractor extractor = new WordExtractor(document)) {
            return extractor.getTextFromPieces();
        }
    }

    /** Parses {@code inputStream} with the XWPF classes and releases the parser objects afterwards. */
    private static String readOoxmlDocument(InputStream inputStream) throws Exception {
        try (XWPFDocument document = new XWPFDocument(inputStream);
             XWPFWordExtractor extractor = new XWPFWordExtractor(document)) {
            return extractor.getText();
        }
    }
}

