/*
 * Decompiled with CFR 0.152.
 */
package gate.corpora;

import gate.Document;
import gate.DocumentFormat;
import gate.FeatureMap;
import gate.Resource;
import gate.corpora.DocumentImpl;
import gate.corpora.MimeType;
import gate.corpora.RepositioningInfo;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.AutoInstance;
import gate.creole.metadata.CreoleResource;
import gate.event.StatusListener;
import gate.util.DocumentFormatException;
import gate.xml.XmlDocumentHandler;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.util.Locale;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

@CreoleResource(name="Apache Tika Document Format", isPrivate=true, autoinstances={@AutoInstance(hidden=true)})
public class TikaFormat
extends DocumentFormat {
    private static final long serialVersionUID = 1L;
    private static final Logger log = Logger.getLogger(TikaFormat.class);

    @Override
    public Resource init() throws ResourceInstantiationException {
        super.init();
        this.setMimeType(new MimeType("application", "tika"));
        this.assignMime(this.getMimeType(), new String[0]);
        this.assignMime(new MimeType("application", "pdf"), "pdf");
        this.assignMime(new MimeType("application", "msword"), "doc");
        this.assignMime(new MimeType("application", "vnd.ms-powerpoint"), "ppt");
        this.assignMime(new MimeType("application", "vnd.ms-excel"), "xls");
        this.assignMime(new MimeType("application", "vnd.openxmlformats-officedocument.wordprocessingml.document"), "docx");
        this.assignMime(new MimeType("application", "vnd.openxmlformats-officedocument.presentationml.presentation"), "pptx");
        this.assignMime(new MimeType("application", "vnd.openxmlformats-officedocument.spreadsheetml.sheet"), "xlsx");
        this.assignMime(new MimeType("application", "vnd.oasis.opendocument.text"), "odt");
        this.assignMime(new MimeType("application", "vnd.oasis.opendocument.presentation"), "odp");
        this.assignMime(new MimeType("application", "vnd.oasis.opendocument.spreadsheet"), "ods");
        this.assignMime(new MimeType("application", "rtf"), "rtf");
        return this;
    }

    private void assignMime(MimeType mime, String ... exts) {
        String mimeString = mime.getType() + "/" + mime.getSubtype();
        mimeString2ClassHandlerMap.put(mimeString, this);
        mimeString2mimeTypeMap.put(mimeString, mime);
        for (String ext : exts) {
            suffixes2mimeTypeMap.put(ext, mime);
        }
    }

    @Override
    public Boolean supportsRepositioning() {
        return true;
    }

    @Override
    public void unpackMarkup(Document doc) throws DocumentFormatException {
        this.unpackMarkup(doc, null, null);
    }

    /*
     * Loose catch block
     * Enabled aggressive block sorting
     * Enabled unnecessary exception pruning
     * Enabled aggressive exception aggregation
     */
    @Override
    public void unpackMarkup(Document doc, RepositioningInfo repInfo, RepositioningInfo ampCodingInfo) throws DocumentFormatException {
        if (doc == null || doc.getSourceUrl() == null) {
            throw new DocumentFormatException("GATE document is null or no content found. Nothing to parse!");
        }
        StatusListener statusListener = new StatusListener(){

            @Override
            public void statusChanged(String text) {
                TikaFormat.this.fireStatusChanged(text);
            }
        };
        XmlDocumentHandler ch = new XmlDocumentHandler(doc, this.markupElementsMap, this.element2StringMap);
        Metadata metadata = this.extractParserTips(doc);
        ch.addStatusListener(statusListener);
        ch.setRepositioningInfo(repInfo);
        ch.setAmpCodingInfo(ampCodingInfo);
        InputStream input = null;
        try {
            Parser tikaParser = new TikaConfig().getParser();
            input = doc.getSourceUrl().openStream();
            tikaParser.parse(input, (ContentHandler)ch, metadata, new ParseContext());
            this.setDocumentFeatures(metadata, doc);
        }
        catch (IOException e) {
            try {
                throw new DocumentFormatException(e);
                catch (SAXException e2) {
                    throw new DocumentFormatException(e2);
                }
                catch (TikaException e3) {
                    throw new DocumentFormatException((Exception)((Object)e3));
                }
            }
            catch (Throwable throwable) {
                IOUtils.closeQuietly(input);
                ch.removeStatusListener(statusListener);
                throw throwable;
            }
        }
        IOUtils.closeQuietly((InputStream)input);
        ch.removeStatusListener(statusListener);
        if (doc instanceof DocumentImpl) {
            ((DocumentImpl)doc).setNextAnnotationId(ch.getCustomObjectsId());
        }
    }

    private void setDocumentFeatures(Metadata metadata, Document doc) {
        FeatureMap fmap = doc.getFeatures();
        this.setTikaFeature(metadata, TikaCoreProperties.TITLE, fmap);
        this.setTikaFeature(metadata, Office.AUTHOR, fmap);
        this.setTikaFeature(metadata, TikaCoreProperties.COMMENTS, fmap);
        this.setTikaFeature(metadata, TikaCoreProperties.CREATOR, fmap);
        if (fmap.get("AUTHORS") == null && fmap.get("AUTHOR") != null) {
            fmap.put("AUTHORS", fmap.get(Office.AUTHOR));
        }
        fmap.put("MimeType", metadata.get("Content-Type"));
    }

    private void setTikaFeature(Metadata metadata, Property property, FeatureMap fmap) {
        String value = metadata.get(property);
        if (value == null) {
            return;
        }
        if ((value = value.trim()).length() == 0) {
            return;
        }
        String key = property.getName().toUpperCase();
        if (fmap.containsKey(key)) {
            fmap.put("TIKA_" + key, value);
        } else {
            fmap.put(key, value);
            fmap.put("TIKA_" + key, value);
        }
    }

    private Metadata extractParserTips(Document doc) {
        Metadata metadata = new Metadata();
        Object inputMime = doc.getFeatures().get("MimeType");
        if (inputMime instanceof String && !"application/tika".equals(inputMime)) {
            metadata.add("Content-Type", (String)doc.getFeatures().get("MimeType"));
        }
        if (doc instanceof DocumentImpl && ((DocumentImpl)doc).getMimeType() != null) {
            metadata.add("Content-Type", ((DocumentImpl)doc).getMimeType());
        }
        if (doc.getSourceUrl() != null && doc.getSourceUrl().getProtocol().startsWith("file")) {
            try {
                File fn = new File(doc.getSourceUrl().toURI());
                metadata.add("resourceName", fn.getName());
            }
            catch (URISyntaxException e) {
                log.debug((Object)("Could not extract filename from uri: " + doc.getSourceUrl()), (Throwable)e);
            }
            catch (IllegalArgumentException e) {
                log.debug((Object)("Could not extract filename from uri: " + doc.getSourceUrl()), (Throwable)e);
            }
        }
        return metadata;
    }
}

