/*
 * Decompiled with CFR 0.152.
 */
package org.apache.stanbol.enhancer.nlp.utils;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.EnumMap;
import java.util.Map;
import org.apache.clerezza.commons.rdf.BlankNodeOrIRI;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Language;
import org.apache.clerezza.commons.rdf.RDFTerm;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.commons.io.IOUtils;
import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.Span;
import org.apache.stanbol.enhancer.nlp.model.SpanTypeEnum;
import org.apache.stanbol.enhancer.nlp.model.annotation.Annotated;
import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
import org.apache.stanbol.enhancer.nlp.nif.SsoOntology;
import org.apache.stanbol.enhancer.nlp.nif.StringOntology;
import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag;
import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
import org.apache.stanbol.enhancer.nlp.pos.Pos;
import org.apache.stanbol.enhancer.nlp.pos.PosTag;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;

/**
 * Static helpers for creating NIF style fragment URIs for text selections
 * and for writing span, POS and phrase annotations as RDF triples to a
 * {@link Graph}.
 * <p>
 * This class only has static members and can not be instantiated.
 */
public final class NIFHelper {
    private static final LiteralFactory lf = LiteralFactory.getInstance();

    /**
     * Read-only mapping from {@link SpanTypeEnum} values to the SSO type URI
     * written for spans of that type. Span types without a mapping get no
     * additional <code>rdf:type</code>.
     */
    public static final Map<SpanTypeEnum, IRI> SPAN_TYPE_TO_SSO_TYPE = buildSpanTypeMapping();

    /**
     * Read-only mapping from the {@link LexicalCategory} of a phrase to the
     * phrase type URI (in the <code>http://purl.org/olia/olia.owl#</code>
     * namespace) linked by {@link #writePhrase(Graph, Annotated, IRI)}.
     */
    public static final Map<LexicalCategory, IRI> LEXICAL_TYPE_TO_PHRASE_TYPE = buildPhraseTypeMapping();

    /**
     * Number of characters before and after the selection that are included
     * in the digest of hash based URIs.
     */
    public static final int NIF_HASH_CONTEXT_LENGTH = 10;

    /**
     * Maximum number of characters of the selected text that are appended to
     * hash based URIs.
     */
    public static final int NIF_HASH_MAX_STRING_LENGTH = 20;

    /** Charset used to encode the context of a selection before digesting it. */
    public static final Charset UTF8 = Charset.forName("UTF8");

    private NIFHelper() {
    }

    /**
     * Creates an URI of the form <code>{base}#char={start},{end}</code>.
     *
     * @param base the base URI. MUST NOT be <code>null</code>
     * @param start the start index. Negative values are written as <code>0</code>
     * @param end the end index. If negative no end index is written
     * @return the fragment URI
     * @throws IllegalArgumentException if <code>base</code> is
     * <code>null</code> or if a non negative <code>end</code> is lower than
     * <code>start</code>
     */
    public static final IRI getNifFragmentURI(IRI base, int start, int end) {
        return buildRangeUri(base, "#char=", ',', start, end);
    }

    /**
     * Creates an URI of the form <code>{base}#offset_{start}_{end}</code>.
     *
     * @param base the base URI. MUST NOT be <code>null</code>
     * @param start the start index. Negative values are written as <code>0</code>
     * @param end the end index. If negative no end index is written
     * @return the offset based URI
     * @throws IllegalArgumentException if <code>base</code> is
     * <code>null</code> or if a non negative <code>end</code> is lower than
     * <code>start</code>
     */
    public static final IRI getNifOffsetURI(IRI base, int start, int end) {
        return buildRangeUri(base, "#offset_", '_', start, end);
    }

    /**
     * Shared implementation of {@link #getNifFragmentURI(IRI, int, int)} and
     * {@link #getNifOffsetURI(IRI, int, int)}: appends
     * <code>{prefix}{start}{separator}[{end}]</code> to the base URI.
     */
    private static IRI buildRangeUri(IRI base, String prefix, char separator, int start, int end) {
        if (base == null) {
            throw new IllegalArgumentException("Base URI MUST NOT be NULL!");
        }
        StringBuilder sb = new StringBuilder(base.getUnicodeString());
        sb.append(prefix);
        sb.append(start >= 0 ? start : 0).append(separator);
        if (end >= 0) {
            if (end < start) {
                throw new IllegalArgumentException("End index '" + end + "' < start '" + start + "'!");
            }
            sb.append(end);
        }
        return new IRI(sb.toString());
    }

    /**
     * Creates an URI of the form
     * <code>{base}#hash_{contextLength}_{length}_{digest}_{string}</code>
     * where <code>digest</code> is the MD5 digest over the selection
     * (enclosed in brackets) including up to
     * {@link #NIF_HASH_CONTEXT_LENGTH} characters before and after it and
     * <code>string</code> are the first (at most)
     * {@link #NIF_HASH_MAX_STRING_LENGTH} characters of the selection.
     *
     * @param base the base URI. MUST NOT be <code>null</code>
     * @param start the start index (inclusive) of the selection. Negative
     * values are treated as <code>0</code>
     * @param end the end index (exclusive) of the selection. Negative values
     * are treated as <code>start</code> (empty selection)
     * @param text the text containing the selection. MUST NOT be <code>null</code>
     * @return the hash based URI
     * @throws IllegalArgumentException if <code>base</code> or
     * <code>text</code> is <code>null</code>, if <code>end</code> is lower
     * than <code>start</code> or if <code>end</code> is greater than the
     * length of the text
     */
    public static final IRI getNifHashURI(IRI base, int start, int end, String text) {
        if (base == null) {
            throw new IllegalArgumentException("Base URI MUST NOT be NULL!");
        }
        if (text == null) {
            throw new IllegalArgumentException("The text MUST NOT be NULL!");
        }
        start = start < 0 ? 0 : start;
        end = end < 0 ? start : end;
        if (end < start) {
            throw new IllegalArgumentException("End index '" + end + "' < start '" + start + "'!");
        }
        // 'end' is exclusive (it is used as the end of substring(..) below),
        // so a selection that reaches the end of the text (end == length)
        // is valid. Only values beyond the length are rejected.
        if (end > text.length()) {
            throw new IllegalArgumentException("The End index '" + end + "' exeeds the " + "length of the text '" + text.length() + "'!");
        }
        int contextStart = Math.max(0, start - NIF_HASH_CONTEXT_LENGTH);
        int contextEnd = Math.min(text.length(), end + NIF_HASH_CONTEXT_LENGTH);
        StringBuilder sb = new StringBuilder(base.getUnicodeString());
        sb.append("#hash_");
        sb.append(NIF_HASH_CONTEXT_LENGTH);
        sb.append('_');
        sb.append(end - start);
        sb.append('_');
        sb.append(getContextDigest(text, contextStart, start, end, contextEnd));
        sb.append('_');
        sb.append(text.substring(start, Math.min(end, start + NIF_HASH_MAX_STRING_LENGTH)));
        return new IRI(sb.toString());
    }

    /**
     * Calculates the MD5 digest over
     * <code>{prefix}({selection}){suffix}</code> encoded with {@link #UTF8},
     * where prefix is <code>text[contextStart, start)</code>, selection is
     * <code>text[start, end)</code> and suffix is
     * <code>text[end, contextEnd)</code>.
     *
     * @throws IllegalStateException if writing to or digesting the in-memory
     * buffer fails with an {@link IOException}
     */
    private static String getContextDigest(String text, int contextStart, int start, int end, int contextEnd) {
        ByteArrayOutputStream contextOs = new ByteArrayOutputStream();
        Writer contextWriter = new OutputStreamWriter(contextOs, UTF8);
        try {
            if (contextStart < start) {
                contextWriter.append(text, contextStart, start);
            }
            contextWriter.append('(');
            if (start < end) {
                contextWriter.append(text, start, end);
            }
            contextWriter.append(')');
            if (end < contextEnd) {
                contextWriter.append(text, end, contextEnd);
            }
            // The writer is flushed (not closed) before the bytes are read.
            // This order is kept as is so that digests of existing URIs do
            // not change.
            contextWriter.flush();
            InputStream contextIn = new ByteArrayInputStream(contextOs.toByteArray());
            return ContentItemHelper.streamDigest(contextIn, null, "MD5");
        }
        catch (IOException e) {
            throw new IllegalStateException(e);
        }
        finally {
            IOUtils.closeQuietly(contextOs);
        }
    }

    /**
     * Writes the basic information of the parsed span to the graph: the
     * <code>rdf:type</code> {@link StringOntology#OffsetBasedString}, the
     * anchor text (as plain literal with the parsed language), the begin and
     * end index and - if {@link #SPAN_TYPE_TO_SSO_TYPE} defines a mapping
     * for the type of the span - the SSO type as additional
     * <code>rdf:type</code>.
     *
     * @param graph the graph the triples are added to
     * @param base the base URI used to create the URI of the span
     * (see {@link #getNifOffsetURI(IRI, int, int)})
     * @param text the analysed text (currently not used by this method)
     * @param language the language set on the literal holding the anchor text
     * @param span the span to write
     * @return the URI used as subject for the triples of the span
     */
    public static IRI writeSpan(Graph graph, IRI base, AnalysedText text, Language language, Span span) {
        IRI segment = getNifOffsetURI(base, span.getStart(), span.getEnd());
        graph.add(new TripleImpl(segment, Properties.RDF_TYPE, StringOntology.OffsetBasedString.getUri()));
        graph.add(new TripleImpl(segment, StringOntology.anchorOf.getUri(), new PlainLiteralImpl(span.getSpan(), language)));
        graph.add(new TripleImpl(segment, StringOntology.beginIndex.getUri(), lf.createTypedLiteral(span.getStart())));
        graph.add(new TripleImpl(segment, StringOntology.endIndex.getUri(), lf.createTypedLiteral(span.getEnd())));
        // Token -> Word, Chunk -> Phrase, Sentence -> Sentence. Other span
        // types have no mapping and get no additional type.
        IRI ssoType = SPAN_TYPE_TO_SSO_TYPE.get(span.getType());
        if (ssoType != null) {
            graph.add(new TripleImpl(segment, Properties.RDF_TYPE, ssoType));
        }
        return segment;
    }

    /**
     * Writes the {@link NlpAnnotations#POS_ANNOTATION} of the parsed
     * annotated object to the graph. Does nothing if no such annotation is
     * present. For mapped tags an <code>oliaLink</code> is added for every
     * {@link Pos} and every {@link LexicalCategory} of the tag. The string
     * tag and the probability of the annotation are written in any case.
     *
     * @param graph the graph the triples are added to
     * @param annotated the object providing the POS annotation
     * @param segmentUri the subject used for the added triples
     */
    public static void writePos(Graph graph, Annotated annotated, IRI segmentUri) {
        Value<PosTag> posTag = annotated.getAnnotation(NlpAnnotations.POS_ANNOTATION);
        if (posTag == null) {
            return;
        }
        PosTag tag = posTag.value();
        if (tag.isMapped()) {
            for (Pos pos : tag.getPos()) {
                graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), pos.getUri()));
            }
            for (LexicalCategory cat : tag.getCategories()) {
                graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), cat.getUri()));
            }
        }
        graph.add(new TripleImpl(segmentUri, SsoOntology.posTag.getUri(), lf.createTypedLiteral(tag.getTag())));
        graph.add(new TripleImpl(segmentUri, Properties.ENHANCER_CONFIDENCE, lf.createTypedLiteral(posTag.probability())));
    }

    /**
     * Writes the {@link NlpAnnotations#PHRASE_ANNOTATION} of the parsed
     * annotated object to the graph. Triples are only added if the
     * annotation is present and {@link #LEXICAL_TYPE_TO_PHRASE_TYPE} defines
     * a phrase type for the category of the phrase tag. In that case an
     * <code>oliaLink</code> to the phrase type and the probability of the
     * annotation are written.
     *
     * @param graph the graph the triples are added to
     * @param annotated the object providing the phrase annotation
     * @param segmentUri the subject used for the added triples
     */
    public static void writePhrase(Graph graph, Annotated annotated, IRI segmentUri) {
        Value<PhraseTag> phraseTag = annotated.getAnnotation(NlpAnnotations.PHRASE_ANNOTATION);
        if (phraseTag == null) {
            return;
        }
        IRI phraseTypeUri = LEXICAL_TYPE_TO_PHRASE_TYPE.get(phraseTag.value().getCategory());
        if (phraseTypeUri != null) {
            graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), phraseTypeUri));
            graph.add(new TripleImpl(segmentUri, Properties.ENHANCER_CONFIDENCE, lf.createTypedLiteral(phraseTag.probability())));
        }
    }

    /** Creates the read-only value of {@link #SPAN_TYPE_TO_SSO_TYPE}. */
    private static Map<SpanTypeEnum, IRI> buildSpanTypeMapping() {
        Map<SpanTypeEnum, IRI> mapping = new EnumMap<SpanTypeEnum, IRI>(SpanTypeEnum.class);
        mapping.put(SpanTypeEnum.Sentence, SsoOntology.Sentence.getUri());
        mapping.put(SpanTypeEnum.Chunk, SsoOntology.Phrase.getUri());
        mapping.put(SpanTypeEnum.Token, SsoOntology.Word.getUri());
        return Collections.unmodifiableMap(mapping);
    }

    /** Creates the read-only value of {@link #LEXICAL_TYPE_TO_PHRASE_TYPE}. */
    private static Map<LexicalCategory, IRI> buildPhraseTypeMapping() {
        String olia = "http://purl.org/olia/olia.owl#";
        Map<LexicalCategory, IRI> mapping = new EnumMap<LexicalCategory, IRI>(LexicalCategory.class);
        mapping.put(LexicalCategory.Noun, new IRI(olia + "NounPhrase"));
        mapping.put(LexicalCategory.Verb, new IRI(olia + "VerbPhrase"));
        mapping.put(LexicalCategory.Adjective, new IRI(olia + "AdjectivePhrase"));
        mapping.put(LexicalCategory.Adverb, new IRI(olia + "AdverbPhrase"));
        // NOTE(review): the local name "ConjuctionPhrase" follows the spelling
        // of LexicalCategory.Conjuction - confirm against the OLiA ontology
        // before changing it, as this string ends up in written RDF data.
        mapping.put(LexicalCategory.Conjuction, new IRI(olia + "ConjuctionPhrase"));
        return Collections.unmodifiableMap(mapping);
    }
}

