/*
 * Decompiled with CFR 0.152.
 */
package gate.corpora;

import gate.AnnotationSet;
import gate.Document;
import gate.Resource;
import gate.corpora.Annotandum;
import gate.corpora.DocumentContentImpl;
import gate.corpora.MimeType;
import gate.corpora.TextualDocumentFormat;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.AutoInstance;
import gate.creole.metadata.CreoleResource;
import gate.util.DocumentFormatException;
import gate.util.InvalidOffsetException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

@CreoleResource(name="GATE CoNLL Document Format", isPrivate=true, autoinstances={@AutoInstance(hidden=true)})
public class ConllDocumentFormat
extends TextualDocumentFormat {
    private static final long serialVersionUID = 5756433194230855515L;
    public static final String ANNOTATION_COLUMN_FEATURE = "column";
    public static final String ANNOTATION_KIND_FEATURE = "kind";
    private static final boolean DEBUG = false;

    @Override
    public void unpackMarkup(Document doc) throws DocumentFormatException {
        if (doc == null || doc.getSourceUrl() == null && doc.getContent() == null) {
            throw new DocumentFormatException("GATE document is null or no content found. Nothing to parse!");
        }
        this.setNewLineProperty(doc);
        String[] lines = doc.getContent().toString().split("[\\n\\r]+");
        StringBuilder newContent = new StringBuilder();
        ArrayList<Annotandum> annotanda = new ArrayList<Annotandum>();
        HashMap<String, Annotandum> inProgress = new HashMap<String, Annotandum>();
        long oldEnd = 0L;
        long start = 0L;
        long end = 0L;
        for (String line : lines) {
            oldEnd = end;
            start = newContent.length();
            String[] items = line.split("\\s+");
            if (items.length == 0) {
                newContent.append("\n");
                end = newContent.length();
                this.finishAllTags(inProgress, annotanda, oldEnd);
                continue;
            }
            String token = items[0];
            newContent.append(token);
            end = newContent.length();
            newContent.append(' ');
            annotanda.add(Annotandum.makeToken(start, end, token));
            annotanda.add(Annotandum.makeSpaceToken(end));
            for (int column = 1; column < items.length; ++column) {
                String type;
                if (items[column].equals("O")) {
                    this.finishAllTags(inProgress, annotanda, oldEnd);
                    continue;
                }
                if (items[column].length() > 2 && items[column].startsWith("U-")) {
                    type = items[column].substring(2);
                    this.finishTag(type, inProgress, annotanda, oldEnd);
                    annotanda.add(new Annotandum(type, start, end, column, true));
                    continue;
                }
                if (items[column].length() > 2 && items[column].startsWith("L-")) {
                    type = items[column].substring(2);
                    if (inProgress.containsKey(type)) {
                        ((Annotandum)inProgress.get((Object)type)).endOffset = end;
                    } else {
                        inProgress.put(type, new Annotandum(type, start, end, column, true));
                    }
                    this.finishTag(type, inProgress, annotanda, end);
                    continue;
                }
                if (items[column].length() > 2 && items[column].startsWith("B-")) {
                    type = items[column].substring(2);
                    this.finishTag(type, inProgress, annotanda, oldEnd);
                    inProgress.put(type, new Annotandum(type, start, end, column, true));
                    continue;
                }
                if (items[column].length() > 2 && items[column].startsWith("I-")) {
                    type = items[column].substring(2);
                    if (inProgress.containsKey(type)) {
                        ((Annotandum)inProgress.get((Object)type)).endOffset = end;
                        continue;
                    }
                    inProgress.put(type, new Annotandum(type, start, end, column, true));
                    continue;
                }
                Annotandum tag = new Annotandum(items[column], start, end, column, false);
                annotanda.add(tag);
            }
        }
        this.finishAllTags(inProgress, annotanda, end);
        try {
            DocumentContentImpl newContentImpl = new DocumentContentImpl(newContent.toString());
            doc.edit(0L, doc.getContent().size(), newContentImpl);
            long newSize = doc.getContent().size();
            AnnotationSet originalMarkups = doc.getAnnotations("Original markups");
            for (Annotandum ann : annotanda) {
                originalMarkups.add(ann.startOffset, ann.endOffset, ann.type, ann.features);
            }
        }
        catch (InvalidOffsetException e) {
            throw new DocumentFormatException(e);
        }
    }

    private void finishAllTags(Map<String, Annotandum> annsUnderway, List<Annotandum> annsFinished, long cutoff) {
        for (Annotandum ann : annsUnderway.values()) {
            if (ann.endOffset == null) {
                ann.endOffset = cutoff;
            }
            annsFinished.add(ann);
        }
        annsUnderway.clear();
    }

    private void finishTag(String type, Map<String, Annotandum> annsUnderway, List<Annotandum> annsFinished, long cutoff) {
        Annotandum ann = annsUnderway.remove(type);
        if (ann != null) {
            if (ann.endOffset == null) {
                ann.endOffset = cutoff;
            }
            annsFinished.add(ann);
        }
    }

    @Override
    public Resource init() throws ResourceInstantiationException {
        MimeType mime = new MimeType("text", "x-conll");
        mimeString2ClassHandlerMap.put(mime.getType() + "/" + mime.getSubtype(), this);
        mimeString2mimeTypeMap.put(mime.getType() + "/" + mime.getSubtype(), mime);
        suffixes2mimeTypeMap.put("conll", mime);
        suffixes2mimeTypeMap.put("iob", mime);
        this.setMimeType(mime);
        return this;
    }
}

