public class BlueCasUtil extends Object
See also: JCases, JCasUtil
| Modifier and Type | Class and Description |
|---|---|
static class |
BlueCasUtil.Position |
| Constructor and Description |
|---|
BlueCasUtil() |
| Modifier and Type | Method and Description |
|---|---|
static ArrayList<org.apache.uima.jcas.JCas> |
asList(org.apache.uima.collection.CollectionReader cr) |
static <T extends org.apache.uima.jcas.cas.TOP> List<T> |
asList(Iterable<T> select)
(Potentially) Slow, but convenient
|
static int |
distance(org.apache.uima.jcas.tcas.Annotation a1,
org.apache.uima.jcas.tcas.Annotation a2) |
static <T> Collection<T> |
filterStrict(Iterable<T> raw,
Class<?> strictClass) |
static List<org.apache.uima.jcas.tcas.Annotation> |
findOverlapping(org.apache.uima.jcas.JCas jcas,
org.apache.uima.jcas.tcas.Annotation annotIn) |
static void |
fixNoSentences(org.apache.uima.jcas.JCas jCas)
If this cas has no Sentence annotation, creates one with the whole cas
text
|
static void |
fixNoText(org.apache.uima.jcas.JCas jCas)
If this cas has no text, set it to ""
|
static String |
getHeaderDocId(org.apache.uima.jcas.JCas jCas) |
static int |
getHeaderIntDocId(org.apache.uima.jcas.JCas jCas) |
static String |
getHeaderSource(org.apache.uima.jcas.JCas jCas) |
static String |
getSinglePosTag(Token t) |
static String |
getTitle(org.apache.uima.jcas.JCas jCas) |
static boolean |
haveSameBeginEnd(org.apache.uima.jcas.tcas.Annotation a,
org.apache.uima.jcas.tcas.Annotation b) |
static String |
inspect(org.apache.uima.jcas.tcas.Annotation a)
Util to print an annotation for inspection. Prints some text before and
after this annotation, and add '{', '}' around the annotation text.
|
static String |
inspect(org.apache.uima.jcas.JCas jcas,
int begin,
int end) |
static BlueCasUtil.Position |
isBefore(org.apache.uima.jcas.tcas.Annotation a1,
org.apache.uima.jcas.tcas.Annotation a2) |
static boolean |
isContained(org.apache.uima.jcas.tcas.Annotation a1,
org.apache.uima.jcas.tcas.Annotation a2) |
static boolean |
isDocAnnot(org.apache.uima.cas.text.AnnotationFS a) |
static boolean |
isEmptyText(org.apache.uima.jcas.JCas jCas) |
static Iterator<org.apache.uima.jcas.JCas> |
iterator(org.apache.uima.collection.CollectionReader cr) |
static boolean |
keepDoc(org.apache.uima.jcas.JCas jCas)
Whether this document should be kept for analysis, based on:
language == en
OOV < 0.4 (see
TooMuchOOVFilterAnnotator)
Enough tokens per page (see TooFewTokensFilterAnnotator)
|
static int |
maxEnd(org.apache.uima.jcas.tcas.Annotation a1,
org.apache.uima.jcas.tcas.Annotation a2) |
static int |
minBegin(org.apache.uima.jcas.tcas.Annotation a1,
org.apache.uima.jcas.tcas.Annotation a2) |
static List<org.apache.uima.jcas.tcas.Annotation> |
selectCovered(org.apache.uima.cas.CAS cas,
org.apache.uima.cas.text.AnnotationFS coveringAnnotation)
REM: modified from CasUtils.
|
static <T extends org.apache.uima.jcas.cas.TOP> Collection<T> |
selectStrict(org.apache.uima.jcas.JCas jcas,
Class<T> clasz,
Class<?> strictClass) |
static org.apache.uima.jcas.JCas |
setDocId(org.apache.uima.jcas.JCas jCas,
int docId) |
static List<String> |
toList(org.apache.uima.jcas.cas.StringArray tokens) |
public static <T extends org.apache.uima.jcas.cas.TOP> Collection<T> selectStrict(org.apache.uima.jcas.JCas jcas, Class<T> clasz, Class<?> strictClass)
Parameters: jcas, clasz, strictClass
public static boolean isEmptyText(org.apache.uima.jcas.JCas jCas)
public static List<org.apache.uima.jcas.tcas.Annotation> findOverlapping(org.apache.uima.jcas.JCas jcas, org.apache.uima.jcas.tcas.Annotation annotIn)
public static <T> Collection<T> filterStrict(Iterable<T> raw, Class<?> strictClass)
public static int getHeaderIntDocId(org.apache.uima.jcas.JCas jCas)
Returns: Header.getDocId() for this jCas as an int, if set; -1 otherwise
public static String getHeaderDocId(org.apache.uima.jcas.JCas jCas)
Returns: Header.getDocId() for this jCas, if set; null otherwise
public static String getHeaderSource(org.apache.uima.jcas.JCas jCas)
Returns: Header.getSource() for this jCas, if set; null otherwise
public static void fixNoSentences(org.apache.uima.jcas.JCas jCas)
public static void fixNoText(org.apache.uima.jcas.JCas jCas)
public static Iterator<org.apache.uima.jcas.JCas> iterator(org.apache.uima.collection.CollectionReader cr)
public static ArrayList<org.apache.uima.jcas.JCas> asList(org.apache.uima.collection.CollectionReader cr)
public static String inspect(org.apache.uima.jcas.JCas jcas, int begin, int end)
public static String inspect(org.apache.uima.jcas.tcas.Annotation a)
public static org.apache.uima.jcas.JCas setDocId(org.apache.uima.jcas.JCas jCas,
int docId)
public static boolean haveSameBeginEnd(org.apache.uima.jcas.tcas.Annotation a,
org.apache.uima.jcas.tcas.Annotation b)
public static String getTitle(org.apache.uima.jcas.JCas jCas)
Returns: Header.getTitle() for this jCas, if set; empty string otherwise
public static List<org.apache.uima.jcas.tcas.Annotation> selectCovered(org.apache.uima.cas.CAS cas, org.apache.uima.cas.text.AnnotationFS coveringAnnotation)
Based on Subiterator: uses the same approach, except that type priorities
are ignored.
Parameters: cas - a CAS; coveringAnnotation - the covering annotation.
See also: Subiterator
public static boolean keepDoc(org.apache.uima.jcas.JCas jCas)
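Whether this document should be kept for analysis, based on:
- language == en
- OOV < 0.4 (see TooMuchOOVFilterAnnotator)
- Enough tokens per page (see TooFewTokensFilterAnnotator)
public static <T extends org.apache.uima.jcas.cas.TOP> List<T> asList(Iterable<T> select)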
Parameters: select - comes from JCasUtil select.
public static boolean isDocAnnot(org.apache.uima.cas.text.AnnotationFS a)
public static int distance(org.apache.uima.jcas.tcas.Annotation a1,
org.apache.uima.jcas.tcas.Annotation a2)
Parameters: annot1, annot2
public static int maxEnd(org.apache.uima.jcas.tcas.Annotation a1,
org.apache.uima.jcas.tcas.Annotation a2)
public static int minBegin(org.apache.uima.jcas.tcas.Annotation a1,
org.apache.uima.jcas.tcas.Annotation a2)
public static BlueCasUtil.Position isBefore(org.apache.uima.jcas.tcas.Annotation a1, org.apache.uima.jcas.tcas.Annotation a2)
Parameters: annot1, annot2
public static boolean isContained(org.apache.uima.jcas.tcas.Annotation a1,
org.apache.uima.jcas.tcas.Annotation a2)
Parameters: annot1, annot2
Copyright © 2015 Bluebrain Project. All rights reserved.