package org.hl7.fhir.r4.elementmodel;

/*-
 * #%L
 * org.hl7.fhir.r4
 * %%
 * Copyright (C) 2014 - 2019 Health Level 7
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */


import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.exceptions.FHIRFormatError;
import org.hl7.fhir.r4.context.IWorkerContext;
import org.hl7.fhir.r4.elementmodel.Element.SpecialElement;
import org.hl7.fhir.r4.formats.IParser.OutputStyle;
import org.hl7.fhir.r4.model.ElementDefinition.TypeRefComponent;
import org.hl7.fhir.r4.model.StructureDefinition;
import org.hl7.fhir.r4.utils.SnomedExpressions;
import org.hl7.fhir.r4.utils.SnomedExpressions.Expression;
import org.hl7.fhir.r4.utils.formats.Turtle;
import org.hl7.fhir.r4.utils.formats.Turtle.Complex;
import org.hl7.fhir.r4.utils.formats.Turtle.Section;
import org.hl7.fhir.r4.utils.formats.Turtle.Subject;
import org.hl7.fhir.r4.utils.formats.Turtle.TTLComplex;
import org.hl7.fhir.r4.utils.formats.Turtle.TTLList;
import org.hl7.fhir.r4.utils.formats.Turtle.TTLLiteral;
import org.hl7.fhir.r4.utils.formats.Turtle.TTLObject;
import org.hl7.fhir.r4.utils.formats.Turtle.TTLURL;
import org.hl7.fhir.utilities.TextFile;
import org.hl7.fhir.utilities.Utilities;
import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity;
import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType;


/**
 * Reads and writes the element model ({@link Element}) in Turtle (RDF) form.
 *
 * <p>Parsing locates the node flagged as the tree root and walks its predicates against the
 * child properties of the matching structure definition. Composing emits an ontology header
 * followed by one subject for the resource, with nested predicates for each child element.
 *
 * <p>The instance keeps the base URL of the most recent compose call in a field, so a single
 * instance carries state between calls.
 */
public class TurtleParser extends ParserBase {

  /** Predicate under which this parser looks up the type of a node. */
  private static final String RDFS_TYPE = "http://www.w3.org/2000/01/rdf-schema#type";

  /** Base URL used by the compose methods to build subject ids and relative reference URIs. */
  private String base;

  public static String FHIR_URI_BASE = "http://hl7.org/fhir/";
  public static String FHIR_VERSION_BASE = "http://build.fhir.org/";

  public TurtleParser(IWorkerContext context) {
    super(context);
  }

  /**
   * Parses a Turtle document from a stream.
   *
   * <p>Under {@code ValidationPolicy.EVERYTHING} a failure while reading the Turtle syntax is
   * logged as a fatal issue and {@code null} is returned; under any other policy the exception
   * propagates to the caller.
   *
   * @param input the stream holding the Turtle text
   * @return the root element, or {@code null} when an error was logged instead of thrown
   */
  @Override
  public Element parse(InputStream input) throws IOException, FHIRException {
    Turtle src = new Turtle();
    if (policy == ValidationPolicy.EVERYTHING) {
      try {
        src.parse(TextFile.streamToString(input));
      } catch (Exception e) {
        logError(-1, -1, "(document)", IssueType.INVALID, "Error parsing Turtle: "+e.getMessage(), IssueSeverity.FATAL);
        return null;
      }
      return parse(src);
    } else {
      src.parse(TextFile.streamToString(input));
      return parse(src);
    }
  }

  /**
   * Finds the object whose {@code nodeRole} predicate has the value {@code treeRoot} and parses
   * it as the resource. When no such object exists, the problem is logged (policy
   * {@code EVERYTHING}) or thrown as a {@link FHIRFormatError} (any other policy).
   */
  private Element parse(Turtle src) throws FHIRException {
    // we actually ignore the stated URL here
    String nodeRole = FHIR_URI_BASE + "nodeRole";
    String treeRoot = FHIR_URI_BASE + "treeRoot";
    for (TTLComplex cmp : src.getObjects().values()) {
      if (cmp.getPredicates().containsKey(nodeRole) && cmp.getPredicates().get(nodeRole).hasValue(treeRoot)) {
        return parse(src, cmp);
      }
    }
    // still here: well, we didn't find a start point
    String msg = "Error parsing Turtle: unable to find any node maked as the entry point (where " + FHIR_URI_BASE + "nodeRole = " + FHIR_URI_BASE + "treeRoot)";
    if (policy == ValidationPolicy.EVERYTHING) {
      logError(-1, -1, "(document)", IssueType.INVALID, msg, IssueSeverity.FATAL);
      return null;
    } else {
      throw new FHIRFormatError(msg);
    }
  }

  /**
   * Builds the root element from the given Turtle object.
   *
   * <p>The resource type is taken from the object's type predicate: the text after the last
   * {@code /} of the type URI is the name, the text before it is the namespace.
   *
   * @return the populated root element, or {@code null} when the type is missing, is not a URL,
   *         or no structure definition was returned for it
   */
  private Element parse(Turtle src, TTLComplex cmp) throws FHIRException {
    TTLObject type = cmp.getPredicates().get(RDFS_TYPE);
    if (type == null) {
      logError(cmp.getLine(), cmp.getCol(), "(document)", IssueType.INVALID, "Unknown resource type (missing rdfs:type)", IssueSeverity.FATAL);
      return null;
    }
    type = selectFhirType(type);
    if (!(type instanceof TTLURL)) {
      logError(cmp.getLine(), cmp.getCol(), "(document)", IssueType.INVALID, "Unexpected datatype for rdfs:type)", IssueSeverity.FATAL);
      return null;
    }
    String name = ((TTLURL) type).getUri();
    String ns = name.substring(0, name.lastIndexOf("/"));
    name = name.substring(name.lastIndexOf("/")+1);
    String path = "/"+name;

    StructureDefinition sd = getDefinition(cmp.getLine(), cmp.getCol(), ns, name);
    if (sd == null) {
      return null;
    }

    Element result = new Element(name, new Property(context, sd.getSnapshot().getElement().get(0), sd));
    result.markLocation(cmp.getLine(), cmp.getCol());
    result.setType(name);
    parseChildren(src, path, cmp, result, false);
    result.numberChildren();
    return result;
  }

  /**
   * When the type is a list, returns its first member that is a URL starting with
   * {@link #FHIR_URI_BASE}; otherwise (or when no member qualifies) returns the argument
   * unchanged.
   */
  private TTLObject selectFhirType(TTLObject type) {
    if (type instanceof TTLList) {
      // this is actually broken - really we have to look through the structure definitions at this point
      for (TTLObject candidate : ((TTLList) type).getList()) {
        if (candidate instanceof TTLURL && ((TTLURL) candidate).getUri().startsWith(FHIR_URI_BASE)) {
          return candidate;
        }
      }
    }
    return type;
  }

  /**
   * Parses every child property of {@code context} out of {@code object}, then (unless the
   * policy is {@code NONE}) logs an error for each predicate of {@code object} that was not
   * looked up.
   *
   * @param primitive when true, the {@code value} predicate is treated as already handled
   */
  private void parseChildren(Turtle src, String path, TTLComplex object, Element context, boolean primitive) throws FHIRException {

    List<Property> properties = context.getProperty().getChildProperties(context.getName(), null);
    Set<String> processed = new HashSet<String>();
    if (primitive) {
      processed.add(FHIR_URI_BASE + "value");
    }

    // note that we do not trouble ourselves to maintain the wire format order here - we don't even know what it was anyway
    // first pass: process the properties
    for (Property property : properties) {
      if (property.isChoice()) {
        // a choice property is tried once per allowed type, with the [x] suffix replaced by the type name
        for (TypeRefComponent type : property.getDefinition().getType()) {
          String eName = property.getName().substring(0, property.getName().length()-3) + Utilities.capitalize(type.getCode());
          parseChild(src, object, context, processed, property, path, getFormalName(property, eName));
        }
      } else {
        parseChild(src, object, context, processed, property, path, getFormalName(property));
      }
    }

    // second pass: check for things not processed
    if (policy != ValidationPolicy.NONE) {
      for (String u : object.getPredicates().keySet()) {
        if (!processed.contains(u)) {
          TTLObject n = object.getPredicates().get(u);
          logError(n.getLine(), n.getCol(), path, IssueType.STRUCTURE, "Unrecognised predicate '"+u+"'", IssueSeverity.ERROR);
        }
      }
    }
  }

  /**
   * Looks up the predicate {@code FHIR_URI_BASE + name} on {@code object} and, when present,
   * parses its value (or each member, for a list-valued repeating property).
   */
  private void parseChild(Turtle src, TTLComplex object, Element context, Set<String> processed, Property property, String path, String name) throws FHIRException {
    // Record the full predicate URI: the unrecognised-predicate check in parseChildren compares
    // against the predicate keys, which are looked up with the FHIR_URI_BASE prefix.
    String predicate = FHIR_URI_BASE + name;
    processed.add(predicate);
    String npath = path+"/"+property.getName();
    TTLObject e = object.getPredicates().get(predicate);
    if (e == null) {
      return;
    }
    if (property.isList() && (e instanceof TTLList)) {
      TTLList arr = (TTLList) e;
      for (TTLObject am : arr.getList()) {
        parseChildInstance(src, npath, object, context, property, name, am);
      }
    } else {
      parseChildInstance(src, npath, object, context, property, name, e);
    }
  }

  /**
   * Parses one value of a property: resources are delegated to {@link #parseResource}, complex
   * nodes become a child element (with its literal {@code value} copied across for primitive
   * types), and anything else is logged as an error.
   */
  private void parseChildInstance(Turtle src, String npath, TTLComplex object, Element context, Property property, String name, TTLObject e) throws FHIRException {
    if (property.isResource()) {
      parseResource(src, npath, object, context, property, name, e);
    } else if (e instanceof TTLComplex) {
      TTLComplex child = (TTLComplex) e;
      Element n = new Element(tail(name), property).markLocation(e.getLine(), e.getCol());
      context.getChildren().add(n);
      if (property.isPrimitive(property.getType(tail(name)))) {
        parseChildren(src, npath, child, n, true);
        TTLObject val = child.getPredicates().get(FHIR_URI_BASE + "value");
        if (val != null) {
          if (val instanceof TTLLiteral) {
            // todo: check type
            n.setValue(((TTLLiteral) val).getValue());
          } else {
            // report the class of the offending value, not of the enclosing node
            logError(object.getLine(), object.getCol(), npath, IssueType.INVALID, "This property must be a Literal, not a "+val.getClass().getName(), IssueSeverity.ERROR);
          }
        }
      } else {
        parseChildren(src, npath, child, n, false);
      }
    } else {
      logError(object.getLine(), object.getCol(), npath, IssueType.INVALID, "This property must be a URI or bnode, not a "+e.getClass().getName(), IssueSeverity.ERROR);
    }
  }


  /** Returns the text after the last {@code .} of {@code name} (the whole name when there is none). */
  private String tail(String name) {
    return name.substring(name.lastIndexOf(".")+1);
  }

  /**
   * Parses a nested resource. The value is either an inline complex node or a URL that is
   * resolved against the objects of the document; the resource type is read from the resolved
   * node's type predicate in the same way as for the root.
   *
   * @throws FHIRFormatError when the value is neither a complex node nor a URL
   */
  private void parseResource(Turtle src, String npath, TTLComplex object, Element context, Property property, String name, TTLObject e) throws FHIRException {
    TTLComplex obj;
    if (e instanceof TTLComplex) {
      obj = (TTLComplex) e;
    } else if (e instanceof TTLURL) {
      String url = ((TTLURL) e).getUri();
      obj = src.getObject(url);
      if (obj == null) {
        logError(e.getLine(), e.getCol(), npath, IssueType.INVALID, "reference to "+url+" cannot be resolved", IssueSeverity.FATAL);
        return;
      }
    } else {
      throw new FHIRFormatError("Wrong type for resource");
    }

    TTLObject type = obj.getPredicates().get(RDFS_TYPE);
    if (type == null) {
      logError(object.getLine(), object.getCol(), npath, IssueType.INVALID, "Unknown resource type (missing rdfs:type)", IssueSeverity.FATAL);
      return;
    }
    type = selectFhirType(type);
    if (!(type instanceof TTLURL)) {
      logError(object.getLine(), object.getCol(), npath, IssueType.INVALID, "Unexpected datatype for rdfs:type)", IssueSeverity.FATAL);
      return;
    }
    String rt = ((TTLURL) type).getUri();
    String ns = rt.substring(0, rt.lastIndexOf("/"));
    rt = rt.substring(rt.lastIndexOf("/")+1);

    StructureDefinition sd = getDefinition(object.getLine(), object.getCol(), ns, rt);
    if (sd == null) {
      return;
    }

    Element n = new Element(tail(name), property).markLocation(object.getLine(), object.getCol());
    context.getChildren().add(n);
    n.updateProperty(new Property(this.context, sd.getSnapshot().getElement().get(0), sd), SpecialElement.fromProperty(n.getProperty()), property);
    n.setType(rt);
    parseChildren(src, npath, obj, n, false);
  }

  /** Returns the property's base path, falling back to the definition's own path when the base path is null. */
  private String getFormalName(Property property) {
    String en = property.getDefinition().getBase().getPath();
    if (en == null) {
      en = property.getDefinition().getPath();
    }
    return en;
  }

  /**
   * Returns the formal name of a choice property with its last path segment replaced by
   * {@code elementName}.
   *
   * @throws Error when the property's path does not end in {@code [x]}
   */
  private String getFormalName(Property property, String elementName) {
    String en = property.getDefinition().getBase().getPath();
    if (en == null) {
      en = property.getDefinition().getPath();
    }
    if (!en.endsWith("[x]")) {
      throw new Error("Attempt to replace element name for a non-choice type");
    }
    return en.substring(0, en.lastIndexOf(".")+1)+elementName;
  }


  /**
   * Writes the element as Turtle to the stream.
   *
   * @param style not used by this implementation
   * @param base base URL for the subject id and for relative references; may be {@code null}
   */
  @Override
  public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws IOException, FHIRException {
    Turtle ttl = new Turtle();
    compose(e, ttl, base); // also records base for the helpers that build URIs
    ttl.commit(stream, false);
  }



  /**
   * Adds the element to an existing Turtle document: the standard prefixes, an ontology
   * header section, and a resource section holding the element and its children.
   *
   * @param base base URL for the subject id and for relative references; may be {@code null}
   */
  public void compose(Element e, Turtle ttl, String base) throws FHIRException {
    // genSubjectId and getReferenceURI read the field, so it has to be set on this entry
    // point too; previously only the stream overload did so and this parameter was ignored.
    this.base = base;

    ttl.prefix("fhir", FHIR_URI_BASE);
    ttl.prefix("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
    ttl.prefix("owl", "http://www.w3.org/2002/07/owl#");
    ttl.prefix("xsd", "http://www.w3.org/2001/XMLSchema#");


    Section section = ttl.section("resource");
    String subjId = genSubjectId(e);

    String ontologyId = subjId.replace(">", ".ttl>");
    Section ontology = ttl.section("ontology header");
    ontology.triple(ontologyId, "a", "owl:Ontology");
    ontology.triple(ontologyId, "owl:imports", "fhir:fhir.ttl");
    if (ontologyId.startsWith("<" + FHIR_URI_BASE)) {
      ontology.triple(ontologyId, "owl:versionIRI", ontologyId.replace(FHIR_URI_BASE, FHIR_VERSION_BASE));
    }

    Subject subject = section.triple(subjId, "a", "fhir:" + e.getType());
    subject.linkedPredicate("fhir:nodeRole", "fhir:treeRoot", linkResolver == null ? null : linkResolver.resolvePage("rdf.html#tree-root"));

    for (Element child : e.getChildren()) {
      composeElement(section, subject, child, null);
    }

  }

  /**
   * Extracts the path segment that follows {@link #FHIR_URI_BASE} from an angle-bracketed URI.
   *
   * @param uri a URI in the form {@code <...>}
   * @return the segment between the base and the next {@code /}, or {@code null} when the URI
   *         does not start with {@code <} plus the base or has no further {@code /}
   */
  protected String getURIType(String uri) {
    if (uri.startsWith("<" + FHIR_URI_BASE)) {
      int start = FHIR_URI_BASE.length() + 1; // + 1 skips the leading '<'
      if (uri.substring(start).contains("/")) {
        return uri.substring(start, uri.indexOf('/', start));
      }
    }
    return null;
  }

  /**
   * Turns a reference into an angle-bracketed URI: absolute http(s) references are used as
   * they are, references containing a {@code /} are appended to the base when one is set, and
   * anything else yields {@code null}.
   */
  protected String getReferenceURI(String ref) {
    if (ref != null && (ref.startsWith("http://") || ref.startsWith("https://"))) {
      return "<" + ref + ">";
    } else if (base != null && ref != null && ref.contains("/")) {
      return "<" + Utilities.appendForwardSlash(base) + ref + ">";
    } else {
      return null;
    }
  }

  /** Adds a {@code fhir:link} predicate for the element's {@code reference} child, when it yields a URI. */
  protected void decorateReference(Complex t, Element coding) {
    String refURI = getReferenceURI(coding.getChildValue("reference"));
    if (refURI != null) {
      t.linkedPredicate("fhir:link", refURI, linkResolver == null ? null : linkResolver.resolvePage("rdf.html#reference"));
    }
  }

  /** Adds a {@code fhir:link} predicate for the element's primitive value, when it yields a URI. */
  protected void decorateCanonical(Complex t, Element canonical) {
    String refURI = getReferenceURI(canonical.primitiveValue());
    if (refURI != null) {
      t.linkedPredicate("fhir:link", refURI, linkResolver == null ? null : linkResolver.resolvePage("rdf.html#reference"));
    }
  }

  /**
   * Builds the angle-bracketed subject id for the element from the base, its type and its
   * {@code id} child. Returns an empty string when either the base or the id is missing.
   */
  private String genSubjectId(Element e) {
    String id = e.getChildValue("id");
    if (base == null || id == null) {
      return "";
    } else if (base.endsWith("#")) {
      return "<" + base + e.getType() + "-" + id + ">";
    } else {
      return "<" + Utilities.pathURL(base, e.getType(), id) + ">";
    }
  }

  /** Percent-encodes the characters {@code : ; = ,} and leaves every other character untouched. */
  private String urlescape(String s) {
    StringBuilder b = new StringBuilder();
    for (char ch : s.toCharArray()) {
      if (Utilities.charInSet(ch, ':', ';', '=', ',')) {
        b.append("%"+Integer.toHexString(ch));
      } else {
        b.append(ch);
      }
    }
    return b.toString();
  }

  /**
   * Emits one element (and, recursively, its children) as a predicate of {@code ctxt}.
   *
   * <p>A bundle entry resource whose parent has a {@code fullUrl} is written as its own subject
   * in the section and linked from {@code ctxt}; every other element is nested inline.
   *
   * @param parent the element that contains {@code element}, or {@code null} at the top level
   */
  private void composeElement(Section section, Complex ctxt, Element element, Element parent) throws FHIRException {
    String en = getFormalName(element);

    Complex t;
    if (element.getSpecial() == SpecialElement.BUNDLE_ENTRY && parent != null && parent.getNamedChildValue("fullUrl") != null) {
      String url = "<"+parent.getNamedChildValue("fullUrl")+">";
      ctxt.linkedPredicate("fhir:"+en, url, linkResolver == null ? null : linkResolver.resolveProperty(element.getProperty()));
      t = section.subject(url);
    } else {
      t = ctxt.linkedPredicate("fhir:"+en, linkResolver == null ? null : linkResolver.resolveProperty(element.getProperty()));
    }
    if (element.getSpecial() != null) {
      t.linkedPredicate("a", "fhir:"+element.fhirType(), linkResolver == null ? null : linkResolver.resolveType(element.fhirType()));
    }
    if (element.hasValue()) {
      t.linkedPredicate("fhir:value", ttlLiteral(element.getValue(), element.getType()), linkResolver == null ? null : linkResolver.resolveType(element.getType()));
    }
    if (element.getProperty().isList() && (!element.isResource() || element.getSpecial() == SpecialElement.CONTAINED)) {
      t.linkedPredicate("fhir:index", Integer.toString(element.getIndex()), linkResolver == null ? null : linkResolver.resolvePage("rdf.html#index"));
    }

    if ("Coding".equals(element.getType())) {
      decorateCoding(t, element, section);
    }
    if (Utilities.existsInList(element.getType(), "Reference")) {
      decorateReference(t, element);
    } else if (Utilities.existsInList(element.getType(), "canonical")) {
      decorateCanonical(t, element);
    }

    if ("canonical".equals(element.getType())) {
      // NOTE(review): the raw primitive value is passed here, while getURIType only matches
      // values starting with '<'; unlike the Reference branch below this looks unreachable for
      // ordinary canonical values. Left unchanged because fixing it alters the output - confirm.
      String refURI = element.primitiveValue();
      if (refURI != null) {
        String uriType = getURIType(refURI);
        if (uriType != null && !section.hasSubject(refURI)) {
          section.triple(refURI, "a", "fhir:" + uriType);
        }
      }
    }

    if ("Reference".equals(element.getType())) {
      String refURI = getReferenceURI(element.getChildValue("reference"));
      if (refURI != null) {
        String uriType = getURIType(refURI);
        if (uriType != null && !section.hasSubject(refURI)) {
          section.triple(refURI, "a", "fhir:" + uriType);
        }
      }
    }

    for (Element child : element.getChildren()) {
      if ("xhtml".equals(child.getType())) {
        // xhtml is written as a single literal rather than being walked as a subtree
        String childfn = getFormalName(child);
        t.predicate("fhir:" + childfn, ttlLiteral(child.getValue(), child.getType()));
      } else {
        composeElement(section, t, child, element);
      }
    }
  }

  /**
   * Computes the predicate name (without the {@code fhir:} prefix) for an element.
   *
   * <p>Special elements map to fixed paths (or, for parameters, the element property's path);
   * ordinary elements use their base path, falling back to the definition path. A trailing
   * {@code [x]} is removed and the capitalised type appended; the type is also appended when
   * the definition allows more than one type and they are not all {@code Reference}.
   */
  private String getFormalName(Element element) {
    String en = null;
    if (element.getSpecial() == null) {
      if (element.getProperty().getDefinition().hasBase()) {
        en = element.getProperty().getDefinition().getBase().getPath();
      }
    } else if (element.getSpecial() == SpecialElement.BUNDLE_ENTRY) {
      en = "Bundle.entry.resource";
    } else if (element.getSpecial() == SpecialElement.BUNDLE_OUTCOME) {
      en = "Bundle.entry.response.outcome";
    } else if (element.getSpecial() == SpecialElement.PARAMETER) {
      en = element.getElementProperty().getDefinition().getPath();
    } else { // CONTAINED
      en = "DomainResource.contained";
    }

    if (en == null) {
      en = element.getProperty().getDefinition().getPath();
    }
    boolean doType = false;
    if (en.endsWith("[x]")) {
      en = en.substring(0, en.length()-3);
      doType = true;
    }
    if (doType || (element.getProperty().getDefinition().getType().size() > 1 && !allReference(element.getProperty().getDefinition().getType()))) {
      en = en + Utilities.capitalize(element.getType());
    }
    return en;
  }

  /** Returns true when every type in the list has the code {@code Reference} (vacuously true for an empty list). */
  private boolean allReference(List<TypeRefComponent> types) {
    for (TypeRefComponent t : types) {
      if (!t.getCode().equals("Reference")) {
        return false;
      }
    }
    return true;
  }

  /**
   * Formats a primitive value as a quoted Turtle literal, followed by an XSD datatype
   * annotation for the types that have one.
   *
   * <p>For {@code date} and {@code dateTime} the datatype is chosen from the length of the
   * value once anything from the first {@code -} or {@code +} at or after index 10 has been
   * ignored: longer than 10 is {@code xsd:dateTime}, 10 is {@code xsd:date}, 7 is
   * {@code xsd:gYearMonth}, 4 is {@code xsd:gYear}. The emitted text is always the full value.
   *
   * @param value the value to quote
   * @param type the FHIR primitive type name; {@code null} or an unlisted type gives a plain literal
   * @return the escaped, quoted literal with any datatype suffix
   */
  public static String ttlLiteral(String value, String type) {
    String xst = "";
    if ("boolean".equals(type)) {
      xst = "^^xsd:boolean";
    } else if ("integer".equals(type)) {
      xst = "^^xsd:integer";
    } else if ("unsignedInt".equals(type)) {
      xst = "^^xsd:nonNegativeInteger";
    } else if ("positiveInt".equals(type)) {
      xst = "^^xsd:positiveInteger";
    } else if ("decimal".equals(type)) {
      xst = "^^xsd:decimal";
    } else if ("base64Binary".equals(type)) {
      xst = "^^xsd:base64Binary";
    } else if ("instant".equals(type)) {
      xst = "^^xsd:dateTime";
    } else if ("time".equals(type)) {
      xst = "^^xsd:time";
    } else if ("date".equals(type) || "dateTime".equals(type)) {
      String v = value;
      if (v.length() > 10) {
        // drop a trailing offset so that only the date/time part decides the datatype
        int tz = value.indexOf('-', 10);
        if (tz == -1) {
          tz = value.indexOf('+', 10);
        }
        v = tz == -1 ? value : value.substring(0, tz);
      }
      if (v.length() > 10) {
        xst = "^^xsd:dateTime";
      } else if (v.length() == 10) {
        xst = "^^xsd:date";
      } else if (v.length() == 7) {
        xst = "^^xsd:gYearMonth";
      } else if (v.length() == 4) {
        xst = "^^xsd:gYear";
      }
    }

    return "\"" +Turtle.escape(value, true) + "\""+xst;
  }

  /**
   * Adds a type assertion for SNOMED CT and LOINC codings so the node is also typed with the
   * concept itself. Codings without both a system and a code, or from any other system, are
   * left untouched.
   */
  protected void decorateCoding(Complex t, Element coding, Section section) throws FHIRException {
    String system = coding.getChildValue("system");
    String code = coding.getChildValue("code");

    if (system == null || code == null) {
      return;
    }
    if ("http://snomed.info/sct".equals(system)) {
      t.prefix("sct", "http://snomed.info/id/");
      if (code.contains(":") || code.contains("=")) {
        generateLinkedPredicate(t, code);
      } else {
        t.linkedPredicate("a", "sct:" + urlescape(code), null);
      }
    } else if ("http://loinc.org".equals(system)) {
      t.prefix("loinc", "http://loinc.org/rdf#");
      // Locale.ROOT keeps the upper-casing independent of the JVM's default locale
      t.linkedPredicate("a", "loinc:"+urlescape(code).toUpperCase(Locale.ROOT), null);
    }
  }

  /**
   * Handles a SNOMED CT code that contains {@code :} or {@code =}.
   *
   * <p>The code is parsed as an {@link Expression}, so a parse failure still propagates, but
   * nothing is written to {@code t} yet; the notes below sketch the intended output.
   */
  private void generateLinkedPredicate(Complex t, String code) throws FHIRException {
    // TODO: emit the parsed expression; for now the result is discarded
    SnomedExpressions.parse(code);
  }


//    128045006|cellulitis (disorder)|:{363698007|finding site|=56459004|foot structure|}
//    Grahame Grieve: or
//
//    64572001|disease|:{116676008|associated morphology|=72704001|fracture|,363698007|finding site|=(12611008|bone structure of tibia|:272741003|laterality|=7771000|left|)}
//    Harold Solbrig:
//    a sct:128045006,
//      rdfs:subClassOf [
//          a owl:Restriction;
//          owl:onProperty sct:609096000 ;
//          owl:someValuesFrom [
//                a owl:Restriction;
//                 owl:onProperty sct:363698007 ;
//                owl:someValuesFrom sct:56459004 ] ] ;
//    and
//
//    a sct:64572001,
//       rdfs:subclassOf  [
//           a owl:Restriction ;
//           owl:onProperty sct:60909600 ;
//           owl:someValuesFrom [
//                 a owl:Class ;
//                 owl:intersectionOf ( [
//                      a owl:Restriction;
//                      owl:onProperty sct:116676008;
//                     owl:someValuesFrom sct:72704001 ]
//                 [  a owl:Restriction;
//                      owl:onProperty sct:363698007
//                     owl:someValuesFrom [
//                           a owl:Class ;
//                           owl:intersectionOf(
//                                 sct:12611008
//                                 owl:someValuesFrom [
//                                         a owl:Restriction;
//                                         owl:onProperty sct:272741003;
//                                         owl:someValuesFrom sct:7771000
//                                  ] ) ] ] ) ] ]
//    (an approximation -- I'll have to feed it into a translator to be sure I've got it 100% right)
//

}