001package org.hl7.fhir.r4.utils.formats; 002 003/*- 004 * #%L 005 * org.hl7.fhir.r4 006 * %% 007 * Copyright (C) 2014 - 2019 Health Level 7 008 * %% 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 * #L% 021 */ 022 023 024import java.io.IOException; 025import java.io.OutputStream; 026import java.io.OutputStreamWriter; 027import java.io.UnsupportedEncodingException; 028import java.util.ArrayList; 029import java.util.Collections; 030import java.util.HashMap; 031import java.util.HashSet; 032import java.util.List; 033import java.util.Map; 034import java.util.Set; 035import java.util.UUID; 036 037import org.hl7.fhir.exceptions.FHIRFormatError; 038import org.hl7.fhir.utilities.Utilities; 039 040public class Turtle { 041 042 public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uFFFE"; 043 044 public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\="+GOOD_IRI_CHAR+"])+"; 045 public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?"; 046 047 // Object model 048 public abstract class Triple { 049 private String uri; 050 } 051 052 public class StringType extends Triple { 053 private String value; 054 055 public StringType(String value) { 056 super(); 057 this.value = value; 058 } 059 } 060 061 public class Complex extends Triple { 062 protected List<Predicate> predicates = new ArrayList<Predicate>(); 063 064 public Complex predicate(String predicate, String object) { 065 predicateSet.add(predicate); 066 objectSet.add(object); 067 return predicate(predicate, new StringType(object)); 068 } 069 070 public Complex linkedPredicate(String predicate, String object, String link) { 071 predicateSet.add(predicate); 072 objectSet.add(object); 073 return linkedPredicate(predicate, new StringType(object), link); 074 } 075 076 public Complex predicate(String predicate, Triple object) { 077 Predicate p = getPredicate(predicate); 078 if (p == null) { 079 p = new Predicate(); 080 p.predicate = predicate; 081 predicateSet.add(predicate); 082 predicates.add(p); 083 } 084 if (object instanceof StringType) 085 objectSet.add(((StringType) object).value); 086 p.objects.add(object); 087 return this; 088 } 089 090 protected Predicate getPredicate(String predicate) { 091 for (Predicate p : predicates) 092 if (p.predicate.equals(predicate)) 093 return p; 094 return null; 095 } 096 097 public Complex linkedPredicate(String predicate, Triple object, String link) { 098 Predicate p = getPredicate(predicate); 099 if (p == null) { 100 p = new Predicate(); 101 p.predicate = predicate; 102 p.link = link; 103 predicateSet.add(predicate); 104 predicates.add(p); 105 } 106 if (object instanceof StringType) 107 objectSet.add(((StringType) object).value); 108 p.objects.add(object); 109 return this; 110 } 111 112 public Complex predicate(String predicate) { 113 predicateSet.add(predicate); 114 Complex c = complex(); 115 predicate(predicate, c); 116 return c; 117 } 118 119 public Complex linkedPredicate(String predicate, String link) { 120 predicateSet.add(predicate); 121 Complex c = complex(); 122 linkedPredicate(predicate, c, link); 123 return c; 124 } 125 126 public void prefix(String code, String url) { 127 Turtle.this.prefix(code, url); 128 } 129 } 130 131 private class Predicate { 132 protected String predicate; 133 protected String link; 134 protected List<Triple> objects = new ArrayList<Turtle.Triple>(); 135 protected String comment; 136 137 public String getPredicate() { 138 return predicate; 139 } 140 public String makelink() { 141 if (link == null) 142 return predicate; 143 else 144 return "<a href=\""+link+"\">"+Utilities.escapeXml(predicate)+"</a>"; 145 } 146 147 public List<Triple> getObjects() { 148 return objects; 149 } 150 public String getComment() { 151 return comment; 152 } 153 } 154 155 public class Subject extends Complex { 156 private String id; 157 158 public Predicate predicate(String predicate, Triple object, String comment) { 159 Predicate p = getPredicate(predicate); 160 if (p == null) { 161 p = new Predicate(); 162 p.predicate = predicate; 163 predicateSet.add(predicate); 164 predicates.add(p); 165 p.comment = comment; 166 } 167 if (object instanceof StringType) 168 objectSet.add(((StringType) object).value); 169 p.objects.add(object); 170 return p; 171 } 172 173 public void comment(String comment) { 174 if (!Utilities.noString(comment)) { 175 predicate("rdfs:comment", literal(comment)); 176 predicate("dcterms:description", literal(comment)); 177 } 178 } 179 180 public void label(String label) { 181 if (!Utilities.noString(label)) { 182 predicate("rdfs:label", literal(label)); 183 predicate("dc:title", literal(label)); 184 } 185 } 186 187 } 188 189 public class Section { 190 private String name; 191 private List<Subject> subjects = new ArrayList<Subject>(); 192 193 public Subject triple(String subject, String predicate, String object, String comment) { 194 return triple(subject, predicate, new StringType(object), comment); 195 } 196 197 public Subject triple(String subject, String predicate, String object) { 198 return triple(subject, predicate, new StringType(object)); 199 } 200 201 public Subject triple(String subject, String predicate, Triple object) { 202 return triple(subject, predicate, object, null); 203 } 204 205 public Subject triple(String subject, String predicate, Triple object, String comment) { 206 Subject s = subject(subject); 207 s.predicate(predicate, object, comment); 208 return s; 209 } 210 211 public void comment(String subject, String comment) { 212 triple(subject, "rdfs:comment", literal(comment)); 213 triple(subject, "dcterms:description", literal(comment)); 214 } 215 216 public void label(String subject, String comment) { 217 triple(subject, "rdfs:label", literal(comment)); 218 triple(subject, "dc:title", literal(comment)); 219 } 220 221 public Subject subject(String subject) { 222 for (Subject ss : subjects) 223 if (ss.id.equals(subject)) 224 return ss; 225 Subject s = new Subject(); 226 s.id = subject; 227 subjects.add(s); 228 return s; 229 } 230 231 public boolean hasSubject(String subject) { 232 for (Subject ss : subjects) 233 if (ss.id.equals(subject)) 234 return true; 235 return false; 236 } 237 } 238 239 private List<Section> sections = new ArrayList<Section>(); 240 protected Set<String> subjectSet = new HashSet<String>(); 241 protected Set<String> predicateSet = new HashSet<String>(); 242 protected Set<String> objectSet = new HashSet<String>(); 243 protected Map<String, String> prefixes = new HashMap<String, String>(); 244 245 public void prefix(String code, String url) { 246 prefixes.put(code, url); 247 } 248 249 protected boolean hasSection(String sn) { 250 for (Section s : sections) 251 if (s.name.equals(sn)) 252 return true; 253 return false; 254 255 } 256 257 public Section section(String sn) { 258 if (hasSection(sn)) 259 throw new Error("Duplicate section name "+sn); 260 Section s = new Section(); 261 s.name = sn; 262 sections.add(s); 263 return s; 264 } 265 266 protected String matches(String url, String prefixUri, String prefix) { 267 if (url.startsWith(prefixUri)) { 268 prefixes.put(prefix, prefixUri); 269 return prefix+":"+escape(url.substring(prefixUri.length()), false); 270 } 271 return null; 272 } 273 274 protected Complex complex() { 275 return new Complex(); 276 } 277 278 private void checkPrefix(Triple object) { 279 if (object instanceof StringType) 280 checkPrefix(((StringType) object).value); 281 else { 282 Complex obj = (Complex) object; 283 for (Predicate po : obj.predicates) { 284 checkPrefix(po.getPredicate()); 285 for (Triple o : po.getObjects()) 286 checkPrefix(o); 287 } 288 } 289 } 290 291 protected void checkPrefix(String pname) { 292 if (pname.startsWith("(")) 293 return; 294 if (pname.startsWith("\"")) 295 return; 296 if (pname.startsWith("<")) 297 return; 298 299 if (pname.contains(":")) { 300 String prefix = pname.substring(0, pname.indexOf(":")); 301 if (!prefixes.containsKey(prefix) && !prefix.equals("http")&& !prefix.equals("urn")) 302 throw new Error("undefined prefix "+prefix); 303 } 304 } 305 306 protected StringType literal(String s) { 307 return new StringType("\""+escape(s, true)+"\""); 308 } 309 310 protected StringType literalTyped(String s, String t) { 311 return new StringType("\""+escape(s, true)+"\"^^xs:"+t); 312 } 313 314 public static String escape(String s, boolean string) { 315 if (s == null) 316 return ""; 317 318 StringBuilder b = new StringBuilder(); 319 for (char c : s.toCharArray()) { 320 if (c == '\r') 321 b.append("\\r"); 322 else if (c == '\n') 323 b.append("\\n"); 324 else if (c == '"') 325 b.append("\\\""); 326 else if (c == '\\') 327 b.append("\\\\"); 328 else if (c == '/' && !string) 329 b.append("\\/"); 330 else 331 b.append(c); 332 } 333 return b.toString(); 334 } 335 336 protected String pctEncode(String s) { 337 if (s == null) 338 return ""; 339 340 StringBuilder b = new StringBuilder(); 341 for (char c : s.toCharArray()) { 342 if (c >= 'A' && c <= 'Z') 343 b.append(c); 344 else if (c >= 'a' && c <= 'z') 345 b.append(c); 346 else if (c >= '0' && c <= '9') 347 b.append(c); 348 else if (c == '.') 349 b.append(c); 350 else 351 b.append("%"+Integer.toHexString(c)); 352 } 353 return b.toString(); 354 } 355 356 protected List<String> sorted(Set<String> keys) { 357 List<String> names = new ArrayList<String>(); 358 names.addAll(keys); 359 Collections.sort(names); 360 return names; 361 } 362 363 public void commit(OutputStream destination, boolean header) throws IOException { 364 LineOutputStreamWriter writer = new LineOutputStreamWriter(destination); 365 commitPrefixes(writer, header); 366 for (Section s : sections) { 367 commitSection(writer, s); 368 } 369 writer.ln("# -------------------------------------------------------------------------------------"); 370 writer.ln(); 371 writer.flush(); 372 writer.close(); 373 } 374 375 public String asHtml() throws Exception { 376 StringBuilder b = new StringBuilder(); 377 b.append("<pre class=\"rdf\">\r\n"); 378 commitPrefixes(b); 379 for (Section s : sections) { 380 commitSection(b, s); 381 } 382 b.append("</pre>\r\n"); 383 b.append("\r\n"); 384 return b.toString(); 385 } 386 387 private void commitPrefixes(LineOutputStreamWriter writer, boolean header) throws IOException { 388 if (header) { 389 writer.ln("# FHIR Sub-definitions"); 390 writer.write("# This is work in progress, and may change rapidly \r\n"); 391 writer.ln(); 392 writer.write("# A note about policy: the focus here is providing the knowledge from \r\n"); 393 writer.write("# the FHIR specification as a set of triples for knowledge processing. \r\n"); 394 writer.write("# Where appopriate, predicates defined external to FHIR are used. \"Where \r\n"); 395 writer.write("# appropriate\" means that the predicates are a faithful representation \r\n"); 396 writer.write("# of the FHIR semantics, and do not involve insane (or owful) syntax. \r\n"); 397 writer.ln(); 398 writer.write("# Where the community agrees on additional predicate statements (such \r\n"); 399 writer.write("# as OWL constraints) these are added in addition to the direct FHIR \r\n"); 400 writer.write("# predicates \r\n"); 401 writer.ln(); 402 writer.write("# This it not a formal ontology, though it is possible it may start to become one eventually\r\n"); 403 writer.ln(); 404 writer.write("# this file refers to concepts defined in rim.ttl and to others defined elsewhere outside HL7 \r\n"); 405 writer.ln(); 406 } 407 for (String p : sorted(prefixes.keySet())) 408 writer.ln("@prefix "+p+": <"+prefixes.get(p)+"> ."); 409 writer.ln(); 410 if (header) { 411 writer.ln("# Predicates used in this file:"); 412 for (String s : sorted(predicateSet)) 413 writer.ln(" # "+s); 414 writer.ln(); 415 } 416 } 417 418 private void commitPrefixes(StringBuilder b) throws Exception { 419 for (String p : sorted(prefixes.keySet())) 420 b.append("@prefix "+p+": <"+prefixes.get(p)+"> .\r\n"); 421 b.append("\r\n"); 422 } 423 424 // private String lastSubject = null; 425 // private String lastComment = ""; 426 427 private void commitSection(LineOutputStreamWriter writer, Section section) throws IOException { 428 writer.ln("# - "+section.name+" "+Utilities.padLeft("", '-', 75-section.name.length())); 429 writer.ln(); 430 for (Subject sbj : section.subjects) { 431 if (Utilities.noString(sbj.id)) { 432 writer.write("["); 433 } else { 434 writer.write(sbj.id); 435 writer.write(" "); 436 } 437 int i = 0; 438 439 for (Predicate p : sbj.predicates) { 440 writer.write(p.getPredicate()); 441 writer.write(" "); 442 boolean first = true; 443 for (Triple o : p.getObjects()) { 444 if (first) 445 first = false; 446 else 447 writer.write(", "); 448 if (o instanceof StringType) 449 writer.write(((StringType) o).value); 450 else { 451 writer.write("["); 452 if (write((Complex) o, writer, 4)) 453 writer.write("\r\n ]"); 454 else 455 writer.write("]"); 456 } 457 } 458 String comment = p.comment == null? "" : " # "+p.comment; 459 i++; 460 if (i < sbj.predicates.size()) 461 writer.write(";"+comment+"\r\n "); 462 else { 463 if (Utilities.noString(sbj.id)) 464 writer.write("]"); 465 writer.write(" ."+comment+"\r\n\r\n"); 466 } 467 } 468 } 469 } 470 471 private void commitSection(StringBuilder b, Section section) throws Exception { 472 b.append("# - "+section.name+" "+Utilities.padLeft("", '-', 75-section.name.length())+"\r\n"); 473 b.append("\r\n"); 474 for (Subject sbj : section.subjects) { 475 b.append(Utilities.escapeXml(sbj.id)); 476 b.append(" "); 477 int i = 0; 478 479 for (Predicate p : sbj.predicates) { 480 b.append(p.makelink()); 481 b.append(" "); 482 boolean first = true; 483 for (Triple o : p.getObjects()) { 484 if (first) 485 first = false; 486 else 487 b.append(", "); 488 if (o instanceof StringType) 489 b.append(Utilities.escapeXml(((StringType) o).value)); 490 else { 491 b.append("["); 492 if (write((Complex) o, b, 4)) 493 b.append("\r\n ]"); 494 else 495 b.append("]"); 496 } 497 } 498 String comment = p.comment == null? "" : " # "+p.comment; 499 i++; 500 if (i < sbj.predicates.size()) 501 b.append(";"+Utilities.escapeXml(comment)+"\r\n "); 502 else 503 b.append("."+Utilities.escapeXml(comment)+"\r\n\r\n"); 504 } 505 } 506 } 507 508 protected class LineOutputStreamWriter extends OutputStreamWriter { 509 private LineOutputStreamWriter(OutputStream out) throws UnsupportedEncodingException { 510 super(out, "UTF-8"); 511 } 512 513 private void ln() throws IOException { 514 write("\r\n"); 515 } 516 517 private void ln(String s) throws IOException { 518 write(s); 519 write("\r\n"); 520 } 521 } 522 523 public boolean write(Complex complex, LineOutputStreamWriter writer, int indent) throws IOException { 524 if (complex.predicates.isEmpty()) 525 return false; 526 if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size()== 1 && complex.predicates.get(0).getObjects().get(0) instanceof StringType && Utilities.noString(complex.predicates.get(0).comment)) { 527 writer.write(" "+complex.predicates.get(0).predicate+" "+((StringType) complex.predicates.get(0).getObjects().get(0)).value); 528 return false; 529 } 530 String left = Utilities.padLeft("", ' ', indent); 531 int i = 0; 532 for (Predicate po : complex.predicates) { 533 writer.write("\r\n"); 534 boolean first = true; 535 for (Triple o : po.getObjects()) { 536 if (first) { 537 first = false; 538 writer.write(left+" "+po.getPredicate()+" "); 539 } else 540 writer.write(", "); 541 if (o instanceof StringType) 542 writer.write(((StringType) o).value); 543 else { 544 writer.write("["); 545 if (write((Complex) o, writer, indent+2)) 546 writer.write("\r\n"+left+" ]"); 547 else 548 writer.write(" ]"); 549 } 550 } 551 i++; 552 if (i < complex.predicates.size()) 553 writer.write(";"); 554 if (!Utilities.noString(po.comment)) 555 writer.write(" # "+escape(po.comment, false)); 556 } 557 return true; 558 } 559 560 public boolean write(Complex complex, StringBuilder b, int indent) throws Exception { 561 if (complex.predicates.isEmpty()) 562 return false; 563 if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size()== 1 && complex.predicates.get(0).getObjects().get(0) instanceof StringType && Utilities.noString(complex.predicates.get(0).comment)) { 564 b.append(" "+complex.predicates.get(0).makelink()+" "+Utilities.escapeXml(((StringType) complex.predicates.get(0).getObjects().get(0)).value)); 565 return false; 566 } 567 String left = Utilities.padLeft("", ' ', indent); 568 int i = 0; 569 for (Predicate po : complex.predicates) { 570 b.append("\r\n"); 571 boolean first = true; 572 for (Triple o : po.getObjects()) { 573 if (first) { 574 first = false; 575 b.append(left+" "+po.makelink()+" "); 576 } else 577 b.append(", "); 578 if (o instanceof StringType) 579 b.append(Utilities.escapeXml(((StringType) o).value)); 580 else { 581 b.append("["); 582 if (write((Complex) o, b, indent+2)) 583 b.append(left+" ]"); 584 else 585 b.append(" ]"); 586 } 587 } 588 i++; 589 if (i < complex.predicates.size()) 590 b.append(";"); 591 if (!Utilities.noString(po.comment)) 592 b.append(" # "+Utilities.escapeXml(escape(po.comment, false))); 593 } 594 return true; 595 } 596 597 598 public abstract class TTLObject { 599 protected int line; 600 protected int col; 601 602 abstract public boolean hasValue(String value); 603 604 public int getLine() { 605 return line; 606 } 607 608 public int getCol() { 609 return col; 610 } 611 612 613 } 614 615 616 public class TTLLiteral extends TTLObject { 617 618 private String value; 619 private String type; 620 protected TTLLiteral(int line, int col) { 621 this.line = line; 622 this.col = col; 623 } 624 @Override 625 public boolean hasValue(String value) { 626 return value.equals(this.value); 627 } 628 public String getValue() { 629 return value; 630 } 631 public String getType() { 632 return type; 633 } 634 635 } 636 637 public class TTLURL extends TTLObject { 638 private String uri; 639 640 protected TTLURL(int line, int col) { 641 this.line = line; 642 this.col = col; 643 } 644 645 public String getUri() { 646 return uri; 647 } 648 649 public void setUri(String uri) throws FHIRFormatError { 650 if (!uri.matches(IRI_URL)) 651 throw new FHIRFormatError("Illegal URI "+uri); 652 this.uri = uri; 653 } 654 655 @Override 656 public boolean hasValue(String value) { 657 return value.equals(this.uri); 658 } 659 } 660 661 public class TTLList extends TTLObject { 662 private List<TTLObject> list = new ArrayList<Turtle.TTLObject>(); 663 664 public TTLList(TTLObject obj) { 665 super(); 666 list.add(obj); 667 } 668 669 @Override 670 public boolean hasValue(String value) { 671 for (TTLObject obj : list) 672 if (obj.hasValue(value)) 673 return true; 674 return false; 675 } 676 677 public List<TTLObject> getList() { 678 return list; 679 } 680 681 } 682 public class TTLComplex extends TTLObject { 683 private Map<String, TTLObject> predicates = new HashMap<String, Turtle.TTLObject>(); 684 protected TTLComplex(int line, int col) { 685 this.line = line; 686 this.col = col; 687 } 688 public Map<String, TTLObject> getPredicates() { 689 return predicates; 690 } 691 @Override 692 public boolean hasValue(String value) { 693 return false; 694 } 695 public void addPredicate(String uri, TTLObject obj) { 696 if (!predicates.containsKey(uri)) 697 predicates.put(uri, obj); 698 else { 699 TTLObject eo = predicates.get(uri); 700 TTLList list = null; 701 if (eo instanceof TTLList) 702 list = (TTLList) eo; 703 else { 704 list = new TTLList(eo); 705 predicates.put(uri, list); 706 } 707 list.list.add(obj); 708 } 709 } 710 public void addPredicates(Map<String, TTLObject> values) { 711 for (String s : values.keySet()) { 712 addPredicate(s, values.get(s)); 713 } 714 } 715 } 716 717 private Map<TTLURL, TTLComplex> objects = new HashMap<TTLURL, Turtle.TTLComplex>(); 718 719 private Object base; 720 721 public enum LexerTokenType { 722 TOKEN, // [, ], :, @ 723 WORD, // a word 724 URI, // a URI <> 725 LITERAL // "..." 726 } 727 728 public class Lexer { 729 730 731 private String source; 732 private LexerTokenType type; 733 private int cursor, line, col, startLine, startCol; 734 private String token; 735 736 public Lexer(String source) throws FHIRFormatError { 737 this.source = source; 738 cursor = 0; 739 line = 1; 740 col = 1; 741 readNext(false); 742 } 743 744 private void skipWhitespace() { 745 while (cursor < source.length()) { 746 char ch = source.charAt(cursor); 747 if (Character.isWhitespace(ch)) 748 grab(); 749 else if (ch == '#') { 750 ch = grab(); 751 while (cursor < source.length()) { 752 ch = grab(); 753 if (ch == '\r' || ch == '\n') { 754 break; 755 } 756 } 757 } else 758 break; 759 } 760 } 761 762 private char grab() { 763 char c = source.charAt(cursor); 764 if (c == '\n') { 765 line++; 766 col = 1; 767 } else 768 col++; 769 770 cursor++; 771 return c; 772 } 773 774 private void readNext(boolean postColon) throws FHIRFormatError { 775 token = null; 776 type = null; 777 skipWhitespace(); 778 if (cursor >= source.length()) 779 return; 780 startLine = line; 781 startCol = col; 782 char ch = grab(); 783 StringBuilder b = new StringBuilder(); 784 switch (ch) { 785 case '@': 786 case '.': 787 case ':': 788 case ';': 789 case '^': 790 case ',': 791 case ']': 792 case '[': 793 case '(': 794 case ')': 795 type = LexerTokenType.TOKEN; 796 b.append(ch); 797 token = b.toString(); 798 return; 799 case '<': 800 while (cursor < source.length()) { 801 ch = grab(); 802 if (ch == '>') 803 break; 804 b.append(ch); 805 } 806 type = LexerTokenType.URI; 807 token = unescape(b.toString(), true); 808 return; 809 case '"': 810 b.append(ch); 811 String end = "\""; 812 while (cursor < source.length()) { 813 ch = grab(); 814 if (b.length() == 2 && ch != '"' && b.equals("\"\"")) { 815 cursor--; 816 break; 817 } 818 b.append(ch); 819 if (ch == '"') 820 if (b.toString().equals("\"\"\"")) 821 end = "\"\"\""; 822 else if (!b.toString().equals("\"\"") && b.toString().endsWith(end) && !b.toString().endsWith("\\"+end)) 823 break; 824 } 825 type = LexerTokenType.LITERAL; 826 token = unescape(b.toString().substring(end.length(), b.length()-end.length()), false); 827 return; 828 case '\'': 829 b.append(ch); 830 end = "'"; 831 while (cursor < source.length()) { 832 ch = grab(); 833 if (b.equals("''") && ch != '\'') { 834 cursor--; 835 break; 836 } 837 b.append(ch); 838 if (b.toString().equals("'''")) 839 end = "'''"; 840 else if (!b.toString().equals("''") && b.toString().endsWith(end)) 841 break; 842 } 843 type = LexerTokenType.LITERAL; 844 token = unescape(b.toString().substring(end.length(), b.length()-end.length()), false); 845 return; 846 default: 847 if (Utilities.charInRange(ch, '0', '9') || Utilities.charInRange(ch, 'a', 'z') || Utilities.charInRange(ch, 'A', 'Z') || Utilities.charInSet(ch, '_', '-', '+', '%')) { 848 b.append(ch); 849 while (cursor < source.length()) { 850 ch = grab(); 851 // if (!Utilities.charInRange(ch, '0', '9') && !Utilities.charInRange(ch, 'a', 'z') && !Utilities.charInRange(ch, 'A', 'Z') && !Utilities.charInSet(ch, '_', '-', '+', '.', '\\', '#')) 852 if (Character.isWhitespace(ch) || Utilities.charInSet(ch, ';', ']', ')', '~') || (( ch == ':') && !postColon)) 853 break; 854 b.append(ch); 855 } 856 type = LexerTokenType.WORD; 857 token = b.toString(); 858 cursor--; 859 return; 860 } else 861 throw error("unexpected lexer char "+ch); 862 } 863 } 864 865 private String unescape(String s, boolean isUri) throws FHIRFormatError { 866 StringBuilder b = new StringBuilder(); 867 int i = 0; 868 while (i < s.length()) { 869 char ch = s.charAt(i); 870 if (ch == '\\' && i < s.length()-1) { 871 i++; 872 switch (s.charAt(i)) { 873 case 't': 874 b.append('\t'); 875 break; 876 case 'r': 877 b.append('\r'); 878 break; 879 case 'n': 880 b.append('\n'); 881 break; 882 case 'f': 883 b.append('\f'); 884 break; 885 case '\'': 886 b.append('\''); 887 break; 888 case '\"': 889 b.append('\"'); 890 break; 891 case '\\': 892 b.append('\\'); 893 break; 894 case '/': 895 b.append('\\'); 896 break; 897 case 'U': 898 case 'u': 899 i++; 900 int l = 4; 901 int uc = Integer.parseInt(s.substring(i, i+l), 16); 902 if (uc < (isUri ? 33 : 32)) { 903 l = 8; 904 uc = Integer.parseInt(s.substring(i, i+8), 16); 905 } 906 if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E))) 907 throw new FHIRFormatError("Illegal unicode character"); 908 b.append((char) uc); 909 i = i + l; 910 break; 911 default: 912 throw new FHIRFormatError("Unknown character escape \\"+s.charAt(i)); 913 } 914 } else { 915 b.append(ch); 916 } 917 i++; 918 } 919 return b.toString(); 920 } 921 922 public boolean done() { 923 return type == null; 924 } 925 926 public String next(LexerTokenType type, boolean postColon) throws FHIRFormatError { 927 if (type != null && this.type != type) 928 throw error("Unexpected type. Found "+this.type.toString()+" looking for a "+type.toString()); 929 String res = token; 930 readNext(postColon); 931 return res; 932 } 933 934 public String peek() throws Exception { 935 return token; 936 } 937 938 public LexerTokenType peekType() { 939 return type; 940 } 941 942 public void token(String token) throws FHIRFormatError { 943 if (!token.equals(this.token)) 944 throw error("Unexpected word "+this.token+" looking for "+token); 945 next(LexerTokenType.TOKEN, token.equals(":")); 946 } 947 948 public void word(String word) throws Exception { 949 if (!word.equals(this.token)) 950 throw error("Unexpected word "+this.token+" looking for "+word); 951 next(LexerTokenType.WORD, false); 952 } 953 954 public String word() throws FHIRFormatError { 955 String t = token; 956 next(LexerTokenType.WORD, false); 957 return t; 958 } 959 960 public String uri() throws FHIRFormatError { 961 if (this.type != LexerTokenType.URI) 962 throw error("Unexpected type. Found "+this.type.toString()+" looking for a URI"); 963 String t = token; 964 next(LexerTokenType.URI, false); 965 return t; 966 } 967 968 public String literal() throws FHIRFormatError { 969 if (this.type != LexerTokenType.LITERAL) 970 throw error("Unexpected type. Found "+this.type.toString()+" looking for a Literal"); 971 String t = token; 972 next(LexerTokenType.LITERAL, false); 973 return t; 974 } 975 976 public boolean peek(LexerTokenType type, String token) { 977 return this.type == type && this.token.equals(token); 978 } 979 980 public FHIRFormatError error(String message) { 981 return new FHIRFormatError("Syntax Error parsing Turtle on line "+Integer.toString(line)+" col "+Integer.toString(col)+": "+message); 982 } 983 984 } 985 // 986 // public void importTtl(Section sct, String ttl) throws Exception { 987 // if (!Utilities.noString(ttl)) { 988 // // System.out.println("import ttl: "+ttl); 989 // Lexer lexer = new Lexer(ttl); 990 // String subject = null; 991 // String predicate = null; 992 // while (!lexer.done()) { 993 // if (subject == null) 994 // subject = lexer.next(); 995 // if (predicate == null) 996 // predicate = lexer.next(); 997 // if (lexer.peekType() == null) { 998 // throw new Error("Unexpected end of input parsing turtle"); 999 // } if (lexer.peekType() == LexerTokenType.TOKEN) { 1000 // sct.triple(subject, predicate, lexer.next()); 1001 // } else if (lexer.peek() == null) { 1002 // throw new Error("Unexected - turtle lexer found no token"); 1003 // } else if (lexer.peek().equals("[")) { 1004 // sct.triple(subject, predicate, importComplex(lexer)); 1005 // } else 1006 // throw new Exception("Not done yet"); 1007 // String n = lexer.next(); 1008 // if (Utilities.noString(n)) 1009 // break; 1010 // if (n.equals(".")) { 1011 // subject = null; 1012 // predicate = null; 1013 // } else if (n.equals(";")) { 1014 // predicate = null; 1015 // } else if (!n.equals(",")) 1016 // throw new Exception("Unexpected token "+n); 1017 // } 1018 // } 1019 // } 1020 1021 public void parse(String source) throws FHIRFormatError { 1022 prefixes.clear(); 1023 prefixes.put("_", "urn:uuid:4425b440-2c33-4488-b9fc-cf9456139995#"); 1024 parse(new Lexer(source)); 1025 } 1026 1027 private void parse(Lexer lexer) throws FHIRFormatError { 1028 boolean doPrefixes = true; 1029 while (!lexer.done()) { 1030 if (doPrefixes && (lexer.peek(LexerTokenType.TOKEN, "@") || lexer.peek(LexerTokenType.WORD, "PREFIX") || lexer.peek(LexerTokenType.WORD, "BASE"))) { 1031 boolean sparqlStyle = false; 1032 boolean base = false; 1033 if (lexer.peek(LexerTokenType.TOKEN, "@")) { 1034 lexer.token("@"); 1035 String p = lexer.word(); 1036 if (p.equals("base")) 1037 base = true; 1038 else if (!p.equals("prefix")) 1039 throw new FHIRFormatError("Unexpected token "+p); 1040 } else { 1041 sparqlStyle = true; 1042 String p = lexer.word(); 1043 if (p.equals("BASE")) 1044 base = true; 1045 else if (!p.equals("PREFIX")) 1046 throw new FHIRFormatError("Unexpected token "+p); 1047 } 1048 String prefix = null; 1049 if (!base) { 1050 prefix = lexer.peekType() == LexerTokenType.WORD ? lexer.next(LexerTokenType.WORD, false) : null; 1051 lexer.token(":"); 1052 } 1053 String url = lexer.next(LexerTokenType.URI, false); 1054 if (!sparqlStyle) 1055 lexer.token("."); 1056 if (!base) 1057 prefix(prefix, url); 1058 else if (this.base == null) 1059 this.base = url; 1060 else 1061 throw new FHIRFormatError("Duplicate @base"); 1062 } else if (lexer.peekType() == LexerTokenType.URI) { 1063 doPrefixes = false; 1064 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1065 uri.setUri(lexer.uri()); 1066 TTLComplex complex = parseComplex(lexer); 1067 objects.put(uri, complex); 1068 lexer.token("."); 1069 } else if (lexer.peekType() == LexerTokenType.WORD) { 1070 doPrefixes = false; 1071 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1072 String pfx = lexer.word(); 1073 if (!prefixes.containsKey(pfx)) 1074 throw new FHIRFormatError("Unknown prefix "+pfx); 1075 lexer.token(":"); 1076 uri.setUri(prefixes.get(pfx)+lexer.word()); 1077 TTLComplex complex = parseComplex(lexer); 1078 objects.put(uri, complex); 1079 lexer.token("."); 1080 } else if (lexer.peek(LexerTokenType.TOKEN, ":")) { 1081 doPrefixes = false; 1082 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1083 lexer.token(":"); 1084 if (!prefixes.containsKey(null)) 1085 throw new FHIRFormatError("Unknown prefix ''"); 1086 uri.setUri(prefixes.get(null)+lexer.word()); 1087 TTLComplex complex = parseComplex(lexer); 1088 objects.put(uri, complex); 1089 lexer.token("."); 1090 } else if (lexer.peek(LexerTokenType.TOKEN, "[")) { 1091 doPrefixes = false; 1092 lexer.token("["); 1093 TTLComplex bnode = parseComplex(lexer); 1094 lexer.token("]"); 1095 TTLComplex complex = null; 1096 if (!lexer.peek(LexerTokenType.TOKEN, ".")) { 1097 complex = parseComplex(lexer); 1098 // at this point, we collapse bnode and complex, and give bnode a fictional identity 1099 bnode.addPredicates(complex.predicates); 1100 } 1101 1102 objects.put(anonymousId(), bnode); 1103 lexer.token("."); 1104 } else 1105 throw lexer.error("Unknown token "+lexer.token); 1106 } 1107 } 1108 1109 private TTLURL anonymousId() throws FHIRFormatError { 1110 TTLURL url = new TTLURL(-1, -1); 1111 url.setUri("urn:uuid:"+UUID.randomUUID().toString().toLowerCase()); 1112 return url; 1113 } 1114 1115 private TTLComplex parseComplex(Lexer lexer) throws FHIRFormatError { 1116 TTLComplex result = new TTLComplex(lexer.startLine, lexer.startCol); 1117 1118 boolean done = lexer.peek(LexerTokenType.TOKEN, "]"); 1119 while (!done) { 1120 String uri = null; 1121 if (lexer.peekType() == LexerTokenType.URI) 1122 uri = lexer.uri(); 1123 else { 1124 String t = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null; 1125 if (lexer.type == LexerTokenType.TOKEN && lexer.token.equals(":")) { 1126 lexer.token(":"); 1127 if (!prefixes.containsKey(t)) 1128 throw new FHIRFormatError("unknown prefix "+t); 1129 uri = prefixes.get(t)+lexer.word(); 1130 } else if (t.equals("a")) 1131 uri = prefixes.get("rdfs")+"type"; 1132 else 1133 throw lexer.error("unexpected token"); 1134 } 1135 1136 boolean inlist = false; 1137 if (lexer.peek(LexerTokenType.TOKEN, "(")) { 1138 inlist = true; 1139 lexer.token("("); 1140 } 1141 1142 boolean rpt = false; 1143 do { 1144 if (lexer.peek(LexerTokenType.TOKEN, "[")) { 1145 lexer.token("["); 1146 result.addPredicate(uri, parseComplex(lexer)); 1147 lexer.token("]"); 1148 } else if (lexer.peekType() == LexerTokenType.URI) { 1149 TTLURL u = new TTLURL(lexer.startLine, lexer.startCol); 1150 u.setUri(lexer.uri()); 1151 result.addPredicate(uri, u); 1152 } else if (lexer.peekType() == LexerTokenType.LITERAL) { 1153 TTLLiteral u = new TTLLiteral(lexer.startLine, lexer.startCol); 1154 u.value = lexer.literal(); 1155 if (lexer.peek(LexerTokenType.TOKEN, "^")) { 1156 lexer.token("^"); 1157 lexer.token("^"); 1158 if (lexer.peekType() == LexerTokenType.URI) { 1159 u.type = lexer.uri(); 1160 } else { 1161 String l = lexer.word(); 1162 lexer.token(":"); 1163 u.type = prefixes.get(l)+ lexer.word(); 1164 } 1165 } 1166 if (lexer.peek(LexerTokenType.TOKEN, "@")) { 1167 //lang tag - skip it 1168 lexer.token("@"); 1169 String lang = lexer.word(); 1170 if (!lang.matches(LANG_REGEX)) { 1171 throw new FHIRFormatError("Invalid Language tag "+lang); 1172 } 1173 } 1174 result.addPredicate(uri, u); 1175 } else if (lexer.peekType() == LexerTokenType.WORD || lexer.peek(LexerTokenType.TOKEN, ":")) { 1176 int sl = lexer.startLine; 1177 int sc = lexer.startCol; 1178 String pfx = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null; 1179 if (Utilities.isDecimal(pfx, true) && !lexer.peek(LexerTokenType.TOKEN, ":")) { 1180 TTLLiteral u = new TTLLiteral(sl, sc); 1181 u.value = pfx; 1182 result.addPredicate(uri, u); 1183 } else if (("false".equals(pfx) || "true".equals(pfx)) && !lexer.peek(LexerTokenType.TOKEN, ":")) { 1184 TTLLiteral u = new TTLLiteral(sl, sc); 1185 u.value = pfx; 1186 result.addPredicate(uri, u); 1187 } else { 1188 if (!prefixes.containsKey(pfx)) 1189 throw new FHIRFormatError("Unknown prefix "+(pfx == null ? "''" : pfx)); 1190 TTLURL u = new TTLURL(sl, sc); 1191 lexer.token(":"); 1192 u.setUri(prefixes.get(pfx)+lexer.word()); 1193 result.addPredicate(uri, u); 1194 } 1195 } else if (!lexer.peek(LexerTokenType.TOKEN, ";") && (!inlist || !lexer.peek(LexerTokenType.TOKEN, ")"))) { 1196 throw new FHIRFormatError("unexpected token "+lexer.token); 1197 } 1198 1199 if (inlist) 1200 rpt = !lexer.peek(LexerTokenType.TOKEN, ")"); 1201 else { 1202 rpt = lexer.peek(LexerTokenType.TOKEN, ","); 1203 if (rpt) 1204 lexer.readNext(false); 1205 } 1206 } while (rpt); 1207 if (inlist) 1208 lexer.token(")"); 1209 1210 if (lexer.peek(LexerTokenType.TOKEN, ";")) { 1211 while ((lexer.peek(LexerTokenType.TOKEN, ";"))) 1212 lexer.token(";"); 1213 done = lexer.peek(LexerTokenType.TOKEN, ".") || lexer.peek(LexerTokenType.TOKEN, "]"); 1214 } else { 1215 done = true; 1216 } 1217 } 1218 return result; 1219 } 1220 1221 public Map<TTLURL, TTLComplex> getObjects() { 1222 return objects; 1223 } 1224 1225 public TTLComplex getObject(String url) { 1226 for (TTLURL t : objects.keySet()) { 1227 if (t.getUri().equals(url)) 1228 return objects.get(t); 1229 } 1230 return null; 1231 } 1232 1233 // public void parseFragment(Lexer lexer) throws Exception { 1234 // lexer.next(); // read [ 1235 // Complex obj = new Complex(); 1236 // while (!lexer.peek().equals("]")) { 1237 // String predicate = lexer.next(); 1238 // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == LexerTokenType.LITERAL) { 1239 // obj.predicate(predicate, lexer.next()); 1240 // } else if (lexer.peek().equals("[")) { 1241 // obj.predicate(predicate, importComplex(lexer)); 1242 // } else 1243 // throw new Exception("Not done yet"); 1244 // if (lexer.peek().equals(";")) 1245 // lexer.next(); 1246 // } 1247 // lexer.next(); // read ] 1248 // //return obj; 1249 // } 1250 // 1251 // public void importTtl(Section sct, String ttl) throws Exception { 1252 // if (!Utilities.noString(ttl)) { 1253 // // System.out.println("import ttl: "+ttl); 1254 // Lexer lexer = new Lexer(ttl); 1255 // String subject = null; 1256 // String predicate = null; 1257 // while (!lexer.done()) { 1258 // if (subject == null) 1259 // subject = lexer.next(); 1260 // if (predicate == null) 1261 // predicate = lexer.next(); 1262 // if (lexer.peekType() == null) { 1263 // throw new Error("Unexpected end of input parsing turtle"); 1264 // } if (lexer.peekType() == LexerTokenType.TOKEN) { 1265 // sct.triple(subject, predicate, lexer.next()); 1266 // } else if (lexer.peek() == null) { 1267 // throw new Error("Unexected - turtle lexer found no token"); 1268 // } else if (lexer.peek().equals("[")) { 1269 // sct.triple(subject, predicate, importComplex(lexer)); 1270 // } else 1271 // throw new Exception("Not done yet"); 1272 // String n = lexer.next(); 1273 // if (Utilities.noString(n)) 1274 // break; 1275 // if (n.equals(".")) { 1276 // subject = null; 1277 // predicate = null; 1278 // } else if (n.equals(";")) { 1279 // predicate = null; 1280 // } else if (!n.equals(",")) 1281 // throw new Exception("Unexpected token "+n); 1282 // } 1283 // } 1284 //} 1285 1286 // private Complex importComplex(Lexer lexer) throws Exception { 1287 // lexer.next(); // read [ 1288 // Complex obj = new Complex(); 1289 // while (!lexer.peek().equals("]")) { 1290 // String predicate = lexer.next(); 1291 // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == LexerTokenType.LITERAL) { 1292 // obj.predicate(predicate, lexer.next()); 1293 // } else if (lexer.peek().equals("[")) { 1294 // obj.predicate(predicate, importComplex(lexer)); 1295 // } else 1296 // throw new Exception("Not done yet"); 1297 // if (lexer.peek().equals(";")) 1298 // lexer.next(); 1299 // } 1300 // lexer.next(); // read ] 1301 // return obj; 1302 // } 1303 1304}