001package org.hl7.fhir.r4.utils.formats;
002
003/*-
004 * #%L
005 * org.hl7.fhir.r4
006 * %%
007 * Copyright (C) 2014 - 2019 Health Level 7
008 * %%
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 * 
013 *      http://www.apache.org/licenses/LICENSE-2.0
014 * 
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 * #L%
021 */
022
023
024import java.io.IOException;
025import java.io.OutputStream;
026import java.io.OutputStreamWriter;
027import java.io.UnsupportedEncodingException;
028import java.util.ArrayList;
029import java.util.Collections;
030import java.util.HashMap;
031import java.util.HashSet;
032import java.util.List;
033import java.util.Map;
034import java.util.Set;
035import java.util.UUID;
036
037import org.hl7.fhir.exceptions.FHIRFormatError;
038import org.hl7.fhir.utilities.Utilities;
039
040public class Turtle {
041
        /** Regex character-class fragment (ASCII letters and digits plus U+00A0 through U+FFFE) that is concatenated into {@link #IRI_URL}. */
        public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uFFFE";

  /** Regular expression a URI must match in full before {@code TTLURL.setUri} accepts it. */
  public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\="+GOOD_IRI_CHAR+"])+"; 
  /** Regular expression: two lower-case letters, optionally followed by a hyphen and two more letters (presumably a language tag - confirm against callers). */
  public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?";
046
047        // Object model
        /**
         * Base type for anything stored as the object of a predicate; {@link StringType} and
         * {@link Complex} extend it.
         */
        public abstract class Triple {
                // NOTE(review): not read or written in the portion of the file reviewed here - confirm it is still needed
                private String uri;
        }
051
052        public class StringType extends Triple {
053                private String value;
054
055                public StringType(String value) {
056                        super();
057                        this.value = value;
058                }
059        }
060
061        public class Complex extends Triple {
062                protected List<Predicate> predicates = new ArrayList<Predicate>();
063
064                public Complex predicate(String predicate, String object) {
065                        predicateSet.add(predicate);
066                        objectSet.add(object);
067                        return predicate(predicate, new StringType(object));
068                }
069
070    public Complex linkedPredicate(String predicate, String object, String link) {
071      predicateSet.add(predicate);
072      objectSet.add(object);
073      return linkedPredicate(predicate, new StringType(object), link);
074    }
075
076                public Complex predicate(String predicate, Triple object) {
077      Predicate p = getPredicate(predicate);
078      if (p == null) {
079        p = new Predicate();
080                        p.predicate = predicate;
081                        predicateSet.add(predicate);
082        predicates.add(p);
083      }
084                        if (object instanceof StringType)
085                                objectSet.add(((StringType) object).value);
086      p.objects.add(object);
087                        return this;
088                }
089
090    protected Predicate getPredicate(String predicate) {
091      for (Predicate p : predicates)
092        if (p.predicate.equals(predicate))
093          return p;
094      return null;
095    }
096
097    public Complex linkedPredicate(String predicate, Triple object, String link) {
098      Predicate p = getPredicate(predicate);
099      if (p == null) {
100        p = new Predicate();
101      p.predicate = predicate;
102      p.link = link;
103      predicateSet.add(predicate);
104        predicates.add(p);
105      }
106      if (object instanceof StringType)
107        objectSet.add(((StringType) object).value);
108      p.objects.add(object);
109      return this;
110    }
111
112                public Complex predicate(String predicate) {
113                        predicateSet.add(predicate);
114                        Complex c = complex();
115                        predicate(predicate, c);
116                        return c;
117                }
118
119    public Complex linkedPredicate(String predicate, String link) {
120      predicateSet.add(predicate);
121      Complex c = complex();
122      linkedPredicate(predicate, c, link);
123      return c;
124    }
125
126                public void prefix(String code, String url) {
127                        Turtle.this.prefix(code, url);
128                }
129        }
130
131        private class Predicate {
132                protected String predicate;
133                protected String link;
134    protected List<Triple> objects = new ArrayList<Turtle.Triple>();
135                protected String comment;
136
137                public String getPredicate() {
138                        return predicate;
139                }
140                public String makelink() {
141      if (link == null)
142        return predicate;
143      else
144        return "<a href=\""+link+"\">"+Utilities.escapeXml(predicate)+"</a>";
145    }
146                
147    public List<Triple> getObjects() {
148      return objects;
149                }
150                public String getComment() {
151                        return comment;
152                }
153        }
154
155        public class Subject extends Complex {
156                private String id;
157
158                public Predicate predicate(String predicate, Triple object, String comment) {
159      Predicate p = getPredicate(predicate);
160      if (p == null) {
161        p = new Predicate();
162                        p.predicate = predicate;
163                        predicateSet.add(predicate);
164                        predicates.add(p);
165                        p.comment = comment; 
166      }
167      if (object instanceof StringType)
168        objectSet.add(((StringType) object).value);
169      p.objects.add(object);
170                        return p;
171                }
172
173                public void comment(String comment) {
174                        if (!Utilities.noString(comment)) {
175                                predicate("rdfs:comment", literal(comment));
176                                predicate("dcterms:description", literal(comment));
177                        }
178                }
179
180                public void label(String label) {
181                        if (!Utilities.noString(label)) {
182                                predicate("rdfs:label", literal(label));
183                                predicate("dc:title", literal(label));
184                        }
185                }
186
187        }
188
189        public class Section {
190                private String name;
191                private List<Subject> subjects = new ArrayList<Subject>();
192
193                public Subject triple(String subject, String predicate, String object, String comment) {
194                        return triple(subject, predicate, new StringType(object), comment);
195                }
196
197                public Subject triple(String subject, String predicate, String object) {
198                        return triple(subject, predicate, new StringType(object));
199                }
200
201                public Subject triple(String subject, String predicate, Triple object) {
202                        return triple(subject, predicate, object, null);     
203                }
204
205                public Subject triple(String subject, String predicate, Triple object, String comment) {
206                        Subject s = subject(subject);
207                        s.predicate(predicate, object, comment);
208                        return s;
209                }
210
211                public void comment(String subject, String comment) {
212                        triple(subject, "rdfs:comment", literal(comment));
213                        triple(subject, "dcterms:description", literal(comment));
214                }
215
216                public void label(String subject, String comment) {
217                        triple(subject, "rdfs:label", literal(comment));
218                        triple(subject, "dc:title", literal(comment));
219                }
220
221                public Subject subject(String subject) {
222                        for (Subject ss : subjects) 
223                                if (ss.id.equals(subject))
224                                        return ss;
225                        Subject s = new Subject();
226                        s.id = subject;
227                        subjects.add(s);
228                        return s;
229                }
230
231    public boolean hasSubject(String subject) {
232      for (Subject ss : subjects) 
233        if (ss.id.equals(subject))
234          return true;
235      return false;
236    }
237        }
238
        // Sections in creation order; commit() and asHtml() write them in this order.
        private List<Section> sections = new ArrayList<Section>();
        // NOTE(review): never added to in the portion of the file reviewed here - confirm usage.
        protected Set<String> subjectSet = new HashSet<String>();
        // Every predicate name passed to Complex/Subject; listed in the optional file header.
        protected Set<String> predicateSet = new HashSet<String>();
        // Text of every StringType object attached to a predicate.
        protected Set<String> objectSet = new HashSet<String>();
        // Prefix code -> namespace URL, emitted as @prefix lines.
        protected Map<String, String> prefixes = new HashMap<String, String>();
244
        /**
         * Registers a namespace prefix, replacing any URL previously stored under the same code.
         *
         * @param code the prefix code, without the trailing colon
         * @param url the namespace URL the code stands for
         */
        public void prefix(String code, String url) {
                prefixes.put(code, url);
        }
248
249        protected boolean hasSection(String sn) {
250                for (Section s : sections)
251                        if (s.name.equals(sn))
252                                return true;
253                return false;
254
255        }
256
257        public Section section(String sn) {
258                if (hasSection(sn))
259                        throw new Error("Duplicate section name "+sn);
260                Section s = new Section();
261                s.name = sn;
262                sections.add(s);
263                return s;
264        }
265
266        protected String matches(String url, String prefixUri, String prefix) {
267                if (url.startsWith(prefixUri)) {
268                        prefixes.put(prefix, prefixUri);
269                        return prefix+":"+escape(url.substring(prefixUri.length()), false);
270                }
271                return null;
272        }
273
        /**
         * Factory for nested nodes; {@code Complex.predicate(String)} and
         * {@code Complex.linkedPredicate(String, String)} call it to obtain the child they attach.
         *
         * @return a new, empty {@link Complex}
         */
        protected Complex complex() {
                return new Complex();
        }
277
278        private void checkPrefix(Triple object) {
279                if (object instanceof StringType)
280                        checkPrefix(((StringType) object).value);
281                else {
282                        Complex obj = (Complex) object;
283                        for (Predicate po : obj.predicates) {
284                                checkPrefix(po.getPredicate());
285        for (Triple o : po.getObjects())
286          checkPrefix(o);
287                        }
288                }
289        }
290
291        protected void checkPrefix(String pname) {
292                if (pname.startsWith("("))
293                        return;
294                if (pname.startsWith("\""))
295                        return;
296                if (pname.startsWith("<"))
297                        return;
298
299                if (pname.contains(":")) {
300                        String prefix = pname.substring(0, pname.indexOf(":"));
301                        if (!prefixes.containsKey(prefix) && !prefix.equals("http")&& !prefix.equals("urn"))
302                                throw new Error("undefined prefix "+prefix); 
303                }
304        }
305
306        protected StringType literal(String s) {
307                return new StringType("\""+escape(s, true)+"\"");
308        }
309
310  protected StringType literalTyped(String s, String t) {
311    return new StringType("\""+escape(s, true)+"\"^^xs:"+t);
312  }
313
314        public static String escape(String s, boolean string) {
315                if (s == null)
316                        return "";
317
318                StringBuilder b = new StringBuilder();
319                for (char c : s.toCharArray()) {
320                        if (c == '\r')
321                                b.append("\\r");
322                        else if (c == '\n')
323                                b.append("\\n");
324                        else if (c == '"')
325                                b.append("\\\"");
326                        else if (c == '\\')
327                                b.append("\\\\");
328                        else if (c == '/' && !string)
329                                b.append("\\/");
330                        else 
331                                b.append(c);
332                }   
333                return b.toString();
334        }
335
336        protected String pctEncode(String s) {
337                if (s == null)
338                        return "";
339
340                StringBuilder b = new StringBuilder();
341                for (char c : s.toCharArray()) {
342                        if (c >= 'A' && c <= 'Z')
343                                b.append(c);
344                        else if (c >= 'a' && c <= 'z')
345                                b.append(c);
346                        else if (c >= '0' && c <= '9')
347                                b.append(c);
348                        else if (c == '.')
349                                b.append(c);
350                        else 
351                                b.append("%"+Integer.toHexString(c));
352                }   
353                return b.toString();
354        }
355
356        protected List<String> sorted(Set<String> keys) {
357                List<String> names = new ArrayList<String>();
358                names.addAll(keys);
359                Collections.sort(names);
360                return names;
361        }
362
363        public void commit(OutputStream destination, boolean header) throws IOException {
364                LineOutputStreamWriter writer = new LineOutputStreamWriter(destination);
365                commitPrefixes(writer, header);
366                for (Section s : sections) {
367                        commitSection(writer, s);
368                }
369                writer.ln("# -------------------------------------------------------------------------------------");
370                writer.ln();
371                writer.flush();
372                writer.close();
373        }
374
375  public String asHtml() throws Exception {
376    StringBuilder b = new StringBuilder();
377    b.append("<pre class=\"rdf\">\r\n");
378    commitPrefixes(b);
379    for (Section s : sections) {
380      commitSection(b, s);
381    }
382    b.append("</pre>\r\n");
383    b.append("\r\n");
384    return b.toString();
385  }
386
387        private void commitPrefixes(LineOutputStreamWriter writer, boolean header) throws IOException {
388                if (header) {
389                        writer.ln("# FHIR Sub-definitions");
390                        writer.write("# This is work in progress, and may change rapidly \r\n");
391                        writer.ln();
392                        writer.write("# A note about policy: the focus here is providing the knowledge from \r\n"); 
393                        writer.write("# the FHIR specification as a set of triples for knowledge processing. \r\n");
394                        writer.write("# Where appopriate, predicates defined external to FHIR are used. \"Where \r\n");
395                        writer.write("# appropriate\" means that the predicates are a faithful representation \r\n");
396                        writer.write("# of the FHIR semantics, and do not involve insane (or owful) syntax. \r\n");
397                        writer.ln();
398                        writer.write("# Where the community agrees on additional predicate statements (such \r\n");
399                        writer.write("# as OWL constraints) these are added in addition to the direct FHIR \r\n");
400                        writer.write("# predicates \r\n");
401                        writer.ln();
402                        writer.write("# This it not a formal ontology, though it is possible it may start to become one eventually\r\n");
403                        writer.ln();
404                        writer.write("# this file refers to concepts defined in rim.ttl and to others defined elsewhere outside HL7 \r\n");
405                        writer.ln();
406                }
407                for (String p : sorted(prefixes.keySet()))
408                        writer.ln("@prefix "+p+": <"+prefixes.get(p)+"> .");
409                writer.ln();
410                if (header) {
411                        writer.ln("# Predicates used in this file:");
412                        for (String s : sorted(predicateSet)) 
413                                writer.ln(" # "+s);
414                        writer.ln();
415                }
416        }
417
418  private void commitPrefixes(StringBuilder b) throws Exception {
419    for (String p : sorted(prefixes.keySet()))
420      b.append("@prefix "+p+": &lt;"+prefixes.get(p)+"&gt; .\r\n");
421    b.append("\r\n");
422  }
423
424        //  private String lastSubject = null;
425        //  private String lastComment = "";
426
427        private void commitSection(LineOutputStreamWriter writer, Section section) throws IOException {
428                writer.ln("# - "+section.name+" "+Utilities.padLeft("", '-', 75-section.name.length()));
429                writer.ln();
430                for (Subject sbj : section.subjects) {
431      if (Utilities.noString(sbj.id)) {
432        writer.write("[");
433      } else {
434                        writer.write(sbj.id);
435                        writer.write(" ");
436      }
437                        int i = 0;
438
439                        for (Predicate p : sbj.predicates) {
440                                writer.write(p.getPredicate());
441                                writer.write(" ");
442        boolean first = true;
443        for (Triple o : p.getObjects()) {
444          if (first)
445            first = false;
446          else
447            writer.write(", ");
448          if (o instanceof StringType)
449            writer.write(((StringType) o).value);
450                                else {
451                                        writer.write("[");
452            if (write((Complex) o, writer, 4))
453                                                writer.write("\r\n  ]");
454                                        else
455                                                writer.write("]");
456                                }
457        }
458                                String comment = p.comment == null? "" : " # "+p.comment;
459                                i++;
460                                if (i < sbj.predicates.size())
461                                        writer.write(";"+comment+"\r\n  ");
462        else {
463          if (Utilities.noString(sbj.id)) 
464            writer.write("]");
465          writer.write(" ."+comment+"\r\n\r\n");
466                }
467          }
468        }
469  }
470
471  private void commitSection(StringBuilder b, Section section) throws Exception {
472    b.append("# - "+section.name+" "+Utilities.padLeft("", '-', 75-section.name.length())+"\r\n");
473    b.append("\r\n");
474    for (Subject sbj : section.subjects) {
475      b.append(Utilities.escapeXml(sbj.id));
476      b.append(" ");
477      int i = 0;
478
479      for (Predicate p : sbj.predicates) {
480        b.append(p.makelink());
481        b.append(" ");
482        boolean first = true;
483        for (Triple o : p.getObjects()) {
484          if (first)
485            first = false;
486          else
487            b.append(", ");
488          if (o instanceof StringType)
489            b.append(Utilities.escapeXml(((StringType) o).value));
490        else {
491          b.append("[");
492            if (write((Complex) o, b, 4))
493            b.append("\r\n  ]");
494          else
495            b.append("]");
496        }
497        }
498        String comment = p.comment == null? "" : " # "+p.comment;
499        i++;
500        if (i < sbj.predicates.size())
501          b.append(";"+Utilities.escapeXml(comment)+"\r\n  ");
502        else
503          b.append("."+Utilities.escapeXml(comment)+"\r\n\r\n");
504      }
505    }
506  }
507
508        protected class LineOutputStreamWriter extends OutputStreamWriter {
509                private LineOutputStreamWriter(OutputStream out) throws UnsupportedEncodingException {
510                        super(out, "UTF-8");
511                }
512
513                private void ln() throws IOException {
514                        write("\r\n");
515                }
516
517                private void ln(String s) throws IOException {
518                        write(s);
519                        write("\r\n");
520                }
521        }
522
523        public boolean write(Complex complex, LineOutputStreamWriter writer, int indent) throws IOException {
524                if (complex.predicates.isEmpty()) 
525                        return false;
526    if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size()== 1 && complex.predicates.get(0).getObjects().get(0) instanceof StringType && Utilities.noString(complex.predicates.get(0).comment)) {
527      writer.write(" "+complex.predicates.get(0).predicate+" "+((StringType) complex.predicates.get(0).getObjects().get(0)).value);
528                        return false;
529                }
530                String left = Utilities.padLeft("", ' ', indent);
531                int i = 0;
532                for (Predicate po : complex.predicates) {
533                        writer.write("\r\n");
534      boolean first = true;
535      for (Triple o : po.getObjects()) {
536        if (first) {
537          first = false;
538          writer.write(left+" "+po.getPredicate()+" ");
539        } else
540          writer.write(", ");
541        if (o instanceof StringType)
542          writer.write(((StringType) o).value);
543                        else {
544          writer.write("[");
545          if (write((Complex) o, writer, indent+2))
546            writer.write("\r\n"+left+" ]");
547                                else
548                                        writer.write(" ]");
549                        }
550      }
551                        i++;
552                        if (i < complex.predicates.size())
553                                writer.write(";");
554                        if (!Utilities.noString(po.comment)) 
555                                writer.write(" # "+escape(po.comment, false));
556                }
557                return true;      
558        }
559
560  public boolean write(Complex complex, StringBuilder b, int indent) throws Exception {
561    if (complex.predicates.isEmpty()) 
562      return false;
563    if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size()== 1 && complex.predicates.get(0).getObjects().get(0) instanceof StringType && Utilities.noString(complex.predicates.get(0).comment)) {
564      b.append(" "+complex.predicates.get(0).makelink()+" "+Utilities.escapeXml(((StringType) complex.predicates.get(0).getObjects().get(0)).value));
565      return false;
566    }
567    String left = Utilities.padLeft("", ' ', indent);
568    int i = 0;
569    for (Predicate po : complex.predicates) {
570      b.append("\r\n");
571      boolean first = true;
572      for (Triple o : po.getObjects()) {
573        if (first) {
574          first = false;
575          b.append(left+" "+po.makelink()+" ");
576        } else
577          b.append(", ");
578        if (o instanceof StringType)
579          b.append(Utilities.escapeXml(((StringType) o).value));
580      else {
581          b.append("[");
582          if (write((Complex) o, b, indent+2))
583          b.append(left+" ]");
584        else
585          b.append(" ]");
586      }
587      }
588      i++;
589      if (i < complex.predicates.size())
590        b.append(";");
591      if (!Utilities.noString(po.comment)) 
592        b.append(" # "+Utilities.escapeXml(escape(po.comment, false)));
593    }
594    return true;      
595  }
596
597
  /**
   * Base class of the TTL object model ({@link TTLLiteral}, {@link TTLURL}, {@link TTLList},
   * {@link TTLComplex}). Carries a line and column, presumably the position in the source
   * text where the object was read - confirm against the parser.
   */
  public abstract class TTLObject {
                protected int line;
                protected int col;

    /** Reports whether this object carries the given value; each subclass defines the comparison. */
    abstract public boolean hasValue(String value);

    /** @return the line stored for this object */
    public int getLine() {
      return line;
    }

    /** @return the column stored for this object */
    public int getCol() {
      return col;
    }
    
    
        }
614
615
616        public class TTLLiteral extends TTLObject {
617
618                private String value;
619                private String type;
620                protected TTLLiteral(int line, int col) {
621                        this.line = line;
622                        this.col = col;
623                }
624    @Override
625    public boolean hasValue(String value) {
626      return value.equals(this.value);
627    }
628    public String getValue() {
629      return value;
630    }
631    public String getType() {
632      return type;
633    }
634
635        }
636
637        public class TTLURL extends TTLObject {
638                private String uri;
639
640                protected TTLURL(int line, int col) {
641                        this.line = line;
642                        this.col = col;
643                }
644
645                public String getUri() {
646                        return uri;
647                }
648
649    public void setUri(String uri) throws FHIRFormatError {
650                        if (!uri.matches(IRI_URL))
651        throw new FHIRFormatError("Illegal URI "+uri);
652                        this.uri = uri;
653                }
654
655    @Override
656    public boolean hasValue(String value) {
657      return value.equals(this.uri);
658        }
659  }
660
661  public class TTLList extends TTLObject {
662    private List<TTLObject> list = new ArrayList<Turtle.TTLObject>();
663
664    public TTLList(TTLObject obj) {
665      super();
666      list.add(obj);
667    }
668    
669    @Override
670    public boolean hasValue(String value) {
671      for (TTLObject obj : list)
672        if (obj.hasValue(value))
673          return true;
674      return false;
675    }
676
677    public List<TTLObject> getList() {
678      return list;
679    }
680    
681  }
682        public class TTLComplex extends TTLObject {
683                private Map<String, TTLObject> predicates = new HashMap<String, Turtle.TTLObject>();
684                protected TTLComplex(int line, int col) {
685                        this.line = line;
686                        this.col = col;
687                }
688    public Map<String, TTLObject> getPredicates() {
689      return predicates;
690        }
691    @Override
692    public boolean hasValue(String value) {
693      return false;
694    }
695    public void addPredicate(String uri, TTLObject obj) {
696      if (!predicates.containsKey(uri))
697        predicates.put(uri, obj);
698      else {
699        TTLObject eo = predicates.get(uri);
700        TTLList list = null; 
701        if (eo instanceof TTLList) 
702          list = (TTLList) eo; 
703        else {
704          list = new TTLList(eo);
705          predicates.put(uri, list);
706        }
707        list.list.add(obj);
708      }
709    }
710    public void addPredicates(Map<String, TTLObject> values) {
711      for (String s : values.keySet()) {
712        addPredicate(s, values.get(s));
713      }
714    }
715  }
716
  // NOTE(review): not read or written in the portion of the file reviewed here - presumably filled by the parser; confirm.
  private Map<TTLURL, TTLComplex> objects = new HashMap<TTLURL, Turtle.TTLComplex>();

  // NOTE(review): not used in the portion of the file reviewed here, and typed as plain Object - confirm intent.
  private Object base;
720
        /** Kinds of token the {@link Lexer} reports. */
        public enum LexerTokenType {
                TOKEN, // a single punctuation character: @ . : ; ^ , ] [ ( )
                WORD, // a word 
                URI, // the text between < and >
                LITERAL // a quoted string, "..." or '...'
        }
727
728        public class Lexer {
729
730
731                private String source;
732                private LexerTokenType type;
733                private int cursor, line, col, startLine, startCol;
734                private String token;
735
736    public Lexer(String source) throws FHIRFormatError {
737                        this.source = source;
738                        cursor = 0;
739                        line = 1;
740                        col = 1;
741      readNext(false);
742                }
743
744                private void skipWhitespace() {
745                        while (cursor < source.length()) {
746                                char ch = source.charAt(cursor);
747                                if (Character.isWhitespace(ch))
748                                        grab();
749                                else if (ch == '#') {
750                                        ch = grab();
751                                        while (cursor < source.length()) {
752                                                ch = grab();
753                                                if (ch == '\r' || ch == '\n') {
754                                                        break;
755                                                }
756                                        }          
757                                } else
758                                        break;
759                        }
760                }
761
762                private char grab() {
763                        char c = source.charAt(cursor);
764                        if (c == '\n') {
765                                line++;
766                                col = 1;
767                        } else
768                                col++;
769
770                        cursor++;
771                        return c;
772                }
773
774    private void readNext(boolean postColon) throws FHIRFormatError {    
775                        token = null;
776                        type = null;
777                        skipWhitespace();
778                        if (cursor >= source.length())
779                                return;
780                        startLine = line;
781                        startCol = col;
782                        char ch = grab();
783                        StringBuilder b = new StringBuilder();
784                        switch (ch) {
785                        case '@':
786                        case '.': 
787                        case ':': 
788                        case ';': 
789                        case '^': 
790                        case ',': 
791                        case ']': 
792                        case '[': 
793                        case '(': 
794                        case ')': 
795                                type = LexerTokenType.TOKEN;
796                                b.append(ch);
797                                token = b.toString();
798                                return;
799                        case '<': 
800                                while (cursor < source.length()) {
801                                        ch = grab();
802                                        if (ch == '>')
803                                                break;
804                                        b.append(ch);
805                                }
806                                type = LexerTokenType.URI;
807                                token = unescape(b.toString(), true);
808                                return;        
809                        case '"': 
810                                b.append(ch);
811                                String end = "\"";
812                                while (cursor < source.length()) {
813                                        ch = grab();
814          if (b.length() == 2 && ch != '"' && b.equals("\"\"")) {
815                                                cursor--;
816                                                break;
817                                        }
818                                        b.append(ch);
819          if (ch == '"')
820                                        if (b.toString().equals("\"\"\""))
821                                                end = "\"\"\"";
822            else if (!b.toString().equals("\"\"") && b.toString().endsWith(end) && !b.toString().endsWith("\\"+end))
823                                                break;
824                                }
825                                type = LexerTokenType.LITERAL;
826                                token = unescape(b.toString().substring(end.length(), b.length()-end.length()), false);
827                                return;        
828                        case '\'': 
829                                b.append(ch);
830                                end = "'";
831                                while (cursor < source.length()) {
832                                        ch = grab();
833                                        if (b.equals("''") && ch != '\'') {
834                                                cursor--;
835                                                break;
836                                        }
837                                        b.append(ch);
838                                        if (b.toString().equals("'''"))
839                                                end = "'''";
840                                        else if (!b.toString().equals("''") && b.toString().endsWith(end))
841                                                break;
842                                }
843                                type = LexerTokenType.LITERAL;
844                                token = unescape(b.toString().substring(end.length(), b.length()-end.length()), false);
845                                return;        
846                        default:
847        if (Utilities.charInRange(ch, '0', '9') || Utilities.charInRange(ch, 'a', 'z') || Utilities.charInRange(ch, 'A', 'Z') || Utilities.charInSet(ch, '_', '-', '+', '%')) {
848                                        b.append(ch);
849                                        while (cursor < source.length()) {
850                                                ch = grab();
851            //                                          if (!Utilities.charInRange(ch, '0', '9') && !Utilities.charInRange(ch, 'a', 'z') && !Utilities.charInRange(ch, 'A', 'Z') && !Utilities.charInSet(ch, '_', '-', '+', '.', '\\', '#'))
852            if (Character.isWhitespace(ch) || Utilities.charInSet(ch, ';', ']', ')', '~') || (( ch == ':') && !postColon))
853                                                        break;
854                                                b.append(ch);
855                                        }
856                                        type = LexerTokenType.WORD;
857                                        token = b.toString();
858                                        cursor--;
859                                        return;        
860                                } else
861                                        throw error("unexpected lexer char "+ch);
862                        }
863                }
864
865    private String unescape(String s, boolean isUri) throws FHIRFormatError {
866                        StringBuilder b = new StringBuilder();
867                        int i = 0;
868                        while (i < s.length()) {
869                                char ch = s.charAt(i);
870                                if (ch == '\\' && i < s.length()-1) {
871                                        i++;
872                                        switch (s.charAt(i)) {
873                                        case 't': 
874                                                b.append('\t');
875                                                break;
876                                        case 'r':
877                                                b.append('\r');
878                                                break;
879                                        case 'n': 
880                                                b.append('\n');
881                                                break;
882                                        case 'f': 
883                                                b.append('\f');
884                                                break;
885                                        case '\'':
886                                                b.append('\'');
887                                                break;
888          case '\"':
889            b.append('\"');
890            break;
891                                        case '\\': 
892                                                b.append('\\');
893                                                break;
894                                        case '/': 
895                                                b.append('\\');
896                                                break;
897                                        case 'U':
898                                        case 'u':
899                                                i++;
900                                                int l = 4;
901                                                int uc = Integer.parseInt(s.substring(i, i+l), 16);
902                                                if (uc < (isUri ? 33 : 32)) {
903                                                        l = 8;
904                                                        uc = Integer.parseInt(s.substring(i, i+8), 16);
905                                                }
906                                                if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E)))
907              throw new FHIRFormatError("Illegal unicode character");
908                                                b.append((char) uc);
909                                                i = i + l;
910                                                break;
911                                        default:
912            throw new FHIRFormatError("Unknown character escape \\"+s.charAt(i));
913                                        }
914                                } else {
915                                        b.append(ch);
916                                }
917        i++;
918                        }
919                        return b.toString();
920                }
921
922                public boolean done() {
923                        return type == null;
924                }
925
926    public String next(LexerTokenType type, boolean postColon) throws FHIRFormatError {
927                        if (type != null && this.type != type)
928                                throw error("Unexpected type. Found "+this.type.toString()+" looking for a "+type.toString());
929                        String res = token;
930      readNext(postColon);
931                        return res;
932                }
933
934                public String peek() throws Exception {
935                        return token;
936                }
937
938                public LexerTokenType peekType() {
939                        return type;
940                }
941
942    public void token(String token) throws FHIRFormatError {
943                        if (!token.equals(this.token))
944                                throw error("Unexpected word "+this.token+" looking for "+token);
945      next(LexerTokenType.TOKEN, token.equals(":"));
946                }
947
948                public void word(String word) throws Exception {
949                        if (!word.equals(this.token))
950                                throw error("Unexpected word "+this.token+" looking for "+word);
951      next(LexerTokenType.WORD, false);
952                }
953
954    public String word() throws FHIRFormatError {
955                        String t = token;
956      next(LexerTokenType.WORD, false);
957                        return t;
958                }
959
960    public String uri() throws FHIRFormatError {
961                        if (this.type != LexerTokenType.URI)
962                                throw error("Unexpected type. Found "+this.type.toString()+" looking for a URI");
963                        String t = token;
964      next(LexerTokenType.URI, false);
965                        return t;
966                }
967
968    public String literal() throws FHIRFormatError {
969                        if (this.type != LexerTokenType.LITERAL)
970                                throw error("Unexpected type. Found "+this.type.toString()+" looking for a Literal");
971                        String t = token;
972      next(LexerTokenType.LITERAL, false);
973                        return t;
974                }
975
976                public boolean peek(LexerTokenType type, String token) {
977                        return this.type == type && this.token.equals(token);
978                }
979
980    public FHIRFormatError error(String message) {
981      return new FHIRFormatError("Syntax Error parsing Turtle on line "+Integer.toString(line)+" col "+Integer.toString(col)+": "+message);
982                }
983
984        }
985        //      
986        //      public void importTtl(Section sct, String ttl) throws Exception {
987        //              if (!Utilities.noString(ttl)) {
988        //                      //        System.out.println("import ttl: "+ttl);
989        //                      Lexer lexer = new Lexer(ttl);
990        //                      String subject = null;
991        //                      String predicate = null;
992        //                      while (!lexer.done()) {
993        //                              if (subject == null)
994        //                                      subject = lexer.next();
995        //                              if (predicate == null)
996        //                                      predicate = lexer.next();
997        //                              if (lexer.peekType() == null) {
998        //                                      throw new Error("Unexpected end of input parsing turtle");
999        //                              } if (lexer.peekType() == LexerTokenType.TOKEN) {
1000        //                                      sct.triple(subject, predicate, lexer.next());
1001        //                              } else if (lexer.peek() == null) {
1002        //                                      throw new Error("Unexected - turtle lexer found no token");
1003        //                              } else if (lexer.peek().equals("[")) {
1004        //                                      sct.triple(subject, predicate, importComplex(lexer));
1005        //                              } else
1006        //                                      throw new Exception("Not done yet");
1007        //                              String n = lexer.next();
1008        //                              if (Utilities.noString(n))
1009        //                                      break;
1010        //                              if (n.equals(".")) {
1011        //                                      subject = null;
1012        //                                      predicate = null;
1013        //                              } else if (n.equals(";")) {
1014        //                                      predicate = null;
1015        //                              } else if (!n.equals(","))
1016        //                                      throw new Exception("Unexpected token "+n);          
1017        //                      }
1018        //              }
1019        //      }
1020
1021  public void parse(String source) throws FHIRFormatError {
1022                prefixes.clear();
1023                prefixes.put("_", "urn:uuid:4425b440-2c33-4488-b9fc-cf9456139995#");
1024                parse(new Lexer(source));
1025        }
1026
1027  private void parse(Lexer lexer) throws FHIRFormatError {
1028                boolean doPrefixes = true;
1029                while (!lexer.done()) {
1030      if (doPrefixes && (lexer.peek(LexerTokenType.TOKEN, "@") || lexer.peek(LexerTokenType.WORD, "PREFIX") || lexer.peek(LexerTokenType.WORD, "BASE"))) {
1031                                boolean sparqlStyle = false;
1032        boolean base = false;
1033                                if (lexer.peek(LexerTokenType.TOKEN, "@")) {
1034                                        lexer.token("@");
1035          String p = lexer.word();
1036          if (p.equals("base"))
1037            base = true;
1038          else if (!p.equals("prefix"))
1039            throw new FHIRFormatError("Unexpected token "+p);  
1040                                } else {
1041                                        sparqlStyle = true;
1042          String p = lexer.word();
1043          if (p.equals("BASE"))
1044            base = true;
1045          else if (!p.equals("PREFIX"))
1046            throw new FHIRFormatError("Unexpected token "+p);  
1047        }
1048        String prefix = null; 
1049        if (!base) {
1050          prefix = lexer.peekType() == LexerTokenType.WORD ? lexer.next(LexerTokenType.WORD, false) : null;
1051                                lexer.token(":");
1052        }
1053        String url = lexer.next(LexerTokenType.URI, false);
1054                                if (!sparqlStyle)
1055                                        lexer.token(".");
1056        if (!base)
1057                                prefix(prefix, url);
1058        else if (this.base == null)
1059          this.base = url;
1060        else
1061          throw new FHIRFormatError("Duplicate @base");  
1062                        } else if (lexer.peekType() == LexerTokenType.URI) {
1063                                doPrefixes = false;
1064                                TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol);
1065                                uri.setUri(lexer.uri());
1066                                TTLComplex complex = parseComplex(lexer);
1067                                objects.put(uri, complex);
1068                                lexer.token(".");
1069                        } else if (lexer.peekType() == LexerTokenType.WORD) {
1070                                doPrefixes = false;
1071                                TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol);
1072                                String pfx = lexer.word();
1073                                if (!prefixes.containsKey(pfx))
1074          throw new FHIRFormatError("Unknown prefix "+pfx);
1075                                lexer.token(":");
1076                                uri.setUri(prefixes.get(pfx)+lexer.word());
1077                                TTLComplex complex = parseComplex(lexer);
1078                                objects.put(uri, complex);
1079                                lexer.token(".");
1080                        } else if (lexer.peek(LexerTokenType.TOKEN, ":")) {
1081                                doPrefixes = false;
1082                                TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol);
1083                                lexer.token(":");
1084                                if (!prefixes.containsKey(null))
1085          throw new FHIRFormatError("Unknown prefix ''");
1086                                uri.setUri(prefixes.get(null)+lexer.word());
1087                                TTLComplex complex = parseComplex(lexer);
1088                                objects.put(uri, complex);
1089                                lexer.token(".");
1090                        } else if (lexer.peek(LexerTokenType.TOKEN, "[")) {
1091                                doPrefixes = false;
1092                                lexer.token("[");
1093                                TTLComplex bnode = parseComplex(lexer);
1094                                lexer.token("]");
1095                                TTLComplex complex = null;
1096        if (!lexer.peek(LexerTokenType.TOKEN, ".")) {
1097                                        complex = parseComplex(lexer);
1098          // at this point, we collapse bnode and complex, and give bnode a fictional identity
1099          bnode.addPredicates(complex.predicates);
1100        }
1101        
1102        objects.put(anonymousId(), bnode);
1103                                lexer.token(".");
1104                        } else 
1105                                throw lexer.error("Unknown token "+lexer.token);
1106                }
1107        }
1108
1109  private TTLURL anonymousId() throws FHIRFormatError {
1110    TTLURL url = new TTLURL(-1, -1);
1111    url.setUri("urn:uuid:"+UUID.randomUUID().toString().toLowerCase());
1112    return url;
1113  }
1114
1115  private TTLComplex parseComplex(Lexer lexer) throws FHIRFormatError {
1116                TTLComplex result = new TTLComplex(lexer.startLine, lexer.startCol);
1117
1118                boolean done = lexer.peek(LexerTokenType.TOKEN, "]");
1119                while (!done) {
1120                        String uri = null;
1121                        if (lexer.peekType() == LexerTokenType.URI)
1122                                uri = lexer.uri();
1123                        else {
1124                                String t = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null;
1125                                if (lexer.type == LexerTokenType.TOKEN && lexer.token.equals(":")) {
1126                                        lexer.token(":");
1127                                        if (!prefixes.containsKey(t))
1128            throw new FHIRFormatError("unknown prefix "+t);
1129                                        uri = prefixes.get(t)+lexer.word();
1130                                } else if (t.equals("a"))
1131                                        uri = prefixes.get("rdfs")+"type";
1132                                else
1133                                        throw lexer.error("unexpected token");
1134                        }
1135
1136                        boolean inlist = false;
1137                        if (lexer.peek(LexerTokenType.TOKEN, "(")) {
1138                                inlist = true;
1139                                lexer.token("(");
1140                        }
1141
1142                        boolean rpt = false;
1143                        do {
1144                                if (lexer.peek(LexerTokenType.TOKEN, "[")) {
1145                                        lexer.token("[");
1146          result.addPredicate(uri, parseComplex(lexer));
1147                                        lexer.token("]");
1148                                } else if (lexer.peekType() == LexerTokenType.URI) {
1149                                        TTLURL u = new TTLURL(lexer.startLine, lexer.startCol);
1150                                        u.setUri(lexer.uri());
1151          result.addPredicate(uri, u);
1152                                } else if (lexer.peekType() == LexerTokenType.LITERAL) {
1153                                        TTLLiteral u = new TTLLiteral(lexer.startLine, lexer.startCol);
1154                                        u.value = lexer.literal();
1155                                        if (lexer.peek(LexerTokenType.TOKEN, "^")) {
1156                                                lexer.token("^");
1157                                                lexer.token("^");
1158                                                if (lexer.peekType() == LexerTokenType.URI) {
1159                                                        u.type = lexer.uri();
1160                                                } else {
1161                                                        String l = lexer.word();
1162                                                        lexer.token(":");
1163                                                        u.type = prefixes.get(l)+ lexer.word();
1164                                                }
1165                                        }
1166                                        if (lexer.peek(LexerTokenType.TOKEN, "@")) {
1167                                                //lang tag - skip it 
1168                                                lexer.token("@");
1169            String lang = lexer.word();
1170            if (!lang.matches(LANG_REGEX)) {
1171              throw new FHIRFormatError("Invalid Language tag "+lang);
1172            }
1173                                        }
1174          result.addPredicate(uri, u);
1175                                } else if (lexer.peekType() == LexerTokenType.WORD || lexer.peek(LexerTokenType.TOKEN, ":")) {
1176                                        int sl = lexer.startLine;
1177                                        int sc = lexer.startCol;
1178                                        String pfx = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null;
1179                                        if (Utilities.isDecimal(pfx, true) && !lexer.peek(LexerTokenType.TOKEN, ":")) {
1180                                                TTLLiteral u = new TTLLiteral(sl, sc);
1181                                                u.value = pfx;
1182            result.addPredicate(uri, u);                                        
1183                                        } else if (("false".equals(pfx) || "true".equals(pfx)) && !lexer.peek(LexerTokenType.TOKEN, ":")) {
1184                                                TTLLiteral u = new TTLLiteral(sl, sc);
1185                                                u.value = pfx;
1186            result.addPredicate(uri, u);                                        
1187                                        } else {
1188                                                if (!prefixes.containsKey(pfx))
1189              throw new FHIRFormatError("Unknown prefix "+(pfx == null ? "''" : pfx));                                          
1190                                                TTLURL u = new TTLURL(sl, sc);
1191                                                lexer.token(":");
1192                                                u.setUri(prefixes.get(pfx)+lexer.word());
1193            result.addPredicate(uri, u);
1194                                        } 
1195                                } else if (!lexer.peek(LexerTokenType.TOKEN, ";") && (!inlist || !lexer.peek(LexerTokenType.TOKEN, ")"))) {
1196          throw new FHIRFormatError("unexpected token "+lexer.token);
1197                                }
1198
1199                                if (inlist)
1200                                        rpt = !lexer.peek(LexerTokenType.TOKEN, ")");
1201                                else {
1202                                        rpt = lexer.peek(LexerTokenType.TOKEN, ",");
1203                                        if (rpt)
1204            lexer.readNext(false);
1205                                }
1206                        } while (rpt);
1207                        if (inlist)
1208                                lexer.token(")");
1209
1210                        if (lexer.peek(LexerTokenType.TOKEN, ";")) {
1211        while ((lexer.peek(LexerTokenType.TOKEN, ";")))
1212                                lexer.token(";");
1213        done = lexer.peek(LexerTokenType.TOKEN, ".") || lexer.peek(LexerTokenType.TOKEN, "]");
1214                        } else {
1215                                done = true;
1216                        }
1217                }
1218                return result;
1219        }
1220
1221  public Map<TTLURL, TTLComplex> getObjects() {
1222    return objects;
1223  }
1224
1225  public TTLComplex getObject(String url) {
1226    for (TTLURL t : objects.keySet()) {
1227      if (t.getUri().equals(url))
1228        return objects.get(t);
1229    }
1230    return null;
1231  }
1232
1233        //      public void parseFragment(Lexer lexer) throws Exception {
1234        //              lexer.next(); // read [
1235        //              Complex obj = new Complex();
1236        //              while (!lexer.peek().equals("]")) {
1237        //                      String predicate = lexer.next();
1238        //                      if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == LexerTokenType.LITERAL) {
1239        //                              obj.predicate(predicate, lexer.next());
1240        //                      } else if (lexer.peek().equals("[")) {
1241        //                              obj.predicate(predicate, importComplex(lexer));
1242        //                      } else
1243        //                              throw new Exception("Not done yet");
1244        //                      if (lexer.peek().equals(";")) 
1245        //                              lexer.next();
1246        //              }
1247        //              lexer.next(); // read ]
1248        //              //return obj;
1249        //      }
1250        //
1251        //      public void importTtl(Section sct, String ttl) throws Exception {
1252        //              if (!Utilities.noString(ttl)) {
1253        //                      //        System.out.println("import ttl: "+ttl);
1254        //                      Lexer lexer = new Lexer(ttl);
1255        //                      String subject = null;
1256        //                      String predicate = null;
1257        //                      while (!lexer.done()) {
1258        //                              if (subject == null)
1259        //                                      subject = lexer.next();
1260        //                              if (predicate == null)
1261        //                                      predicate = lexer.next();
1262        //                              if (lexer.peekType() == null) {
1263        //                                      throw new Error("Unexpected end of input parsing turtle");
1264        //                              } if (lexer.peekType() == LexerTokenType.TOKEN) {
1265        //                                      sct.triple(subject, predicate, lexer.next());
1266        //                              } else if (lexer.peek() == null) {
1267        //                                      throw new Error("Unexected - turtle lexer found no token");
1268        //                              } else if (lexer.peek().equals("[")) {
1269        //                                      sct.triple(subject, predicate, importComplex(lexer));
1270        //                              } else
1271        //                                      throw new Exception("Not done yet");
1272        //                              String n = lexer.next();
1273        //                              if (Utilities.noString(n))
1274        //                                      break;
1275        //                              if (n.equals(".")) {
1276        //                                      subject = null;
1277        //                                      predicate = null;
1278        //                              } else if (n.equals(";")) {
1279        //                                      predicate = null;
1280        //                              } else if (!n.equals(","))
1281        //                                      throw new Exception("Unexpected token "+n);          
1282        //                      }
1283        //              }
1284        //}
1285
1286        //      private Complex importComplex(Lexer lexer) throws Exception {
1287        //              lexer.next(); // read [
1288        //              Complex obj = new Complex();
1289        //              while (!lexer.peek().equals("]")) {
1290        //                      String predicate = lexer.next();
1291        //                      if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == LexerTokenType.LITERAL) {
1292        //                              obj.predicate(predicate, lexer.next());
1293        //                      } else if (lexer.peek().equals("[")) {
1294        //                              obj.predicate(predicate, importComplex(lexer));
1295        //                      } else
1296        //                              throw new Exception("Not done yet");
1297        //                      if (lexer.peek().equals(";")) 
1298        //                              lexer.next();
1299        //              }
1300        //              lexer.next(); // read ]
1301        //              return obj;
1302        //      }
1303
1304}