001package org.hl7.fhir.r4.formats;
002
003/*-
004 * #%L
005 * org.hl7.fhir.r4
006 * %%
007 * Copyright (C) 2014 - 2019 Health Level 7
008 * %%
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 * 
013 *      http://www.apache.org/licenses/LICENSE-2.0
014 * 
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 * #L%
021 */
022
023
/**
 * A small hand-written lexer for a subset of Turtle (RDF) syntax.
 *
 * <p>The lexer always holds one token of look-ahead: {@link #peek()} and
 * {@link #peekType()} inspect it, {@link #next()} returns it and advances, and
 * {@link #done()} reports that the input is exhausted.
 *
 * <p>Recognised input:
 * <ul>
 *   <li>double-quoted literals, returned with their quotes and backslash escapes intact;</li>
 *   <li>the single-character delimiters {@code [ ] ; . ,};</li>
 *   <li>names that start with a letter and continue with letters, digits,
 *       {@code ':'}, {@code '.'} and backslash escapes.</li>
 * </ul>
 * Collections ({@code '('}) are rejected with "not supported yet"; any other
 * character (for example {@code '<'}, {@code '@'} or {@code '#'}) is reported as
 * an error instead of being silently ignored.
 */
public class TurtleLexer {

  /** Classification of the token currently held by the lexer. */
  public enum TurtleTokenType {
    /** End of input: there is no current token. */
    NULL, 
    /** A name (letters, digits, ':', '.', backslash escapes). */
    TOKEN,
    /** A single-character delimiter: one of {@code [ ] ; . ,}. */
    SPECIAL,
    /** A double-quoted string, including the surrounding quotes. */
    LITERAL
  }

  private final String source;
  private int cursor; 
  private String token;
  private TurtleTokenType type;
  
  /**
   * Creates a lexer over {@code source} and reads the first token.
   *
   * @param source the Turtle text to tokenise; must not be null
   * @throws Exception if the first token is malformed or uses unsupported syntax
   */
  public TurtleLexer(String source) throws Exception {
    if (source == null) {
      throw new NullPointerException("source");
    }
    this.source = source;
    cursor = 0;
    readNext();
  }

  /**
   * Loads the next token into {@code token}/{@code type}.
   *
   * <p>Whitespace is skipped here, before dispatching, so that leading
   * whitespace in the input is handled the same way as whitespace between
   * tokens.
   *
   * @throws Exception on '(' (not supported), on a character that cannot start
   *         a token, or on a malformed literal/name
   */
  private void readNext() throws Exception {    
    skipWhitespace();
    if (cursor >= source.length()) {
      token = null;
      type = TurtleTokenType.NULL;
      return;
    }
    char c = source.charAt(cursor);
    if (c == '"') {
      readLiteral();
    } else if (c == '(') {
      throw new Exception("not supported yet");
    } else if (isDelimiter(c)) {
      readDelimiter();
    } else if (Character.isLetter(c)) {
      readToken();
    } else {
      // Without this branch the lexer would neither advance nor change its
      // state, so callers looping on done() would never terminate.
      throw new Exception("Unexpected character '" + c + "' at offset " + cursor);
    }
  }

  /** Advances the cursor past any run of whitespace. */
  private void skipWhitespace() {
    while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) {
      cursor++;
    }
  }

  /** Returns true for the single-character delimiters {@code [ ] ; . ,}. */
  private boolean isDelimiter(char c) {
    return c == '[' || c == ']' || c == ';' || c == '.' || c == ',';
  }

  /**
   * Reads a double-quoted literal starting at the cursor. The token keeps the
   * surrounding quotes and every backslash escape exactly as written.
   *
   * @throws Exception if the input ends inside an escape or before the closing quote
   */
  private void readLiteral() throws Exception {
    int start = cursor;
    StringBuilder b = new StringBuilder();
    b.append('"');
    cursor++; // skip opening "
    while (cursor < source.length() && source.charAt(cursor) != '"') {
      if (source.charAt(cursor) == '\\') {
        // Copy the backslash, then fall through to copy the escaped character
        // verbatim so that an escaped quote does not end the literal.
        b.append('\\');
        cursor++;
        if (cursor >= source.length()) {
          throw new Exception("Unterminated escape sequence in literal starting at offset " + start);
        }
      } 
      b.append(source.charAt(cursor));
      cursor++;
    }
    if (cursor >= source.length()) {
      throw new Exception("Unterminated literal starting at offset " + start);
    }
    cursor++; // skip closing "
    b.append('"');
    token = b.toString();
    type = TurtleTokenType.LITERAL;
  }

  /** Reads the single delimiter character at the cursor as a SPECIAL token. */
  private void readDelimiter() {
    token = String.valueOf(source.charAt(cursor));
    type = TurtleTokenType.SPECIAL;
    cursor++;
  }

  /**
   * Reads a name starting at the cursor. Because '.' is both a valid name
   * character and the statement terminator, a single trailing '.' is removed
   * from the name and left in the input to be read as a delimiter.
   *
   * @throws Exception if the input ends immediately after a backslash
   */
  private void readToken() throws Exception {
    int start = cursor;
    StringBuilder b = new StringBuilder();
    while (cursor < source.length() && isValidTokenChar(source.charAt(cursor))) {
      if (source.charAt(cursor) == '\\') {
        // Copy the backslash and then the escaped character, whatever it is.
        b.append('\\');
        cursor++;
        if (cursor >= source.length()) {
          throw new Exception("Unterminated escape sequence in token starting at offset " + start);
        }
      } 
      b.append(source.charAt(cursor));
      cursor++;
    }
    token = b.toString();
    type = TurtleTokenType.TOKEN;
    if (token.endsWith(".")) {
      cursor--; // hand the '.' back so the next read yields it as a delimiter
      token = token.substring(0, token.length()-1);
    }
  }

  /** Returns true if {@code c} may appear inside a name. */
  private boolean isValidTokenChar(char c) {
    return Character.isLetter(c) || Character.isDigit(c) || c == ':' || c == '\\' || c == '.';
  }

  /**
   * @return true once the input is exhausted and there is no current token
   */
  public boolean done() {
    return type == TurtleTokenType.NULL;
  }

  /**
   * Returns the current token and advances to the following one.
   *
   * @return the current token text, or null at end of input
   * @throws Exception if the following token is malformed or unsupported
   */
  public String next() throws Exception {
    String res = token;
    readNext();
    return res;
  }

  /**
   * Returns the current token without consuming it.
   *
   * @return the current token text, or null at end of input
   * @throws Exception never thrown by this implementation; declared for API compatibility
   */
  public String peek() throws Exception {
    return token;
  }

  /**
   * @return the type of the current token ({@link TurtleTokenType#NULL} at end of input)
   */
  public TurtleTokenType peekType() {
    return type;
  }
  
  
}