001package org.hl7.fhir.r4.formats; 002 003/*- 004 * #%L 005 * org.hl7.fhir.r4 006 * %% 007 * Copyright (C) 2014 - 2019 Health Level 7 008 * %% 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 * #L% 021 */ 022 023 024public class TurtleLexer { 025 026 public enum TurtleTokenType { 027 NULL, 028 TOKEN, SPECIAL, LITERAL 029 } 030 031 private String source; 032 private int cursor; 033 private String token; 034 private TurtleTokenType type; 035 036 public TurtleLexer(String source) throws Exception { 037 this.source = source; 038 cursor = 0; 039 readNext(); 040 } 041 042 private void readNext() throws Exception { 043 if (cursor >= source.length()) { 044 token = null; 045 type = TurtleTokenType.NULL; 046 } else if (source.charAt(cursor) == '"') 047 readLiteral(); 048 else if (source.charAt(cursor) == '[' || source.charAt(cursor) == ']') 049 readDelimiter(); 050 else if (source.charAt(cursor) == '(') 051 throw new Exception("not supported yet"); 052 else if (source.charAt(cursor) == ';' || source.charAt(cursor) == '.' || source.charAt(cursor) == ',') 053 readDelimiter(); 054 else if (Character.isLetter(source.charAt(cursor))) 055 readToken(); 056 057 } 058 059 private void readLiteral() { 060 StringBuilder b = new StringBuilder(); 061 cursor++; // skip " 062 while (cursor < source.length() && source.charAt(cursor) != '"') { 063 if (source.charAt(cursor) == '\\') { 064 b.append(source.charAt(cursor)); 065 cursor++; 066 } 067 b.append(source.charAt(cursor)); 068 cursor++; 069 } 070 token = "\""+b.toString()+"\""; 071 type = TurtleTokenType.LITERAL; 072 cursor++; // skip " 073 while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) 074 cursor++; 075 } 076 077 private void readDelimiter() { 078 StringBuilder b = new StringBuilder(); 079 b.append(source.charAt(cursor)); 080 cursor++; 081 token = b.toString(); 082 type = TurtleTokenType.SPECIAL; 083 while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) 084 cursor++; 085 } 086 087 private void readToken() { 088 StringBuilder b = new StringBuilder(); 089 while (cursor < source.length() && isValidTokenChar(source.charAt(cursor))) { 090 if (source.charAt(cursor) == '\\') { 091 b.append(source.charAt(cursor)); 092 cursor++; 093 } 094 b.append(source.charAt(cursor)); 095 cursor++; 096 } 097 token = b.toString(); 098 type = TurtleTokenType.TOKEN; 099 if (token.endsWith(".")) { 100 cursor--; 101 token = token.substring(0, token.length()-1); 102 } 103 while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) 104 cursor++; 105 } 106 107 private boolean isValidTokenChar(char c) { 108 return Character.isLetter(c) || Character.isDigit(c) || c == ':' || c == '\\' || c == '.'; 109 } 110 111 public boolean done() { 112 return type == TurtleTokenType.NULL; 113 } 114 115 public String next() throws Exception { 116 String res = token; 117 readNext(); 118 return res; 119 } 120 121 public String peek() throws Exception { 122 return token; 123 } 124 125 public TurtleTokenType peekType() { 126 return type; 127 } 128 129 130}