001package org.hl7.fhir.r4.utils; 002 003import org.hl7.fhir.exceptions.FHIRException; 004 005/*- 006 * #%L 007 * org.hl7.fhir.r4 008 * %% 009 * Copyright (C) 2014 - 2019 Health Level 7 010 * %% 011 * Licensed under the Apache License, Version 2.0 (the "License"); 012 * you may not use this file except in compliance with the License. 013 * You may obtain a copy of the License at 014 * 015 * http://www.apache.org/licenses/LICENSE-2.0 016 * 017 * Unless required by applicable law or agreed to in writing, software 018 * distributed under the License is distributed on an "AS IS" BASIS, 019 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 020 * See the License for the specific language governing permissions and 021 * limitations under the License. 022 * #L% 023 */ 024 025 026import org.hl7.fhir.r4.model.ExpressionNode; 027import org.hl7.fhir.r4.model.ExpressionNode.SourceLocation; 028import org.hl7.fhir.utilities.Utilities; 029 030// shared lexer for concrete syntaxes 031// - FluentPath 032// - Mapping language 033 034public class FHIRLexer { 035 public class FHIRLexerException extends FHIRException { 036 037 public FHIRLexerException() { 038 super(); 039 } 040 041 public FHIRLexerException(String message, Throwable cause) { 042 super(message, cause); 043 } 044 045 public FHIRLexerException(String message) { 046 super(message); 047 } 048 049 public FHIRLexerException(Throwable cause) { 050 super(cause); 051 } 052 053 } 054 private String source; 055 private int cursor; 056 private int currentStart; 057 private String current; 058 private SourceLocation currentLocation; 059 private SourceLocation currentStartLocation; 060 private int id; 061 private String name; 062 063 public FHIRLexer(String source, String name) throws FHIRLexerException { 064 this.source = source; 065 this.name = name == null ? "??" : name; 066 currentLocation = new SourceLocation(1, 1); 067 next(); 068 } 069 public FHIRLexer(String source, int i) throws FHIRLexerException { 070 this.source = source; 071 this.cursor = i; 072 currentLocation = new SourceLocation(1, 1); 073 next(); 074 } 075 public String getCurrent() { 076 return current; 077 } 078 public SourceLocation getCurrentLocation() { 079 return currentLocation; 080 } 081 082 public boolean isConstant() { 083 return current != null && (current.charAt(0) == '\'' || current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 084 current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 085 current.equals("true") || current.equals("false") || current.equals("{}"); 086 } 087 088 public boolean isFixedName() { 089 return current != null && (current.charAt(0) == '`'); 090 } 091 092 public boolean isStringConstant() { 093 return current.charAt(0) == '\'' || current.charAt(0) == '"' || current.charAt(0) == '`'; 094 } 095 096 public String take() throws FHIRLexerException { 097 String s = current; 098 next(); 099 return s; 100 } 101 102 public int takeInt() throws FHIRLexerException { 103 String s = current; 104 if (!Utilities.isInteger(s)) 105 throw error("Found "+current+" expecting an integer"); 106 next(); 107 return Integer.parseInt(s); 108 } 109 110 public boolean isToken() { 111 if (Utilities.noString(current)) 112 return false; 113 114 if (current.startsWith("$")) 115 return true; 116 117 if (current.equals("*") || current.equals("**")) 118 return true; 119 120 if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) { 121 for (int i = 1; i < current.length(); i++) 122 if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') || 123 (current.charAt(1) >= '0' && current.charAt(1) <= '9'))) 124 return false; 125 return true; 126 } 127 return false; 128 } 129 130 public FHIRLexerException error(String msg) { 131 return error(msg, currentLocation.toString()); 132 } 133 134 public FHIRLexerException error(String msg, String location) { 135 return new FHIRLexerException("Error in "+name+" at "+location+": "+msg); 136 } 137 138 public void next() throws FHIRLexerException { 139 current = null; 140 boolean last13 = false; 141 while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) { 142 if (source.charAt(cursor) == '\r') { 143 currentLocation.setLine(currentLocation.getLine() + 1); 144 currentLocation.setColumn(1); 145 last13 = true; 146 } else if (!last13 && (source.charAt(cursor) == '\n')) { 147 currentLocation.setLine(currentLocation.getLine() + 1); 148 currentLocation.setColumn(1); 149 last13 = false; 150 } else { 151 last13 = false; 152 currentLocation.setColumn(currentLocation.getColumn() + 1); 153 } 154 cursor++; 155 } 156 currentStart = cursor; 157 currentStartLocation = currentLocation; 158 if (cursor < source.length()) { 159 char ch = source.charAt(cursor); 160 if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') { 161 cursor++; 162 if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') || (ch == '-' && source.charAt(cursor) == '>')) 163 cursor++; 164 current = source.substring(currentStart, cursor); 165 } else if (ch == '.' ) { 166 cursor++; 167 if (cursor < source.length() && (source.charAt(cursor) == '.')) 168 cursor++; 169 current = source.substring(currentStart, cursor); 170 } else if (ch >= '0' && ch <= '9') { 171 cursor++; 172 boolean dotted = false; 173 while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) { 174 if (source.charAt(cursor) == '.') 175 dotted = true; 176 cursor++; 177 } 178 if (source.charAt(cursor-1) == '.') 179 cursor--; 180 current = source.substring(currentStart, cursor); 181 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 182 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 183 (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 184 cursor++; 185 current = source.substring(currentStart, cursor); 186 } else if (ch == '%') { 187 cursor++; 188 if (cursor < source.length() && (source.charAt(cursor) == '`')) { 189 cursor++; 190 while (cursor < source.length() && (source.charAt(cursor) != '`')) 191 cursor++; 192 cursor++; 193 } else 194 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 195 (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-')) 196 cursor++; 197 current = source.substring(currentStart, cursor); 198 } else if (ch == '/') { 199 cursor++; 200 if (cursor < source.length() && (source.charAt(cursor) == '/')) { 201 cursor++; 202 while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) 203 cursor++; 204 } 205 current = source.substring(currentStart, cursor); 206 } else if (ch == '$') { 207 cursor++; 208 while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z')) 209 cursor++; 210 current = source.substring(currentStart, cursor); 211 } else if (ch == '{') { 212 cursor++; 213 ch = source.charAt(cursor); 214 if (ch == '}') 215 cursor++; 216 current = source.substring(currentStart, cursor); 217 } else if (ch == '"') { 218 cursor++; 219 boolean escape = false; 220 while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) { 221 if (escape) 222 escape = false; 223 else 224 escape = (source.charAt(cursor) == '\\'); 225 cursor++; 226 } 227 if (cursor == source.length()) 228 throw error("Unterminated string"); 229 cursor++; 230 current = "\""+source.substring(currentStart+1, cursor-1)+"\""; 231 } else if (ch == '`') { 232 cursor++; 233 boolean escape = false; 234 while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) { 235 if (escape) 236 escape = false; 237 else 238 escape = (source.charAt(cursor) == '\\'); 239 cursor++; 240 } 241 if (cursor == source.length()) 242 throw error("Unterminated string"); 243 cursor++; 244 current = "`"+source.substring(currentStart+1, cursor-1)+"`"; 245 } else if (ch == '\''){ 246 cursor++; 247 char ech = ch; 248 boolean escape = false; 249 while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) { 250 if (escape) 251 escape = false; 252 else 253 escape = (source.charAt(cursor) == '\\'); 254 cursor++; 255 } 256 if (cursor == source.length()) 257 throw error("Unterminated string"); 258 cursor++; 259 current = source.substring(currentStart, cursor); 260 if (ech == '\'') 261 current = "\'"+current.substring(1, current.length() - 1)+"\'"; 262 } else if (ch == '`') { 263 cursor++; 264 boolean escape = false; 265 while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) { 266 if (escape) 267 escape = false; 268 else 269 escape = (source.charAt(cursor) == '\\'); 270 cursor++; 271 } 272 if (cursor == source.length()) 273 throw error("Unterminated string"); 274 cursor++; 275 current = "`"+source.substring(currentStart+1, cursor-1)+"`"; 276 } else if (ch == '@'){ 277 int start = cursor; 278 cursor++; 279 while (cursor < source.length() && isDateChar(source.charAt(cursor), start)) 280 cursor++; 281 current = source.substring(currentStart, cursor); 282 } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then 283 cursor++; 284 current = source.substring(currentStart, cursor); 285 } 286 } 287 } 288 289 290 private boolean isDateChar(char ch,int start) { 291 int eot = source.charAt(start+1) == 'T' ? 10 : 20; 292 293 return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1))); 294 } 295 public boolean isOp() { 296 return ExpressionNode.Operation.fromCode(current) != null; 297 } 298 public boolean done() { 299 return currentStart >= source.length(); 300 } 301 public int nextId() { 302 id++; 303 return id; 304 } 305 public SourceLocation getCurrentStartLocation() { 306 return currentStartLocation; 307 } 308 309 // special case use 310 public void setCurrent(String current) { 311 this.current = current; 312 } 313 314 public boolean hasComment() { 315 return !done() && current.startsWith("//"); 316 } 317 public boolean hasToken(String kw) { 318 return !done() && kw.equals(current); 319 } 320 public boolean hasToken(String... names) { 321 if (done()) 322 return false; 323 for (String s : names) 324 if (s.equals(current)) 325 return true; 326 return false; 327 } 328 329 public void token(String kw) throws FHIRLexerException { 330 if (!kw.equals(current)) 331 throw error("Found \""+current+"\" expecting \""+kw+"\""); 332 next(); 333 } 334 335 public String readConstant(String desc) throws FHIRLexerException { 336 if (!isStringConstant()) 337 throw error("Found "+current+" expecting \"["+desc+"]\""); 338 339 return processConstant(take()); 340 } 341 342 public String readFixedName(String desc) throws FHIRLexerException { 343 if (!isFixedName()) 344 throw error("Found "+current+" expecting \"["+desc+"]\""); 345 346 return processFixedName(take()); 347 } 348 349 public String processConstant(String s) throws FHIRLexerException { 350 StringBuilder b = new StringBuilder(); 351 int i = 1; 352 while (i < s.length()-1) { 353 char ch = s.charAt(i); 354 if (ch == '\\') { 355 i++; 356 switch (s.charAt(i)) { 357 case 't': 358 b.append('\t'); 359 break; 360 case 'r': 361 b.append('\r'); 362 break; 363 case 'n': 364 b.append('\n'); 365 break; 366 case 'f': 367 b.append('\f'); 368 break; 369 case '\'': 370 b.append('\''); 371 break; 372 case '"': 373 b.append('"'); 374 break; 375 case '`': 376 b.append('`'); 377 break; 378 case '\\': 379 b.append('\\'); 380 break; 381 case '/': 382 b.append('/'); 383 break; 384 case 'u': 385 i++; 386 int uc = Integer.parseInt(s.substring(i, i+4), 16); 387 b.append((char) uc); 388 i = i + 4; 389 break; 390 default: 391 throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i)); 392 } 393 } else { 394 b.append(ch); 395 i++; 396 } 397 } 398 return b.toString(); 399 } 400 401 public String processFixedName(String s) throws FHIRLexerException { 402 StringBuilder b = new StringBuilder(); 403 int i = 1; 404 while (i < s.length()-1) { 405 char ch = s.charAt(i); 406 if (ch == '\\') { 407 i++; 408 switch (s.charAt(i)) { 409 case 't': 410 b.append('\t'); 411 break; 412 case 'r': 413 b.append('\r'); 414 break; 415 case 'n': 416 b.append('\n'); 417 break; 418 case 'f': 419 b.append('\f'); 420 break; 421 case '\'': 422 b.append('\''); 423 break; 424 case '"': 425 b.append('"'); 426 break; 427 case '\\': 428 b.append('\\'); 429 break; 430 case '/': 431 b.append('/'); 432 break; 433 case 'u': 434 i++; 435 int uc = Integer.parseInt(s.substring(i, i+4), 16); 436 b.append((char) uc); 437 i = i + 4; 438 break; 439 default: 440 throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i)); 441 } 442 } else { 443 b.append(ch); 444 i++; 445 } 446 } 447 return b.toString(); 448 } 449 450 public void skipToken(String token) throws FHIRLexerException { 451 if (getCurrent().equals(token)) 452 next(); 453 454 } 455 public String takeDottedToken() throws FHIRLexerException { 456 StringBuilder b = new StringBuilder(); 457 b.append(take()); 458 while (!done() && getCurrent().equals(".")) { 459 b.append(take()); 460 b.append(take()); 461 } 462 return b.toString(); 463 } 464 465 void skipComments() throws FHIRLexerException { 466 while (!done() && hasComment()) 467 next(); 468 } 469 public int getCurrentStart() { 470 return currentStart; 471 } 472 473}