001package org.hl7.fhir.r4.utils;
002
003import org.hl7.fhir.exceptions.FHIRException;
004
005/*-
006 * #%L
007 * org.hl7.fhir.r4
008 * %%
009 * Copyright (C) 2014 - 2019 Health Level 7
010 * %%
011 * Licensed under the Apache License, Version 2.0 (the "License");
012 * you may not use this file except in compliance with the License.
013 * You may obtain a copy of the License at
014 * 
015 *      http://www.apache.org/licenses/LICENSE-2.0
016 * 
017 * Unless required by applicable law or agreed to in writing, software
018 * distributed under the License is distributed on an "AS IS" BASIS,
019 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
020 * See the License for the specific language governing permissions and
021 * limitations under the License.
022 * #L%
023 */
024
025
026import org.hl7.fhir.r4.model.ExpressionNode;
027import org.hl7.fhir.r4.model.ExpressionNode.SourceLocation;
028import org.hl7.fhir.utilities.Utilities;
029
030// shared lexer for concrete syntaxes 
031// - FluentPath
032// - Mapping language
033
034public class FHIRLexer {
035  public class FHIRLexerException extends FHIRException {
036
037    public FHIRLexerException() {
038      super();
039    }
040
041    public FHIRLexerException(String message, Throwable cause) {
042      super(message, cause);
043    }
044
045    public FHIRLexerException(String message) {
046      super(message);
047    }
048
049    public FHIRLexerException(Throwable cause) {
050      super(cause);
051    }
052
053  }
054  private String source;
055  private int cursor;
056  private int currentStart;
057  private String current;
058  private SourceLocation currentLocation;
059  private SourceLocation currentStartLocation;
060  private int id;
061  private String name;
062
063  public FHIRLexer(String source, String name) throws FHIRLexerException {
064    this.source = source;
065    this.name = name == null ? "??" : name;
066    currentLocation = new SourceLocation(1, 1);
067    next();
068  }
069  public FHIRLexer(String source, int i) throws FHIRLexerException {
070    this.source = source;
071    this.cursor = i;
072    currentLocation = new SourceLocation(1, 1);
073    next();
074  }
075  public String getCurrent() {
076    return current;
077  }
078  public SourceLocation getCurrentLocation() {
079    return currentLocation;
080  }
081
082  public boolean isConstant() {
083    return current != null && (current.charAt(0) == '\'' || current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 
084        current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 
085        current.equals("true") || current.equals("false") || current.equals("{}");
086  }
087
088  public boolean isFixedName() {
089    return current != null && (current.charAt(0) == '`');
090  }
091
092  public boolean isStringConstant() {
093    return current.charAt(0) == '\'' || current.charAt(0) == '"' || current.charAt(0) == '`';
094  }
095
096  public String take() throws FHIRLexerException {
097    String s = current;
098    next();
099    return s;
100  }
101
102  public int takeInt() throws FHIRLexerException {
103    String s = current;
104    if (!Utilities.isInteger(s))
105      throw error("Found "+current+" expecting an integer");
106    next();
107    return Integer.parseInt(s);
108  }
109
110  public boolean isToken() {
111    if (Utilities.noString(current))
112      return false;
113
114    if (current.startsWith("$"))
115      return true;
116
117    if (current.equals("*") || current.equals("**"))
118      return true;
119
120    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
121      for (int i = 1; i < current.length(); i++) 
122        if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') ||
123            (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
124          return false;
125      return true;
126    }
127    return false;
128  }
129
130  public FHIRLexerException error(String msg) {
131    return error(msg, currentLocation.toString());
132  }
133
134  public FHIRLexerException error(String msg, String location) {
135    return new FHIRLexerException("Error in "+name+" at "+location+": "+msg);
136  }
137
138  public void next() throws FHIRLexerException {
139    current = null;
140    boolean last13 = false;
141    while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) {
142      if (source.charAt(cursor) == '\r') {
143        currentLocation.setLine(currentLocation.getLine() + 1);
144        currentLocation.setColumn(1);
145        last13 = true;
146      } else if (!last13 && (source.charAt(cursor) == '\n')) {
147        currentLocation.setLine(currentLocation.getLine() + 1);
148        currentLocation.setColumn(1);
149        last13 = false;
150      } else {
151        last13 = false;
152        currentLocation.setColumn(currentLocation.getColumn() + 1);
153      }
154      cursor++;
155    }
156    currentStart = cursor;
157    currentStartLocation = currentLocation;
158    if (cursor < source.length()) {
159      char ch = source.charAt(cursor);
160      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=')  {
161        cursor++;
162        if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') || (ch == '-' && source.charAt(cursor) == '>')) 
163          cursor++;
164        current = source.substring(currentStart, cursor);
165      } else if (ch == '.' ) {
166        cursor++;
167        if (cursor < source.length() && (source.charAt(cursor) == '.')) 
168          cursor++;
169        current = source.substring(currentStart, cursor);
170      } else if (ch >= '0' && ch <= '9') {
171          cursor++;
172        boolean dotted = false;
173        while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) {
174          if (source.charAt(cursor) == '.')
175            dotted = true;
176          cursor++;
177        }
178        if (source.charAt(cursor-1) == '.')
179          cursor--;
180        current = source.substring(currentStart, cursor);
181      }  else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
182        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
183            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 
184          cursor++;
185        current = source.substring(currentStart, cursor);
186      } else if (ch == '%') {
187        cursor++;
188        if (cursor < source.length() && (source.charAt(cursor) == '`')) {
189          cursor++;
190          while (cursor < source.length() && (source.charAt(cursor) != '`'))
191            cursor++;
192          cursor++;
193        } else
194        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
195            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-'))
196          cursor++;
197        current = source.substring(currentStart, cursor);
198      } else if (ch == '/') {
199        cursor++;
200        if (cursor < source.length() && (source.charAt(cursor) == '/')) {
201          cursor++;
202          while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) 
203            cursor++;
204        }
205        current = source.substring(currentStart, cursor);
206      } else if (ch == '$') {
207        cursor++;
208        while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z'))
209          cursor++;
210        current = source.substring(currentStart, cursor);
211      } else if (ch == '{') {
212        cursor++;
213        ch = source.charAt(cursor);
214        if (ch == '}')
215          cursor++;
216        current = source.substring(currentStart, cursor);
217      } else if (ch == '"') {
218        cursor++;
219        boolean escape = false;
220        while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) {
221          if (escape)
222            escape = false;
223          else 
224            escape = (source.charAt(cursor) == '\\');
225          cursor++;
226        }
227        if (cursor == source.length())
228          throw error("Unterminated string");
229        cursor++;
230        current = "\""+source.substring(currentStart+1, cursor-1)+"\"";
231      } else if (ch == '`') {
232        cursor++;
233        boolean escape = false;
234        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
235          if (escape)
236            escape = false;
237          else 
238            escape = (source.charAt(cursor) == '\\');
239          cursor++;
240        }
241        if (cursor == source.length())
242          throw error("Unterminated string");
243        cursor++;
244        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
245      } else if (ch == '\''){
246        cursor++;
247        char ech = ch;
248        boolean escape = false;
249        while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) {
250          if (escape)
251            escape = false;
252          else 
253            escape = (source.charAt(cursor) == '\\');
254          cursor++;
255        }
256        if (cursor == source.length())
257          throw error("Unterminated string");
258        cursor++;
259        current = source.substring(currentStart, cursor);
260        if (ech == '\'')
261          current = "\'"+current.substring(1, current.length() - 1)+"\'";
262      } else if (ch == '`') {
263        cursor++;
264        boolean escape = false;
265        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
266          if (escape)
267            escape = false;
268          else 
269            escape = (source.charAt(cursor) == '\\');
270          cursor++;
271        }
272        if (cursor == source.length())
273          throw error("Unterminated string");
274        cursor++;
275        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
276      } else if (ch == '@'){
277        int start = cursor;
278        cursor++;
279        while (cursor < source.length() && isDateChar(source.charAt(cursor), start))
280          cursor++;          
281        current = source.substring(currentStart, cursor);
282      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
283        cursor++;
284        current = source.substring(currentStart, cursor);
285      }
286    }
287  }
288
289
290  private boolean isDateChar(char ch,int start) {
291    int eot = source.charAt(start+1) == 'T' ? 10 : 20;
292    
293    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1)));
294  }
295  public boolean isOp() {
296    return ExpressionNode.Operation.fromCode(current) != null;
297  }
298  public boolean done() {
299    return currentStart >= source.length();
300  }
301  public int nextId() {
302    id++;
303    return id;
304  }
305  public SourceLocation getCurrentStartLocation() {
306    return currentStartLocation;
307  }
308  
309  // special case use
310  public void setCurrent(String current) {
311    this.current = current;
312  }
313
314  public boolean hasComment() {
315    return !done() && current.startsWith("//");
316  }
317  public boolean hasToken(String kw) {
318    return !done() && kw.equals(current);
319  }
320  public boolean hasToken(String... names) {
321    if (done()) 
322      return false;
323    for (String s : names)
324      if (s.equals(current))
325        return true;
326    return false;
327  }
328  
329  public void token(String kw) throws FHIRLexerException {
330    if (!kw.equals(current)) 
331      throw error("Found \""+current+"\" expecting \""+kw+"\"");
332    next();
333  }
334  
335  public String readConstant(String desc) throws FHIRLexerException {
336    if (!isStringConstant())
337      throw error("Found "+current+" expecting \"["+desc+"]\"");
338
339    return processConstant(take());
340  }
341
342  public String readFixedName(String desc) throws FHIRLexerException {
343    if (!isFixedName())
344      throw error("Found "+current+" expecting \"["+desc+"]\"");
345
346    return processFixedName(take());
347  }
348
349  public String processConstant(String s) throws FHIRLexerException {
350    StringBuilder b = new StringBuilder();
351    int i = 1;
352    while (i < s.length()-1) {
353      char ch = s.charAt(i);
354      if (ch == '\\') {
355        i++;
356        switch (s.charAt(i)) {
357        case 't': 
358          b.append('\t');
359          break;
360        case 'r':
361          b.append('\r');
362          break;
363        case 'n': 
364          b.append('\n');
365          break;
366        case 'f': 
367          b.append('\f');
368          break;
369        case '\'':
370          b.append('\'');
371          break;
372        case '"':
373          b.append('"');
374          break;
375        case '`':
376          b.append('`');
377          break;
378        case '\\': 
379          b.append('\\');
380          break;
381        case '/': 
382          b.append('/');
383          break;
384        case 'u':
385          i++;
386          int uc = Integer.parseInt(s.substring(i, i+4), 16);
387          b.append((char) uc);
388          i = i + 4;
389          break;
390        default:
391          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i));
392        }
393      } else {
394        b.append(ch);
395        i++;
396      }
397    }
398    return b.toString();
399  }
400  
401  public String processFixedName(String s) throws FHIRLexerException {
402    StringBuilder b = new StringBuilder();
403    int i = 1;
404    while (i < s.length()-1) {
405      char ch = s.charAt(i);
406      if (ch == '\\') {
407        i++;
408        switch (s.charAt(i)) {
409        case 't': 
410          b.append('\t');
411          break;
412        case 'r':
413          b.append('\r');
414          break;
415        case 'n': 
416          b.append('\n');
417          break;
418        case 'f': 
419          b.append('\f');
420          break;
421        case '\'':
422          b.append('\'');
423          break;
424        case '"':
425          b.append('"');
426          break;
427        case '\\': 
428          b.append('\\');
429          break;
430        case '/': 
431          b.append('/');
432          break;
433        case 'u':
434          i++;
435          int uc = Integer.parseInt(s.substring(i, i+4), 16);
436          b.append((char) uc);
437          i = i + 4;
438          break;
439        default:
440          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i));
441        }
442      } else {
443        b.append(ch);
444        i++;
445      }
446    }
447    return b.toString();
448  }
449
450  public void skipToken(String token) throws FHIRLexerException {
451    if (getCurrent().equals(token))
452      next();
453    
454  }
455  public String takeDottedToken() throws FHIRLexerException {
456    StringBuilder b = new StringBuilder();
457    b.append(take());
458    while (!done() && getCurrent().equals(".")) {
459      b.append(take());
460      b.append(take());
461    }
462    return b.toString();
463  }
464  
465  void skipComments() throws FHIRLexerException {
466    while (!done() && hasComment())
467      next();
468  }
469  public int getCurrentStart() {
470    return currentStart;
471  }
472
473}