001/* 002 * #%L 003 * HAPI FHIR - Core Library 004 * %% 005 * Copyright (C) 2014 - 2023 Smile CDR, Inc. 006 * %% 007 * Licensed under the Apache License, Version 2.0 (the "License"); 008 * you may not use this file except in compliance with the License. 009 * You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 * #L% 019 */ 020package ca.uhn.fhir.util; 021 022import ca.uhn.fhir.context.FhirContext; 023import ca.uhn.fhir.context.RuntimeResourceDefinition; 024import ca.uhn.fhir.i18n.Msg; 025import ca.uhn.fhir.model.primitive.IdDt; 026import ca.uhn.fhir.parser.DataFormatException; 027import ca.uhn.fhir.rest.api.Constants; 028import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException; 029import com.google.common.escape.Escaper; 030import com.google.common.net.PercentEscaper; 031import org.apache.commons.lang3.StringUtils; 032import org.apache.http.NameValuePair; 033import org.apache.http.client.utils.URLEncodedUtils; 034import org.apache.http.message.BasicNameValuePair; 035import org.hl7.fhir.instance.model.api.IPrimitiveType; 036 037import javax.annotation.Nonnull; 038import javax.annotation.Nullable; 039import java.io.UnsupportedEncodingException; 040import java.net.MalformedURLException; 041import java.net.URI; 042import java.net.URISyntaxException; 043import java.net.URL; 044import java.net.URLDecoder; 045import java.util.ArrayList; 046import java.util.Collection; 047import java.util.HashMap; 048import java.util.List; 049import java.util.Map; 050import java.util.Map.Entry; 051import java.util.StringTokenizer; 052import java.util.stream.Collectors; 053 054import static org.apache.commons.lang3.StringUtils.defaultIfBlank; 055import static org.apache.commons.lang3.StringUtils.defaultString; 056import static org.apache.commons.lang3.StringUtils.endsWith; 057import static org.apache.commons.lang3.StringUtils.isBlank; 058import static org.apache.commons.lang3.StringUtils.isNotBlank; 059 060@SuppressWarnings("JavadocLinkAsPlainText") 061public class UrlUtil { 062 private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(UrlUtil.class); 063 064 private static final String URL_FORM_PARAMETER_OTHER_SAFE_CHARS = "-_.*"; 065 private static final Escaper PARAMETER_ESCAPER = new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS, false); 066 067 /** 068 * Non instantiable 069 */ 070 private UrlUtil() { 071 } 072 073 /** 074 * Cleans up a value that will be serialized as an HTTP header. This method: 075 * <p> 076 * - Strips any newline (\r or \n) characters 077 * 078 * @since 6.2.0 079 */ 080 public static String sanitizeHeaderValue(String theHeader) { 081 return theHeader.replace("\n", "").replace("\r", ""); 082 } 083 084 public static String sanitizeBaseUrl(String theBaseUrl) { 085 return theBaseUrl.replaceAll("[^a-zA-Z0-9:/._-]", ""); 086 } 087 088 /** 089 * Resolve a relative URL - THIS METHOD WILL NOT FAIL but will log a warning and return theEndpoint if the input is invalid. 090 */ 091 public static String constructAbsoluteUrl(String theBase, String theEndpoint) { 092 if (theEndpoint == null) { 093 return null; 094 } 095 if (isAbsolute(theEndpoint)) { 096 return theEndpoint; 097 } 098 if (theBase == null) { 099 return theEndpoint; 100 } 101 102 try { 103 return new URL(new URL(theBase), theEndpoint).toString(); 104 } catch (MalformedURLException e) { 105 ourLog.warn("Failed to resolve relative URL[" + theEndpoint + "] against absolute base[" + theBase + "]", e); 106 return theEndpoint; 107 } 108 } 109 110 public static String constructRelativeUrl(String theParentExtensionUrl, String theExtensionUrl) { 111 if (theParentExtensionUrl == null) { 112 return theExtensionUrl; 113 } 114 if (theExtensionUrl == null) { 115 return null; 116 } 117 118 int parentLastSlashIdx = theParentExtensionUrl.lastIndexOf('/'); 119 int childLastSlashIdx = theExtensionUrl.lastIndexOf('/'); 120 121 if (parentLastSlashIdx == -1 || childLastSlashIdx == -1) { 122 return theExtensionUrl; 123 } 124 125 if (parentLastSlashIdx != childLastSlashIdx) { 126 return theExtensionUrl; 127 } 128 129 if (!theParentExtensionUrl.substring(0, parentLastSlashIdx).equals(theExtensionUrl.substring(0, parentLastSlashIdx))) { 130 return theExtensionUrl; 131 } 132 133 if (theExtensionUrl.length() > parentLastSlashIdx) { 134 return theExtensionUrl.substring(parentLastSlashIdx + 1); 135 } 136 137 return theExtensionUrl; 138 } 139 140 /** 141 * Given a FHIR resource URL, extracts the associated resource type. Supported formats 142 * include the following inputs, all of which will return {@literal Patient}. If no 143 * resource type can be determined, {@literal null} will be returned. 144 * <ul> 145 * <li>Patient 146 * <li>Patient? 147 * <li>Patient?identifier=foo 148 * <li>/Patient 149 * <li>/Patient? 150 * <li>/Patient?identifier=foo 151 * <li>http://foo/base/Patient?identifier=foo 152 * <li>http://foo/base/Patient/1 153 * <li>http://foo/base/Patient/1/_history/2 154 * <li>Patient/1 155 * <li>Patient/1/_history/2 156 * <li>/Patient/1 157 * <li>/Patient/1/_history/2 158 * </ul> 159 */ 160 @Nullable 161 public static String determineResourceTypeInResourceUrl(FhirContext theFhirContext, String theUrl) { 162 if (theUrl == null) { 163 return null; 164 } 165 if (theUrl.startsWith("urn:")) { 166 return null; 167 } 168 169 String resourceType = null; 170 int qmIndex = theUrl.indexOf("?"); 171 if (qmIndex > 0) { 172 String urlResourceType = theUrl.substring(0, qmIndex); 173 int slashIdx = urlResourceType.lastIndexOf('/'); 174 if (slashIdx != -1) { 175 urlResourceType = urlResourceType.substring(slashIdx + 1); 176 } 177 if (isNotBlank(urlResourceType)) { 178 resourceType = urlResourceType; 179 } 180 } else { 181 resourceType = theUrl; 182 int slashIdx = resourceType.indexOf('/'); 183 if (slashIdx == 0) { 184 resourceType = resourceType.substring(1); 185 } 186 187 slashIdx = resourceType.indexOf('/'); 188 if (slashIdx != -1) { 189 resourceType = new IdDt(resourceType).getResourceType(); 190 } 191 192 } 193 194 try { 195 if (isNotBlank(resourceType)) { 196 theFhirContext.getResourceDefinition(resourceType); 197 } 198 } catch (DataFormatException e) { 199 return null; 200 } 201 202 return resourceType; 203 } 204 205 206 /** 207 * URL encode a value according to RFC 3986 208 * <p> 209 * This method is intended to be applied to an individual parameter 210 * name or value. For example, if you are creating the URL 211 * <code>http://example.com/fhir/Patient?key=føø</code> 212 * it would be appropriate to pass the string "føø" to this method, 213 * but not appropriate to pass the entire URL since characters 214 * such as "/" and "?" would also be escaped. 215 * </P> 216 */ 217 public static String escapeUrlParam(String theUnescaped) { 218 if (theUnescaped == null) { 219 return null; 220 } 221 return PARAMETER_ESCAPER.escape(theUnescaped); 222 } 223 224 /** 225 * Applies the same encodong as {@link #escapeUrlParam(String)} but against all 226 * values in a collection 227 */ 228 public static List<String> escapeUrlParams(@Nonnull Collection<String> theUnescaped) { 229 return theUnescaped 230 .stream() 231 .map(t -> PARAMETER_ESCAPER.escape(t)) 232 .collect(Collectors.toList()); 233 } 234 235 public static boolean isAbsolute(String theValue) { 236 String value = theValue.toLowerCase(); 237 return value.startsWith("http://") || value.startsWith("https://"); 238 } 239 240 public static boolean isNeedsSanitization(CharSequence theString) { 241 if (theString != null) { 242 for (int i = 0; i < theString.length(); i++) { 243 char nextChar = theString.charAt(i); 244 switch (nextChar) { 245 case '\'': 246 case '"': 247 case '<': 248 case '>': 249 case '\n': 250 case '\r': 251 return true; 252 } 253 if (nextChar < ' ') { 254 return true; 255 } 256 } 257 } 258 return false; 259 } 260 261 public static boolean isValid(String theUrl) { 262 if (theUrl == null || theUrl.length() < 8) { 263 return false; 264 } 265 266 String url = theUrl.toLowerCase(); 267 if (url.charAt(0) != 'h') { 268 return false; 269 } 270 if (url.charAt(1) != 't') { 271 return false; 272 } 273 if (url.charAt(2) != 't') { 274 return false; 275 } 276 if (url.charAt(3) != 'p') { 277 return false; 278 } 279 int slashOffset; 280 if (url.charAt(4) == ':') { 281 slashOffset = 5; 282 } else if (url.charAt(4) == 's') { 283 if (url.charAt(5) != ':') { 284 return false; 285 } 286 slashOffset = 6; 287 } else { 288 return false; 289 } 290 291 if (url.charAt(slashOffset) != '/') { 292 return false; 293 } 294 if (url.charAt(slashOffset + 1) != '/') { 295 return false; 296 } 297 298 return true; 299 } 300 301 public static RuntimeResourceDefinition parseUrlResourceType(FhirContext theCtx, String theUrl) throws DataFormatException { 302 String url = theUrl; 303 int paramIndex = url.indexOf('?'); 304 305 // Change pattern of "Observation/?param=foo" into "Observation?param=foo" 306 if (paramIndex > 0 && url.charAt(paramIndex - 1) == '/') { 307 url = url.substring(0, paramIndex - 1) + url.substring(paramIndex); 308 paramIndex--; 309 } 310 311 String resourceName = url.substring(0, paramIndex); 312 if (resourceName.contains("/")) { 313 resourceName = resourceName.substring(resourceName.lastIndexOf('/') + 1); 314 } 315 return theCtx.getResourceDefinition(resourceName); 316 } 317 318 public static Map<String, String[]> parseQueryString(String theQueryString) { 319 HashMap<String, List<String>> map = new HashMap<>(); 320 parseQueryString(theQueryString, map); 321 return toQueryStringMap(map); 322 } 323 324 private static void parseQueryString(String theQueryString, HashMap<String, List<String>> map) { 325 String query = defaultString(theQueryString); 326 if (query.startsWith("?")) { 327 query = query.substring(1); 328 } 329 330 331 StringTokenizer tok = new StringTokenizer(query, "&"); 332 while (tok.hasMoreTokens()) { 333 String nextToken = tok.nextToken(); 334 if (isBlank(nextToken)) { 335 continue; 336 } 337 338 int equalsIndex = nextToken.indexOf('='); 339 String nextValue; 340 String nextKey; 341 if (equalsIndex == -1) { 342 nextKey = nextToken; 343 nextValue = ""; 344 } else { 345 nextKey = nextToken.substring(0, equalsIndex); 346 nextValue = nextToken.substring(equalsIndex + 1); 347 } 348 349 nextKey = unescape(nextKey); 350 nextValue = unescape(nextValue); 351 352 List<String> list = map.computeIfAbsent(nextKey, k -> new ArrayList<>()); 353 list.add(nextValue); 354 } 355 } 356 357 public static Map<String, String[]> parseQueryStrings(String... theQueryString) { 358 HashMap<String, List<String>> map = new HashMap<>(); 359 for (String next : theQueryString) { 360 parseQueryString(next, map); 361 } 362 return toQueryStringMap(map); 363 } 364 365 /** 366 * Normalizes canonical URLs for comparison. Trailing "/" is stripped, 367 * and any version identifiers or fragment hash is removed 368 */ 369 public static String normalizeCanonicalUrlForComparison(String theUrl) { 370 String retVal; 371 try { 372 retVal = new URI(theUrl).normalize().toString(); 373 } catch (URISyntaxException e) { 374 retVal = theUrl; 375 } 376 while (endsWith(retVal, "/")) { 377 retVal = retVal.substring(0, retVal.length() - 1); 378 } 379 int hashOrPipeIndex = StringUtils.indexOfAny(retVal, '#', '|'); 380 if (hashOrPipeIndex != -1) { 381 retVal = retVal.substring(0, hashOrPipeIndex); 382 } 383 return retVal; 384 } 385 386 /** 387 * Parse a URL in one of the following forms: 388 * <ul> 389 * <li>[Resource Type]?[Search Params] 390 * <li>[Resource Type]/[Resource ID] 391 * <li>[Resource Type]/[Resource ID]/_history/[Version ID] 392 * </ul> 393 */ 394 public static UrlParts parseUrl(String theUrl) { 395 String url = theUrl; 396 UrlParts retVal = new UrlParts(); 397 if (url.startsWith("http")) { 398 int qmIdx = url.indexOf('?'); 399 if (qmIdx != -1) { 400 retVal.setParams(defaultIfBlank(url.substring(qmIdx + 1), null)); 401 url = url.substring(0, qmIdx); 402 } 403 404 IdDt id = new IdDt(url); 405 retVal.setResourceType(id.getResourceType()); 406 retVal.setResourceId(id.getIdPart()); 407 retVal.setVersionId(id.getVersionIdPart()); 408 return retVal; 409 } 410 411 int parsingStart = 0; 412 if (url.length() > 2) { 413 if (url.charAt(0) == '/') { 414 if (Character.isLetter(url.charAt(1))) { 415 parsingStart = 1; 416 } 417 } 418 } 419 420 int nextStart = parsingStart; 421 boolean nextIsHistory = false; 422 423 for (int idx = parsingStart; idx < url.length(); idx++) { 424 char nextChar = url.charAt(idx); 425 boolean atEnd = (idx + 1) == url.length(); 426 if (nextChar == '?' || nextChar == '/' || atEnd) { 427 int endIdx = (atEnd && nextChar != '?') ? idx + 1 : idx; 428 String nextSubstring = url.substring(nextStart, endIdx); 429 if (retVal.getResourceType() == null) { 430 retVal.setResourceType(nextSubstring); 431 } else if (retVal.getResourceId() == null) { 432 retVal.setResourceId(nextSubstring); 433 } else if (nextIsHistory) { 434 retVal.setVersionId(nextSubstring); 435 } else { 436 if (nextSubstring.equals(Constants.URL_TOKEN_HISTORY)) { 437 nextIsHistory = true; 438 } else { 439 throw new InvalidRequestException(Msg.code(1742) + "Invalid FHIR resource URL: " + url); 440 } 441 } 442 if (nextChar == '?') { 443 if (url.length() > idx + 1) { 444 retVal.setParams(url.substring(idx + 1)); 445 } 446 break; 447 } 448 nextStart = idx + 1; 449 } 450 } 451 452 return retVal; 453 454 } 455 456 /** 457 * This method specifically HTML-encodes the " and 458 * < characters in order to prevent injection attacks 459 */ 460 public static String sanitizeUrlPart(IPrimitiveType<?> theString) { 461 String retVal = null; 462 if (theString != null) { 463 retVal = sanitizeUrlPart(theString.getValueAsString()); 464 } 465 return retVal; 466 } 467 468 /** 469 * This method specifically HTML-encodes the " and 470 * < characters in order to prevent injection attacks. 471 * <p> 472 * The following characters are escaped: 473 * <ul> 474 * <li>'</li> 475 * <li>"</li> 476 * <li><</li> 477 * <li>></li> 478 * <li>\n (newline)</li> 479 * </ul> 480 */ 481 public static String sanitizeUrlPart(CharSequence theString) { 482 if (theString == null) { 483 return null; 484 } 485 486 boolean needsSanitization = isNeedsSanitization(theString); 487 488 if (needsSanitization) { 489 // Ok, we're sanitizing 490 StringBuilder buffer = new StringBuilder(theString.length() + 10); 491 for (int j = 0; j < theString.length(); j++) { 492 493 char nextChar = theString.charAt(j); 494 switch (nextChar) { 495 /* 496 * NB: If you add a constant here, you also need to add it 497 * to isNeedsSanitization()!! 498 */ 499 case '\'': 500 buffer.append("'"); 501 break; 502 case '"': 503 buffer.append("""); 504 break; 505 case '<': 506 buffer.append("<"); 507 break; 508 case '>': 509 buffer.append(">"); 510 break; 511 case '\n': 512 buffer.append(" "); 513 break; 514 case '\r': 515 buffer.append(" "); 516 break; 517 default: 518 if (nextChar >= ' ') { 519 buffer.append(nextChar); 520 } 521 break; 522 } 523 524 } // for build escaped string 525 526 return buffer.toString(); 527 } 528 529 return theString.toString(); 530 } 531 532 /** 533 * Applies the same logic as {@link #sanitizeUrlPart(CharSequence)} but against an array, returning an array with the 534 * same strings as the input but with sanitization applied 535 */ 536 public static String[] sanitizeUrlPart(String[] theParameterValues) { 537 String[] retVal = null; 538 if (theParameterValues != null) { 539 retVal = new String[theParameterValues.length]; 540 for (int i = 0; i < theParameterValues.length; i++) { 541 retVal[i] = sanitizeUrlPart(theParameterValues[i]); 542 } 543 } 544 return retVal; 545 } 546 547 private static Map<String, String[]> toQueryStringMap(HashMap<String, List<String>> map) { 548 HashMap<String, String[]> retVal = new HashMap<>(); 549 for (Entry<String, List<String>> nextEntry : map.entrySet()) { 550 retVal.put(nextEntry.getKey(), nextEntry.getValue().toArray(new String[0])); 551 } 552 return retVal; 553 } 554 555 public static String unescape(String theString) { 556 if (theString == null) { 557 return null; 558 } 559 // If the user passes "_outputFormat" as a GET request parameter directly in the URL: 560 final boolean shouldEscapePlus = !theString.startsWith("application/"); 561 562 for (int i = 0; i < theString.length(); i++) { 563 char nextChar = theString.charAt(i); 564 if (nextChar == '%' || (nextChar == '+' && shouldEscapePlus)) { 565 try { 566 // Yes it would be nice to not use a string "UTF-8" but the equivalent 567 // method that takes Charset is JDK10+ only... sigh.... 568 return URLDecoder.decode(theString, "UTF-8"); 569 } catch (UnsupportedEncodingException e) { 570 throw new Error(Msg.code(1743) + "UTF-8 not supported, this shouldn't happen", e); 571 } 572 } 573 } 574 return theString; 575 } 576 577 public static List<NameValuePair> translateMatchUrl(String theMatchUrl) { 578 List<NameValuePair> parameters; 579 String matchUrl = theMatchUrl; 580 int questionMarkIndex = matchUrl.indexOf('?'); 581 if (questionMarkIndex != -1) { 582 matchUrl = matchUrl.substring(questionMarkIndex + 1); 583 } 584 585 final String[] searchList = new String[]{ 586 "+", 587 "|", 588 "=>=", 589 "=<=", 590 "=>", 591 "=<" 592 }; 593 final String[] replacementList = new String[]{ 594 "%2B", 595 "%7C", 596 "=%3E%3D", 597 "=%3C%3D", 598 "=%3E", 599 "=%3C" 600 }; 601 matchUrl = StringUtils.replaceEach(matchUrl, searchList, replacementList); 602 if (matchUrl.contains(" ")) { 603 throw new InvalidRequestException(Msg.code(1744) + "Failed to parse match URL[" + theMatchUrl + "] - URL is invalid (must not contain spaces)"); 604 } 605 606 parameters = URLEncodedUtils.parse((matchUrl), Constants.CHARSET_UTF8, '&'); 607 608 // One issue that has happened before is people putting a "+" sign into an email address in a match URL 609 // and having that turn into a " ". Since spaces are never appropriate for email addresses, let's just 610 // assume they really meant "+". 611 for (int i = 0; i < parameters.size(); i++) { 612 NameValuePair next = parameters.get(i); 613 if (next.getName().equals("email") && next.getValue().contains(" ")) { 614 BasicNameValuePair newPair = new BasicNameValuePair(next.getName(), next.getValue().replace(' ', '+')); 615 parameters.set(i, newPair); 616 } 617 } 618 619 return parameters; 620 } 621 622 public static class UrlParts { 623 private String myParams; 624 private String myResourceId; 625 private String myResourceType; 626 private String myVersionId; 627 628 public String getParams() { 629 return myParams; 630 } 631 632 public void setParams(String theParams) { 633 myParams = theParams; 634 } 635 636 public String getResourceId() { 637 return myResourceId; 638 } 639 640 public void setResourceId(String theResourceId) { 641 myResourceId = theResourceId; 642 } 643 644 public String getResourceType() { 645 return myResourceType; 646 } 647 648 public void setResourceType(String theResourceType) { 649 myResourceType = theResourceType; 650 } 651 652 public String getVersionId() { 653 return myVersionId; 654 } 655 656 public void setVersionId(String theVersionId) { 657 myVersionId = theVersionId; 658 } 659 } 660}