001package ca.uhn.fhir.util;
002
003import ca.uhn.fhir.context.FhirContext;
004import ca.uhn.fhir.context.RuntimeResourceDefinition;
005import ca.uhn.fhir.model.primitive.IdDt;
006import ca.uhn.fhir.parser.DataFormatException;
007import ca.uhn.fhir.rest.api.Constants;
008import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
009import com.google.common.escape.Escaper;
010import com.google.common.net.PercentEscaper;
011import org.apache.http.NameValuePair;
012import org.apache.http.client.utils.URLEncodedUtils;
013import org.hl7.fhir.instance.model.api.IPrimitiveType;
014
015import java.io.UnsupportedEncodingException;
016import java.net.MalformedURLException;
017import java.net.URL;
018import java.net.URLDecoder;
019import java.util.*;
020import java.util.Map.Entry;
021
022import static org.apache.commons.lang3.StringUtils.*;
023
024/*
025 * #%L
026 * HAPI FHIR - Core Library
027 * %%
028 * Copyright (C) 2014 - 2020 University Health Network
029 * %%
030 * Licensed under the Apache License, Version 2.0 (the "License");
031 * you may not use this file except in compliance with the License.
032 * You may obtain a copy of the License at
033 *
034 *      http://www.apache.org/licenses/LICENSE-2.0
035 *
036 * Unless required by applicable law or agreed to in writing, software
037 * distributed under the License is distributed on an "AS IS" BASIS,
038 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
039 * See the License for the specific language governing permissions and
040 * limitations under the License.
041 * #L%
042 */
043
044public class UrlUtil {
045        private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(UrlUtil.class);
046
047        private static final String URL_FORM_PARAMETER_OTHER_SAFE_CHARS = "-_.*";
048        private static final Escaper PARAMETER_ESCAPER = new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS, false);
049
050        public static class UrlParts {
051                private String myParams;
052                private String myResourceId;
053                private String myResourceType;
054                private String myVersionId;
055
056                public String getParams() {
057                        return myParams;
058                }
059
060                public void setParams(String theParams) {
061                        myParams = theParams;
062                }
063
064                public String getResourceId() {
065                        return myResourceId;
066                }
067
068                public void setResourceId(String theResourceId) {
069                        myResourceId = theResourceId;
070                }
071
072                public String getResourceType() {
073                        return myResourceType;
074                }
075
076                public void setResourceType(String theResourceType) {
077                        myResourceType = theResourceType;
078                }
079
080                public String getVersionId() {
081                        return myVersionId;
082                }
083
084                public void setVersionId(String theVersionId) {
085                        myVersionId = theVersionId;
086                }
087        }
088
089        /**
090         * Resolve a relative URL - THIS METHOD WILL NOT FAIL but will log a warning and return theEndpoint if the input is invalid.
091         */
092        public static String constructAbsoluteUrl(String theBase, String theEndpoint) {
093                if (theEndpoint == null) {
094                        return null;
095                }
096                if (isAbsolute(theEndpoint)) {
097                        return theEndpoint;
098                }
099                if (theBase == null) {
100                        return theEndpoint;
101                }
102
103                try {
104                        return new URL(new URL(theBase), theEndpoint).toString();
105                } catch (MalformedURLException e) {
106                        ourLog.warn("Failed to resolve relative URL[" + theEndpoint + "] against absolute base[" + theBase + "]", e);
107                        return theEndpoint;
108                }
109        }
110
111        public static String constructRelativeUrl(String theParentExtensionUrl, String theExtensionUrl) {
112                if (theParentExtensionUrl == null) {
113                        return theExtensionUrl;
114                }
115                if (theExtensionUrl == null) {
116                        return null;
117                }
118
119                int parentLastSlashIdx = theParentExtensionUrl.lastIndexOf('/');
120                int childLastSlashIdx = theExtensionUrl.lastIndexOf('/');
121
122                if (parentLastSlashIdx == -1 || childLastSlashIdx == -1) {
123                        return theExtensionUrl;
124                }
125
126                if (parentLastSlashIdx != childLastSlashIdx) {
127                        return theExtensionUrl;
128                }
129
130                if (!theParentExtensionUrl.substring(0, parentLastSlashIdx).equals(theExtensionUrl.substring(0, parentLastSlashIdx))) {
131                        return theExtensionUrl;
132                }
133
134                if (theExtensionUrl.length() > parentLastSlashIdx) {
135                        return theExtensionUrl.substring(parentLastSlashIdx + 1);
136                }
137
138                return theExtensionUrl;
139        }
140
141        /**
142         * URL encode a value according to RFC 3986
143         * <p>
144         * This method is intended to be applied to an individual parameter
145         * name or value. For example, if you are creating the URL
146         * <code>http://example.com/fhir/Patient?key=føø</code>
147         * it would be appropriate to pass the string "føø" to this method,
148         * but not appropriate to pass the entire URL since characters
149         * such as "/" and "?" would also be escaped.
150         * </P>
151         */
152        public static String escapeUrlParam(String theUnescaped) {
153                if (theUnescaped == null) {
154                        return null;
155                }
156                return PARAMETER_ESCAPER.escape(theUnescaped);
157        }
158
159        public static boolean isAbsolute(String theValue) {
160                String value = theValue.toLowerCase();
161                return value.startsWith("http://") || value.startsWith("https://");
162        }
163
164        public static boolean isNeedsSanitization(CharSequence theString) {
165                if (theString != null) {
166                        for (int i = 0; i < theString.length(); i++) {
167                                char nextChar = theString.charAt(i);
168                                switch (nextChar) {
169                                        case '\'':
170                                        case '"':
171                                        case '<':
172                                        case '>':
173                                        case '\n':
174                                        case '\r':
175                                                return true;
176                                }
177                                if (nextChar < ' ') {
178                                        return true;
179                                }
180                        }
181                }
182                return false;
183        }
184
185        public static boolean isValid(String theUrl) {
186                if (theUrl == null || theUrl.length() < 8) {
187                        return false;
188                }
189
190                String url = theUrl.toLowerCase();
191                if (url.charAt(0) != 'h') {
192                        return false;
193                }
194                if (url.charAt(1) != 't') {
195                        return false;
196                }
197                if (url.charAt(2) != 't') {
198                        return false;
199                }
200                if (url.charAt(3) != 'p') {
201                        return false;
202                }
203                int slashOffset;
204                if (url.charAt(4) == ':') {
205                        slashOffset = 5;
206                } else if (url.charAt(4) == 's') {
207                        if (url.charAt(5) != ':') {
208                                return false;
209                        }
210                        slashOffset = 6;
211                } else {
212                        return false;
213                }
214
215                if (url.charAt(slashOffset) != '/') {
216                        return false;
217                }
218                if (url.charAt(slashOffset + 1) != '/') {
219                        return false;
220                }
221
222                return true;
223        }
224
225        public static RuntimeResourceDefinition parseUrlResourceType(FhirContext theCtx, String theUrl) throws DataFormatException {
226                int paramIndex = theUrl.indexOf('?');
227                String resourceName = theUrl.substring(0, paramIndex);
228                if (resourceName.contains("/")) {
229                        resourceName = resourceName.substring(resourceName.lastIndexOf('/') + 1);
230                }
231                return theCtx.getResourceDefinition(resourceName);
232        }
233
234        public static Map<String, String[]> parseQueryString(String theQueryString) {
235                HashMap<String, List<String>> map = new HashMap<>();
236                parseQueryString(theQueryString, map);
237                return toQueryStringMap(map);
238        }
239
240        private static void parseQueryString(String theQueryString, HashMap<String, List<String>> map) {
241                String query = defaultString(theQueryString);
242                if (query.startsWith("?")) {
243                        query = query.substring(1);
244                }
245
246
247                StringTokenizer tok = new StringTokenizer(query, "&");
248                while (tok.hasMoreTokens()) {
249                        String nextToken = tok.nextToken();
250                        if (isBlank(nextToken)) {
251                                continue;
252                        }
253
254                        int equalsIndex = nextToken.indexOf('=');
255                        String nextValue;
256                        String nextKey;
257                        if (equalsIndex == -1) {
258                                nextKey = nextToken;
259                                nextValue = "";
260                        } else {
261                                nextKey = nextToken.substring(0, equalsIndex);
262                                nextValue = nextToken.substring(equalsIndex + 1);
263                        }
264
265                        nextKey = unescape(nextKey);
266                        nextValue = unescape(nextValue);
267
268                        List<String> list = map.computeIfAbsent(nextKey, k -> new ArrayList<>());
269                        list.add(nextValue);
270                }
271        }
272
273        public static Map<String, String[]> parseQueryStrings(String... theQueryString) {
274                HashMap<String, List<String>> map = new HashMap<>();
275                for (String next : theQueryString) {
276                        parseQueryString(next, map);
277                }
278                return toQueryStringMap(map);
279        }
280
281        /**
282         * Parse a URL in one of the following forms:
283         * <ul>
284         * <li>[Resource Type]?[Search Params]
285         * <li>[Resource Type]/[Resource ID]
286         * <li>[Resource Type]/[Resource ID]/_history/[Version ID]
287         * </ul>
288         */
289        public static UrlParts parseUrl(String theUrl) {
290                String url = theUrl;
291                UrlParts retVal = new UrlParts();
292                if (url.startsWith("http")) {
293                        if (url.startsWith("/")) {
294                                url = url.substring(1);
295                        }
296
297                        int qmIdx = url.indexOf('?');
298                        if (qmIdx != -1) {
299                                retVal.setParams(defaultIfBlank(url.substring(qmIdx + 1), null));
300                                url = url.substring(0, qmIdx);
301                        }
302
303                        IdDt id = new IdDt(url);
304                        retVal.setResourceType(id.getResourceType());
305                        retVal.setResourceId(id.getIdPart());
306                        retVal.setVersionId(id.getVersionIdPart());
307                        return retVal;
308                }
309
310                int parsingStart = 0;
311                if (url.length() > 2) {
312                        if (url.charAt(0) == '/') {
313                                if (Character.isLetter(url.charAt(1))) {
314                                        parsingStart = 1;
315                                }
316                        }
317                }
318
319                if (url.matches("/[a-zA-Z]+\\?.*")) {
320                        url = url.substring(1);
321                }
322                int nextStart = 0;
323                boolean nextIsHistory = false;
324
325                for (int idx = parsingStart; idx < url.length(); idx++) {
326                        char nextChar = url.charAt(idx);
327                        boolean atEnd = (idx + 1) == url.length();
328                        if (nextChar == '?' || nextChar == '/' || atEnd) {
329                                int endIdx = (atEnd && nextChar != '?') ? idx + 1 : idx;
330                                String nextSubstring = url.substring(nextStart, endIdx);
331                                if (retVal.getResourceType() == null) {
332                                        retVal.setResourceType(nextSubstring);
333                                } else if (retVal.getResourceId() == null) {
334                                        retVal.setResourceId(nextSubstring);
335                                } else if (nextIsHistory) {
336                                        retVal.setVersionId(nextSubstring);
337                                } else {
338                                        if (nextSubstring.equals(Constants.URL_TOKEN_HISTORY)) {
339                                                nextIsHistory = true;
340                                        } else {
341                                                throw new InvalidRequestException("Invalid FHIR resource URL: " + url);
342                                        }
343                                }
344                                if (nextChar == '?') {
345                                        if (url.length() > idx + 1) {
346                                                retVal.setParams(url.substring(idx + 1));
347                                        }
348                                        break;
349                                }
350                                nextStart = idx + 1;
351                        }
352                }
353
354                return retVal;
355
356        }
357
358        /**
359         * This method specifically HTML-encodes the &quot; and
360         * &lt; characters in order to prevent injection attacks
361         */
362        public static String sanitizeUrlPart(IPrimitiveType<?> theString) {
363                String retVal = null;
364                if (theString != null) {
365                        retVal = sanitizeUrlPart(theString.getValueAsString());
366                }
367                return retVal;
368        }
369
370        /**
371         * This method specifically HTML-encodes the &quot; and
372         * &lt; characters in order to prevent injection attacks.
373         *
374         * The following characters are escaped:
375         * <ul>
376         *    <li>&apos;</li>
377         *    <li>&quot;</li>
378         *    <li>&lt;</li>
379         *    <li>&gt;</li>
380         *    <li>\n (newline)</li>
381         * </ul>
382         *
383         */
384        public static String sanitizeUrlPart(CharSequence theString) {
385                if (theString == null) {
386                        return null;
387                }
388
389                boolean needsSanitization = isNeedsSanitization(theString);
390
391                if (needsSanitization) {
392                        // Ok, we're sanitizing
393                        StringBuilder buffer = new StringBuilder(theString.length() + 10);
394                        for (int j = 0; j < theString.length(); j++) {
395
396                                char nextChar = theString.charAt(j);
397                                switch (nextChar) {
398                                        /*
399                                         * NB: If you add a constant here, you also need to add it
400                                         * to isNeedsSanitization()!!
401                                         */
402                                        case '\'':
403                                                buffer.append("&apos;");
404                                                break;
405                                        case '"':
406                                                buffer.append("&quot;");
407                                                break;
408                                        case '<':
409                                                buffer.append("&lt;");
410                                                break;
411                                        case '>':
412                                                buffer.append("&gt;");
413                                                break;
414                                        case '\n':
415                                                buffer.append("&#10;");
416                                                break;
417                                        case '\r':
418                                                buffer.append("&#13;");
419                                                break;
420                                        default:
421                                                if (nextChar >= ' ') {
422                                                        buffer.append(nextChar);
423                                                }
424                                                break;
425                                }
426
427                        } // for build escaped string
428
429                        return buffer.toString();
430                }
431
432                return theString.toString();
433        }
434
435        private static Map<String, String[]> toQueryStringMap(HashMap<String, List<String>> map) {
436                HashMap<String, String[]> retVal = new HashMap<>();
437                for (Entry<String, List<String>> nextEntry : map.entrySet()) {
438                        retVal.put(nextEntry.getKey(), nextEntry.getValue().toArray(new String[0]));
439                }
440                return retVal;
441        }
442
443        public static String unescape(String theString) {
444                if (theString == null) {
445                        return null;
446                }
447                for (int i = 0; i < theString.length(); i++) {
448                        char nextChar = theString.charAt(i);
449                        if (nextChar == '%' || nextChar == '+') {
450                                try {
451                                        // Yes it would be nice to not use a string "UTF-8" but the equivalent
452                                        // method that takes Charset is JDK10+ only... sigh....
453                                        return URLDecoder.decode(theString, "UTF-8");
454                                } catch (UnsupportedEncodingException e) {
455                                        throw new Error("UTF-8 not supported, this shouldn't happen", e);
456                                }
457                        }
458                }
459                return theString;
460        }
461
462        public static List<NameValuePair> translateMatchUrl(String theMatchUrl) {
463                List<NameValuePair> parameters;
464                String matchUrl = theMatchUrl;
465                int questionMarkIndex = matchUrl.indexOf('?');
466                if (questionMarkIndex != -1) {
467                        matchUrl = matchUrl.substring(questionMarkIndex + 1);
468                }
469                matchUrl = matchUrl.replace("|", "%7C");
470                matchUrl = matchUrl.replace("=>=", "=%3E%3D");
471                matchUrl = matchUrl.replace("=<=", "=%3C%3D");
472                matchUrl = matchUrl.replace("=>", "=%3E");
473                matchUrl = matchUrl.replace("=<", "=%3C");
474                if (matchUrl.contains(" ")) {
475                        throw new InvalidRequestException("Failed to parse match URL[" + theMatchUrl + "] - URL is invalid (must not contain spaces)");
476                }
477
478                parameters = URLEncodedUtils.parse((matchUrl), Constants.CHARSET_UTF8, '&');
479                return parameters;
480        }
481}