001/* 002Copyright (c) 2011+, HL7, Inc 003All rights reserved. 004 005Redistribution and use in source and binary forms, with or without modification, 006are permitted provided that the following conditions are met: 007 008 * Redistributions of source code must retain the above copyright notice, this 009 list of conditions and the following disclaimer. 010 * Redistributions in binary form must reproduce the above copyright notice, 011 this list of conditions and the following disclaimer in the documentation 012 and/or other materials provided with the distribution. 013 * Neither the name of HL7 nor the names of its contributors may be used to 014 endorse or promote products derived from this software without specific 015 prior written permission. 016 017THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 018ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 019WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 020IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 021INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 022NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 023PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 024WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 025ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 026POSSIBILITY OF SUCH DAMAGE. 027 028*/ 029package org.hl7.fhir.utilities.xml; 030 031/* 032 * #%L 033 * HAPI FHIR - Core Library 034 * %% 035 * Copyright (C) 2014 - 2017 University Health Network 036 * %% 037 * Licensed under the Apache License, Version 2.0 (the "License"); 038 * you may not use this file except in compliance with the License. 039 * You may obtain a copy of the License at 040 * 041 * http://www.apache.org/licenses/LICENSE-2.0 042 * 043 * Unless required by applicable law or agreed to in writing, software 044 * distributed under the License is distributed on an "AS IS" BASIS, 045 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 046 * See the License for the specific language governing permissions and 047 * limitations under the License. 048 * #L% 049 */ 050 051 052import java.io.*; 053import java.util.List; 054import java.util.Set; 055 056import javax.xml.parsers.DocumentBuilder; 057import javax.xml.parsers.DocumentBuilderFactory; 058import javax.xml.parsers.ParserConfigurationException; 059import javax.xml.transform.Transformer; 060import javax.xml.transform.TransformerException; 061import javax.xml.transform.TransformerFactory; 062import javax.xml.transform.dom.DOMSource; 063import javax.xml.transform.stream.StreamResult; 064 065import org.hl7.fhir.exceptions.FHIRException; 066import org.hl7.fhir.utilities.Utilities; 067import org.w3c.dom.Attr; 068import org.w3c.dom.Document; 069import org.w3c.dom.Element; 070import org.w3c.dom.Node; 071import org.w3c.dom.ls.DOMImplementationLS; 072import org.w3c.dom.ls.LSSerializer; 073import org.xml.sax.SAXException; 074 075public class XMLUtil { 076 077 public static final String SPACE_CHAR = "\u00A0"; 078 079 public static boolean isNMToken(String name) { 080 if (name == null) 081 return false; 082 for (int i = 0; i < name.length(); i++) 083 if (!isNMTokenChar(name.charAt(i))) 084 return false; 085 return name.length() > 0; 086 } 087 088 public static boolean isNMTokenChar(char c) { 089 return isLetter(c) || isDigit(c) || c == '.' || c == '-' || c == '_' || c == ':' || isCombiningChar(c) || isExtender(c); 090 } 091 092 private static boolean isDigit(char c) { 093 return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9') || 094 (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F') || 095 (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF') || 096 (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F') || 097 (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29'); 098 } 099 100 private static boolean isCombiningChar(char c) { 101 return (c >= '\u0300' && c <= '\u0345') || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') || 102 (c >= '\u0591' && c <= '\u05A1') || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD') || 103 c == '\u05BF' || (c >= '\u05C1' && c <= '\u05C2') || c == '\u05C4' || (c >= '\u064B' && c <= '\u0652') || 104 c == '\u0670' || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4') || 105 (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903') || c == '\u093C' || 106 (c >= '\u093E' && c <= '\u094C') || c == '\u094D' || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') || 107 (c >= '\u0981' && c <= '\u0983') || c == '\u09BC' || c == '\u09BE' || c == '\u09BF' || (c >= '\u09C0' && c <= '\u09C4') || 108 (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD') || c == '\u09D7' || (c >= '\u09E2' && c <= '\u09E3') || 109 c == '\u0A02' || c == '\u0A3C' || c == '\u0A3E' || c == '\u0A3F' || (c >= '\u0A40' && c <= '\u0A42') || 110 (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D') || (c >= '\u0A70' && c <= '\u0A71') || 111 (c >= '\u0A81' && c <= '\u0A83') || c == '\u0ABC' || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') || 112 (c >= '\u0ACB' && c <= '\u0ACD') || (c >= '\u0B01' && c <= '\u0B03') || c == '\u0B3C' || (c >= '\u0B3E' && c <= '\u0B43') || 113 (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D') || (c >= '\u0B56' && c <= '\u0B57') || 114 (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2') || (c >= '\u0BC6' && c <= '\u0BC8') || 115 (c >= '\u0BCA' && c <= '\u0BCD') || c == '\u0BD7' || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') || 116 (c >= '\u0C46' && c <= '\u0C48') || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') || 117 (c >= '\u0C82' && c <= '\u0C83') || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') || 118 (c >= '\u0CCA' && c <= '\u0CCD') || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') || 119 (c >= '\u0D3E' && c <= '\u0D43') || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D') || c == '\u0D57' || 120 c == '\u0E31' || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E') || c == '\u0EB1' || 121 (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD') || 122 (c >= '\u0F18' && c <= '\u0F19') || c == '\u0F35' || c == '\u0F37' || c == '\u0F39' || c == '\u0F3E' || c == '\u0F3F' || 123 (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95') || c == '\u0F97' || 124 (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7') || c == '\u0FB9' || (c >= '\u20D0' && c <= '\u20DC') || 125 c == '\u20E1' || (c >= '\u302A' && c <= '\u302F') || c == '\u3099' || c == '\u309A'; 126 } 127 128 private static boolean isExtender(char c) { 129 return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387' || c == '\u0640' || c == '\u0E46' || 130 c == '\u0EC6' || c == '\u3005' || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') || 131 (c >= '\u30FC' && c <= '\u30FE'); 132 } 133 134 private static boolean isLetter(char c) { 135 return isBaseChar(c) || isIdeographic(c); 136 } 137 138 private static boolean isBaseChar(char c) { 139 return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A') || (c >= '\u00C0' && c <= '\u00D6') || 140 (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131') || 141 (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E') || 142 (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5') || 143 (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1') || 144 c == '\u0386' || (c >= '\u0388' && c <= '\u038A') || c == '\u038C' || (c >= '\u038E' && c <= '\u03A1') || 145 (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6') || c == '\u03DA' || c == '\u03DC' || c == '\u03DE' || 146 c == '\u03E0' || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F') || 147 (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4') || 148 (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB') || 149 (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556') || 150 c == '\u0559' || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2') || 151 (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7') || 152 (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3') || 153 c == '\u06D5' || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939') || c == '\u093D' || 154 (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990') || 155 (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0') || c == '\u09B2' || 156 (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DC' && c <= '\u09DD') || (c >= '\u09DF' && c <= '\u09E1') || 157 (c >= '\u09F0' && c <= '\u09F1') || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') || 158 (c >= '\u0A13' && c <= '\u0A28') || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') || 159 (c >= '\u0A35' && c <= '\u0A36') || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C') || 160 c == '\u0A5E' || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B') || c == '\u0A8D' || 161 (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0') || 162 (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9') || c == '\u0ABD' || c == '\u0AE0' || 163 (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28') || 164 (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39') || 165 c == '\u0B3D' || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') || 166 (c >= '\u0B85' && c <= '\u0B8A') || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') || 167 (c >= '\u0B99' && c <= '\u0B9A') || c == '\u0B9C' || (c >= '\u0B9E' && c <= '\u0B9F') || 168 (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA') || (c >= '\u0BAE' && c <= '\u0BB5') || 169 (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C') || (c >= '\u0C0E' && c <= '\u0C10') || 170 (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33') || (c >= '\u0C35' && c <= '\u0C39') || 171 (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C') || (c >= '\u0C8E' && c <= '\u0C90') || 172 (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3') || (c >= '\u0CB5' && c <= '\u0CB9') || 173 c == '\u0CDE' || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') || 174 (c >= '\u0D0E' && c <= '\u0D10') || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') || 175 (c >= '\u0D60' && c <= '\u0D61') || (c >= '\u0E01' && c <= '\u0E2E') || c == '\u0E30' || 176 (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82') || 177 c == '\u0E84' || (c >= '\u0E87' && c <= '\u0E88') || c == '\u0E8A' || c == '\u0E8D' || (c >= '\u0E94' && c <= '\u0E97') || 178 (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3') || c == '\u0EA5' || c == '\u0EA7' || 179 (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE') || c == '\u0EB0' || 180 (c >= '\u0EB2' && c <= '\u0EB3') || c == '\u0EBD' || (c >= '\u0EC0' && c <= '\u0EC4') || 181 (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69') || (c >= '\u10A0' && c <= '\u10C5') || 182 (c >= '\u10D0' && c <= '\u10F6') || c == '\u1100' || (c >= '\u1102' && c <= '\u1103') || 183 (c >= '\u1105' && c <= '\u1107') || c == '\u1109' || (c >= '\u110B' && c <= '\u110C') || 184 (c >= '\u110E' && c <= '\u1112') || c == '\u113C' || c == '\u113E' || c == '\u1140' || c == '\u114C' || 185 c == '\u114E' || c == '\u1150' || (c >= '\u1154' && c <= '\u1155') || c == '\u1159' || 186 (c >= '\u115F' && c <= '\u1161') || c == '\u1163' || c == '\u1165' || c == '\u1167' || c == '\u1169' || 187 (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173') || c == '\u1175' || 188 c == '\u119E' || c == '\u11A8' || c == '\u11AB' || (c >= '\u11AE' && c <= '\u11AF') || 189 (c >= '\u11B7' && c <= '\u11B8') || c == '\u11BA' || (c >= '\u11BC' && c <= '\u11C2') || 190 c == '\u11EB' || c == '\u11F0' || c == '\u11F9' || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') || 191 (c >= '\u1F00' && c <= '\u1F15') || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') || 192 (c >= '\u1F48' && c <= '\u1F4D') || (c >= '\u1F50' && c <= '\u1F57') || c == '\u1F59' || c == '\u1F5B' || c == '\u1F5D' || 193 (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC') || 194 c == '\u1FBE' || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') || 195 (c >= '\u1FD0' && c <= '\u1FD3') || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') || 196 (c >= '\u1FF2' && c <= '\u1FF4') || (c >= '\u1FF6' && c <= '\u1FFC') || c == '\u2126' || 197 (c >= '\u212A' && c <= '\u212B') || c == '\u212E' || (c >= '\u2180' && c <= '\u2182') || 198 (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA') || (c >= '\u3105' && c <= '\u312C') || 199 (c >= '\uAC00' && c <= '\uD7A3'); 200 } 201 202 private static boolean isIdeographic(char c) { 203 return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029'); 204 } 205 206 public static String determineEncoding(InputStream stream) throws IOException { 207 stream.mark(20000); 208 try { 209 int b0 = stream.read(); 210 int b1 = stream.read(); 211 int b2 = stream.read(); 212 int b3 = stream.read(); 213 214 if (b0 == 0xFE && b1 == 0xFF) 215 return "UTF-16BE"; 216 else if (b0 == 0xFF && b1 == 0xFE) 217 return "UTF-16LE"; 218 else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF ) 219 return "UTF-8"; 220 else if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) 221 return "UTF-16BE"; 222 else if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) 223 return "UTF-16LE"; 224 else if (b0 == 0x3C && b1 == 0x3F && b2 == 0x78 && b3 == 0x6D) { 225// UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding 226// which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding 227// declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns 228// for the relevant ASCII characters, the encoding declaration itself may be read reliably 229 InputStreamReader rdr = new InputStreamReader(stream, "US-ASCII"); 230 String hdr = readFirstLine(rdr); 231 return extractEncoding(hdr); 232 } else 233 return null; 234 } finally { 235 stream.reset(); 236 } 237 } 238 239 private static String extractEncoding(String hdr) { 240 int i = hdr.indexOf("encoding="); 241 if (i == -1) 242 return null; 243 hdr = hdr.substring(i+9); 244 char sep = hdr.charAt(0); 245 hdr = hdr.substring(1); 246 i = hdr.indexOf(sep); 247 if (i == -1) 248 return null; 249 return hdr.substring(0, i); 250 } 251 252 private static String readFirstLine(InputStreamReader rdr) throws IOException { 253 char[] buf = new char[1]; 254 StringBuffer bldr = new StringBuffer(); 255 rdr.read(buf); 256 while (buf[0] != '>') { 257 bldr.append(buf[0]); 258 rdr.read(buf); 259 } 260 return bldr.toString(); 261 } 262 263 264 public static boolean charSetImpliesAscii(String charset) { 265 return charset.equals("ISO-8859-1") || charset.equals("US-ASCII"); 266 } 267 268 269 /** 270 * Converts the raw characters to XML escape characters. 271 * 272 * @param rawContent 273 * @param charset Null when charset is not known, so we assume it's unicode 274 * @param isNoLines 275 * @return escape string 276 */ 277 public static String escapeXML(String rawContent, String charset, boolean isNoLines) { 278 if (rawContent == null){ 279 return ""; 280 } 281 StringBuffer sb = new StringBuffer(); 282 283 for (int i = 0; i < rawContent.length(); i++) { 284 char ch = rawContent.charAt(i); 285 if (ch == '\'') 286 sb.append("'"); 287 else if (ch == '&') 288 sb.append("&"); 289 else if (ch == '"') 290 sb.append("""); 291 else if (ch == '<') 292 sb.append("<"); 293 else if (ch == '>') 294 sb.append(">"); 295 else if (ch > '~' && charset != null && charSetImpliesAscii(charset)) 296 // TODO - why is hashcode the only way to get the unicode number for the character 297 // in jre 5.0? 298 sb.append("&#x"+Integer.toHexString(new Character(ch).hashCode()).toUpperCase()+";"); 299 else if (isNoLines) { 300 if (ch == '\r') 301 sb.append("
"); 302 else if (ch != '\n') 303 sb.append(ch); 304 } 305 else 306 sb.append(ch); 307 } 308 return sb.toString(); 309 } 310 311 public static Element getFirstChild(Element e) { 312 if (e == null) 313 return null; 314 Node n = e.getFirstChild(); 315 while (n != null && n.getNodeType() != Node.ELEMENT_NODE) 316 n = n.getNextSibling(); 317 return (Element) n; 318 } 319 320 public static Element getNamedChild(Element e, String name) { 321 Element c = getFirstChild(e); 322 while (c != null && !name.equals(c.getLocalName()) && !name.equals(c.getNodeName())) 323 c = getNextSibling(c); 324 return c; 325 } 326 327 public static Element getNextSibling(Element e) { 328 Node n = e.getNextSibling(); 329 while (n != null && n.getNodeType() != Node.ELEMENT_NODE) 330 n = n.getNextSibling(); 331 return (Element) n; 332 } 333 334 public static void getNamedChildren(Element e, String name, List<Element> set) { 335 Element c = getFirstChild(e); 336 while (c != null) { 337 if (name.equals(c.getLocalName()) || name.equals(c.getNodeName()) ) 338 set.add(c); 339 c = getNextSibling(c); 340 } 341 } 342 343 public static String htmlToXmlEscapedPlainText(Element r) { 344 StringBuilder s = new StringBuilder(); 345 Node n = r.getFirstChild(); 346 boolean ws = false; 347 while (n != null) { 348 if (n.getNodeType() == Node.TEXT_NODE) { 349 String t = n.getTextContent().trim(); 350 if (Utilities.noString(t)) 351 ws = true; 352 else { 353 if (ws) 354 s.append(" "); 355 ws = false; 356 s.append(t); 357 } 358 } 359 if (n.getNodeType() == Node.ELEMENT_NODE) { 360 if (ws) 361 s.append(" "); 362 ws = false; 363 s.append(htmlToXmlEscapedPlainText((Element) n)); 364 if (r.getNodeName().equals("br") || r.getNodeName().equals("p")) 365 s.append("\r\n"); 366 } 367 n = n.getNextSibling(); 368 } 369 return s.toString(); 370 } 371 372 public static String htmlToXmlEscapedPlainText(String definition) throws ParserConfigurationException, SAXException, IOException { 373 return htmlToXmlEscapedPlainText(parseToDom("<div>"+definition+"</div>").getDocumentElement()); 374 } 375 376 public static String elementToString(Element el) { 377 if (el == null) 378 return ""; 379 Document document = el.getOwnerDocument(); 380 DOMImplementationLS domImplLS = (DOMImplementationLS) document 381 .getImplementation(); 382 LSSerializer serializer = domImplLS.createLSSerializer(); 383 return serializer.writeToString(el); 384 } 385 386 public static String getNamedChildValue(Element element, String name) { 387 Element e = getNamedChild(element, name); 388 return e == null ? null : e.getAttribute("value"); 389 } 390 391 public static void setNamedChildValue(Element element, String name, String value) throws FHIRException { 392 Element e = getNamedChild(element, name); 393 if (e == null) 394 throw new FHIRException("unable to find element "+name); 395 e.setAttribute("value", value); 396 } 397 398 399 public static void getNamedChildrenWithWildcard(Element focus, String name, List<Element> children) { 400 Element c = getFirstChild(focus); 401 while (c != null) { 402 String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 403 if (name.equals(n) || (name.endsWith("[x]") && n.startsWith(name.substring(0, name.length()-3)))) 404 children.add(c); 405 c = getNextSibling(c); 406 } 407 } 408 409 public static void getNamedChildrenWithTails(Element focus, String name, List<Element> children, Set<String> typeTails) { 410 Element c = getFirstChild(focus); 411 while (c != null) { 412 String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 413 if (n.equals(name) || (!n.equals("responseCode") && (n.startsWith(name) && typeTails.contains(n.substring(name.length()))))) 414 children.add(c); 415 c = getNextSibling(c); 416 } 417 } 418 419 public static boolean hasNamedChild(Element e, String name) { 420 Element c = getFirstChild(e); 421 while (c != null && !name.equals(c.getLocalName()) && !name.equals(c.getNodeName())) 422 c = getNextSibling(c); 423 return c != null; 424 } 425 426 public static Document parseToDom(String content) throws ParserConfigurationException, SAXException, IOException { 427 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 428 factory.setNamespaceAware(false); 429 DocumentBuilder builder = factory.newDocumentBuilder(); 430 return builder.parse(new ByteArrayInputStream(content.getBytes())); 431 } 432 433 public static Document parseFileToDom(String filename) throws ParserConfigurationException, SAXException, IOException { 434 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 435 factory.setNamespaceAware(false); 436 DocumentBuilder builder = factory.newDocumentBuilder(); 437 //FIXME resource leak 438 return builder.parse(new FileInputStream(filename)); 439 } 440 441 public static Element getLastChild(Element e) { 442 if (e == null) 443 return null; 444 Node n = e.getLastChild(); 445 while (n != null && n.getNodeType() != Node.ELEMENT_NODE) 446 n = n.getPreviousSibling(); 447 return (Element) n; 448 } 449 450 public static Element getPrevSibling(Element e) { 451 Node n = e.getPreviousSibling(); 452 while (n != null && n.getNodeType() != Node.ELEMENT_NODE) 453 n = n.getPreviousSibling(); 454 return (Element) n; 455 } 456 457 public static String getNamedChildAttribute(Element element, String name, String aname) { 458 Element e = getNamedChild(element, name); 459 return e == null ? null : e.getAttribute(aname); 460 } 461 462 public static void writeDomToFile(Document doc, String filename) throws TransformerException { 463 TransformerFactory transformerFactory = TransformerFactory.newInstance(); 464 Transformer transformer = transformerFactory.newTransformer(); 465 DOMSource source = new DOMSource(doc); 466 StreamResult streamResult = new StreamResult(new File(filename)); 467 transformer.transform(source, streamResult); 468 } 469 470 public static String getXsiType(org.w3c.dom.Element element) { 471 Attr a = element.getAttributeNodeNS("http://www.w3.org/2001/XMLSchema-instance", "type"); 472 return (a == null ? null : a.getTextContent()); 473 474 } 475 476 public static String getDirectText(org.w3c.dom.Element node) { 477 Node n = node.getFirstChild(); 478 StringBuilder b = new StringBuilder(); 479 while (n != null) { 480 if (n.getNodeType() == Node.TEXT_NODE) 481 b.append(n.getTextContent()); 482 n = n.getNextSibling(); 483 } 484 return b.toString().trim(); 485 } 486 487 488}