001/*
002Copyright (c) 2011+, HL7, Inc
003All rights reserved.
004
005Redistribution and use in source and binary forms, with or without modification, 
006are permitted provided that the following conditions are met:
007
008 * Redistributions of source code must retain the above copyright notice, this 
009   list of conditions and the following disclaimer.
010 * Redistributions in binary form must reproduce the above copyright notice, 
011   this list of conditions and the following disclaimer in the documentation 
012   and/or other materials provided with the distribution.
013 * Neither the name of HL7 nor the names of its contributors may be used to 
014   endorse or promote products derived from this software without specific 
015   prior written permission.
016
017THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
018ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
019WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
020IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
021INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
022NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
023PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
024WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
025ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
026POSSIBILITY OF SUCH DAMAGE.
027
028*/
029package org.hl7.fhir.utilities.xml;
030
031/*
032 * #%L
033 * HAPI FHIR - Core Library
034 * %%
035 * Copyright (C) 2014 - 2017 University Health Network
036 * %%
037 * Licensed under the Apache License, Version 2.0 (the "License");
038 * you may not use this file except in compliance with the License.
039 * You may obtain a copy of the License at
040 * 
041 *      http://www.apache.org/licenses/LICENSE-2.0
042 * 
043 * Unless required by applicable law or agreed to in writing, software
044 * distributed under the License is distributed on an "AS IS" BASIS,
045 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
046 * See the License for the specific language governing permissions and
047 * limitations under the License.
048 * #L%
049 */
050
051
052import java.io.*;
053import java.util.List;
054import java.util.Set;
055
056import javax.xml.parsers.DocumentBuilder;
057import javax.xml.parsers.DocumentBuilderFactory;
058import javax.xml.parsers.ParserConfigurationException;
059import javax.xml.transform.Transformer;
060import javax.xml.transform.TransformerException;
061import javax.xml.transform.TransformerFactory;
062import javax.xml.transform.dom.DOMSource;
063import javax.xml.transform.stream.StreamResult;
064
065import org.hl7.fhir.exceptions.FHIRException;
066import org.hl7.fhir.utilities.Utilities;
067import org.w3c.dom.Attr;
068import org.w3c.dom.Document;
069import org.w3c.dom.Element;
070import org.w3c.dom.Node;
071import org.w3c.dom.ls.DOMImplementationLS;
072import org.w3c.dom.ls.LSSerializer;
073import org.xml.sax.SAXException;
074
075public class XMLUtil {
076
/** A one-character string holding U+00A0 (the no-break space). */
077        public static final String SPACE_CHAR = "\u00A0";
078
079  public static boolean isNMToken(String name) {
080                if (name == null)
081                        return false;
082                for (int i = 0; i < name.length(); i++) 
083                        if (!isNMTokenChar(name.charAt(i)))
084                                return false;   
085                return name.length() > 0;
086        }
087
088        public static boolean isNMTokenChar(char c) {
089                return isLetter(c) || isDigit(c) || c == '.' || c == '-' || c == '_' || c == ':' || isCombiningChar(c) || isExtender(c);
090        }
091
092        private static boolean isDigit(char c) {
093                return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9') || 
094                        (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F') || 
095                        (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF') || 
096                        (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F') || 
097                        (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29');
098        }
099
/**
 * Tells whether {@code c} is in this class's fixed table of combining
 * characters, expressed below as inclusive ranges and single code points.
 *
 * NOTE(review): the table looks like the CombiningChar production of the
 * XML 1.0 character classes - confirm against the specification before
 * editing any entry.
 *
 * @param c the character to classify
 * @return {@code true} if {@code c} matches any entry of the table
 */
100        private static boolean isCombiningChar(char c) {
101                return (c >= '\u0300' && c <= '\u0345') || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') || 
102                        (c >= '\u0591' && c <= '\u05A1') || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD') || 
103                        c == '\u05BF' || (c >= '\u05C1' && c <= '\u05C2') || c == '\u05C4' || (c >= '\u064B' && c <= '\u0652') || 
104                        c == '\u0670' || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4') || 
105                        (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903') || c == '\u093C' || 
106                        (c >= '\u093E' && c <= '\u094C') || c == '\u094D' || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') || 
107                        (c >= '\u0981' && c <= '\u0983') || c == '\u09BC' || c == '\u09BE' || c == '\u09BF' || (c >= '\u09C0' && c <= '\u09C4') || 
108                        (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD') || c == '\u09D7' || (c >= '\u09E2' && c <= '\u09E3') || 
109                        c == '\u0A02' || c == '\u0A3C' || c == '\u0A3E' || c == '\u0A3F' || (c >= '\u0A40' && c <= '\u0A42') || 
110                        (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D') || (c >= '\u0A70' && c <= '\u0A71') || 
111                        (c >= '\u0A81' && c <= '\u0A83') || c == '\u0ABC' || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') || 
112                        (c >= '\u0ACB' && c <= '\u0ACD') || (c >= '\u0B01' && c <= '\u0B03') || c == '\u0B3C' || (c >= '\u0B3E' && c <= '\u0B43') || 
113                        (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D') || (c >= '\u0B56' && c <= '\u0B57') || 
114                        (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2') || (c >= '\u0BC6' && c <= '\u0BC8') || 
115                        (c >= '\u0BCA' && c <= '\u0BCD') || c == '\u0BD7' || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') || 
116                        (c >= '\u0C46' && c <= '\u0C48') || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') || 
117                        (c >= '\u0C82' && c <= '\u0C83') || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') || 
118                        (c >= '\u0CCA' && c <= '\u0CCD') || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') || 
119                        (c >= '\u0D3E' && c <= '\u0D43') || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D') || c == '\u0D57' || 
120                        c == '\u0E31' || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E') || c == '\u0EB1' || 
121                        (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD') || 
122                        (c >= '\u0F18' && c <= '\u0F19') || c == '\u0F35' || c == '\u0F37' || c == '\u0F39' || c == '\u0F3E' || c == '\u0F3F' || 
123                        (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95') || c == '\u0F97' || 
124                        (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7') || c == '\u0FB9' || (c >= '\u20D0' && c <= '\u20DC') ||
125                        c == '\u20E1' || (c >= '\u302A' && c <= '\u302F') || c == '\u3099' || c == '\u309A';
126        }
127
128        private static boolean isExtender(char c) {
129                return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387' || c == '\u0640' || c == '\u0E46' || 
130                        c == '\u0EC6' || c == '\u3005' || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') || 
131                        (c >= '\u30FC' && c <= '\u30FE');
132        }
133
134        private static boolean isLetter(char c) {
135                return isBaseChar(c) || isIdeographic(c);
136        }
137
/**
 * Tells whether {@code c} is in this class's fixed table of base (alphabetic)
 * characters, expressed below as inclusive ranges and single code points.
 *
 * NOTE(review): the table looks like the BaseChar production of the XML 1.0
 * character classes - confirm against the specification before editing any
 * entry.
 *
 * @param c the character to classify
 * @return {@code true} if {@code c} matches any entry of the table
 */
138        private static boolean isBaseChar(char c) {
139                return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A') || (c >= '\u00C0' && c <= '\u00D6') || 
140                        (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131') || 
141                        (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E') || 
142                        (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5') || 
143                        (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1') || 
144                        c == '\u0386' || (c >= '\u0388' && c <= '\u038A') || c == '\u038C' || (c >= '\u038E' && c <= '\u03A1') || 
145                        (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6') || c == '\u03DA' || c == '\u03DC' || c == '\u03DE' || 
146                        c == '\u03E0' || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F') || 
147                        (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4') || 
148                        (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB') || 
149                        (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556') || 
150                        c == '\u0559' || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2') || 
151                        (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7') || 
152                        (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3') || 
153                        c == '\u06D5' || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939') || c == '\u093D' || 
154                        (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990') || 
155                        (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0') || c == '\u09B2' || 
156                        (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DC' && c <= '\u09DD') || (c >= '\u09DF' && c <= '\u09E1') || 
157                        (c >= '\u09F0' && c <= '\u09F1') || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') || 
158                        (c >= '\u0A13' && c <= '\u0A28') || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') || 
159                        (c >= '\u0A35' && c <= '\u0A36') || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C') || 
160                        c == '\u0A5E' || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B') || c == '\u0A8D' || 
161                        (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0') || 
162                        (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9') || c == '\u0ABD' || c == '\u0AE0' || 
163                        (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28') || 
164                        (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39') || 
165                        c == '\u0B3D' || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') || 
166                        (c >= '\u0B85' && c <= '\u0B8A') || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') || 
167                        (c >= '\u0B99' && c <= '\u0B9A') || c == '\u0B9C' || (c >= '\u0B9E' && c <= '\u0B9F') || 
168                        (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA') || (c >= '\u0BAE' && c <= '\u0BB5') || 
169                        (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C') || (c >= '\u0C0E' && c <= '\u0C10') || 
170                        (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33') || (c >= '\u0C35' && c <= '\u0C39') || 
171                        (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C') || (c >= '\u0C8E' && c <= '\u0C90') || 
172                        (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3') || (c >= '\u0CB5' && c <= '\u0CB9') || 
173                        c == '\u0CDE' || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') || 
174                        (c >= '\u0D0E' && c <= '\u0D10') || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') || 
175                        (c >= '\u0D60' && c <= '\u0D61') || (c >= '\u0E01' && c <= '\u0E2E') || c == '\u0E30' || 
176                        (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82') || 
177                        c == '\u0E84' || (c >= '\u0E87' && c <= '\u0E88') || c == '\u0E8A' || c == '\u0E8D' || (c >= '\u0E94' && c <= '\u0E97') || 
178                        (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3') || c == '\u0EA5' || c == '\u0EA7' || 
179                        (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE') || c == '\u0EB0' || 
180                        (c >= '\u0EB2' && c <= '\u0EB3') || c == '\u0EBD' || (c >= '\u0EC0' && c <= '\u0EC4') || 
181                        (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69') || (c >= '\u10A0' && c <= '\u10C5') || 
182                        (c >= '\u10D0' && c <= '\u10F6') || c == '\u1100' || (c >= '\u1102' && c <= '\u1103') || 
183                        (c >= '\u1105' && c <= '\u1107') || c == '\u1109' || (c >= '\u110B' && c <= '\u110C') || 
184                        (c >= '\u110E' && c <= '\u1112') || c == '\u113C' || c == '\u113E' || c == '\u1140' || c == '\u114C' || 
185                        c == '\u114E' || c == '\u1150' || (c >= '\u1154' && c <= '\u1155') || c == '\u1159' || 
186                        (c >= '\u115F' && c <= '\u1161') || c == '\u1163' || c == '\u1165' || c == '\u1167' || c == '\u1169' || 
187                        (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173') || c == '\u1175' || 
188                        c == '\u119E' || c == '\u11A8' || c == '\u11AB' || (c >= '\u11AE' && c <= '\u11AF') || 
189                        (c >= '\u11B7' && c <= '\u11B8') || c == '\u11BA' || (c >= '\u11BC' && c <= '\u11C2') || 
190                        c == '\u11EB' || c == '\u11F0' || c == '\u11F9' || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') || 
191                        (c >= '\u1F00' && c <= '\u1F15') || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') || 
192                        (c >= '\u1F48' && c <= '\u1F4D') || (c >= '\u1F50' && c <= '\u1F57') || c == '\u1F59' || c == '\u1F5B' || c == '\u1F5D' || 
193                        (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC') || 
194                        c == '\u1FBE' || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') || 
195                        (c >= '\u1FD0' && c <= '\u1FD3') || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') || 
196                        (c >= '\u1FF2' && c <= '\u1FF4') || (c >= '\u1FF6' && c <= '\u1FFC') || c == '\u2126' || 
197                        (c >= '\u212A' && c <= '\u212B') || c == '\u212E' || (c >= '\u2180' && c <= '\u2182') || 
198                        (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA') || (c >= '\u3105' && c <= '\u312C') || 
199                        (c >= '\uAC00' && c <= '\uD7A3');
200        }
201
202        private static boolean isIdeographic(char c) {
203                return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029');
204        }
205
206        public static String determineEncoding(InputStream stream) throws IOException {
207                stream.mark(20000);
208                try {
209                        int b0 = stream.read();
210                        int b1 = stream.read();
211                        int b2 = stream.read();
212                        int b3 = stream.read();
213
214                        if (b0 == 0xFE && b1 == 0xFF)
215                                return "UTF-16BE";
216                        else if (b0 == 0xFF && b1 == 0xFE)
217                                return "UTF-16LE";
218                        else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF )
219                                return "UTF-8";
220                        else if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F)
221                                return "UTF-16BE";
222                        else if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00)
223                                return "UTF-16LE";
224                        else if (b0 == 0x3C && b1 == 0x3F && b2 == 0x78 && b3 == 0x6D) {
225//                              UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding 
226//                              which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding 
227//                              declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns 
228//                              for the relevant ASCII characters, the encoding declaration itself may be read reliably
229                                InputStreamReader rdr = new InputStreamReader(stream, "US-ASCII");
230                                String hdr = readFirstLine(rdr);
231                                return extractEncoding(hdr); 
232                        } else
233                                return null;
234                } finally {
235                        stream.reset();
236                }
237        }
238
239        private static String extractEncoding(String hdr) {
240                int i = hdr.indexOf("encoding=");
241                if (i == -1)
242                        return null;
243                hdr = hdr.substring(i+9);
244                char sep = hdr.charAt(0);
245                hdr = hdr.substring(1);
246                i = hdr.indexOf(sep);
247                if (i == -1)
248                        return null;
249                return hdr.substring(0, i);
250        }
251
252        private static String readFirstLine(InputStreamReader rdr) throws IOException {
253                char[] buf = new char[1];
254                StringBuffer bldr = new StringBuffer();
255                rdr.read(buf);
256                while (buf[0] != '>') {
257                        bldr.append(buf[0]);
258                        rdr.read(buf);
259                }
260                return bldr.toString();
261        }
262
263        
264    public static boolean charSetImpliesAscii(String charset) {
265                return charset.equals("ISO-8859-1") || charset.equals("US-ASCII");
266        }
267
268        
269        /**
270         * Converts the raw characters to XML escape characters.
271         * 
272         * @param rawContent
273         * @param charset Null when charset is not known, so we assume it's unicode
274         * @param isNoLines
275         * @return escape string
276         */
277        public static String escapeXML(String rawContent, String charset, boolean isNoLines) {
278                if (rawContent == null){
279                        return "";
280                }
281                StringBuffer sb = new StringBuffer();
282
283                for (int i = 0; i < rawContent.length(); i++) {
284                        char ch = rawContent.charAt(i);
285                        if (ch == '\'')
286                                sb.append("&#39;");
287                        else if (ch == '&')
288                                sb.append("&amp;");
289                        else if (ch == '"')
290                                sb.append("&quot;");
291                        else if (ch == '<')
292                                sb.append("&lt;");
293                        else if (ch == '>')
294                                sb.append("&gt;");
295                        else if (ch > '~' && charset != null && charSetImpliesAscii(charset)) 
296                                // TODO - why is hashcode the only way to get the unicode number for the character
297                                // in jre 5.0?
298                                sb.append("&#x"+Integer.toHexString(new Character(ch).hashCode()).toUpperCase()+";");
299                        else if (isNoLines) {
300                                if (ch == '\r')
301                                        sb.append("&#xA;");
302                                else if (ch != '\n')
303                                        sb.append(ch);
304                        }
305                        else
306                                sb.append(ch);
307                }
308                return sb.toString();
309        }
310
311  public static Element getFirstChild(Element e) {
312    if (e == null)
313      return null;
314    Node n = e.getFirstChild();
315    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
316      n = n.getNextSibling();
317    return (Element) n;
318  }
319
320  public static Element getNamedChild(Element e, String name) {
321    Element c = getFirstChild(e);
322    while (c != null && !name.equals(c.getLocalName()) && !name.equals(c.getNodeName()))
323      c = getNextSibling(c);
324    return c;
325  }
326
327  public static Element getNextSibling(Element e) {
328    Node n = e.getNextSibling();
329    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
330      n = n.getNextSibling();
331    return (Element) n;
332  }
333
334  public static void getNamedChildren(Element e, String name, List<Element> set) {
335    Element c = getFirstChild(e);
336    while (c != null) {
337      if (name.equals(c.getLocalName()) || name.equals(c.getNodeName()) )
338        set.add(c);
339      c = getNextSibling(c);
340    }
341  }
342
343  public static String htmlToXmlEscapedPlainText(Element r) {
344    StringBuilder s = new StringBuilder();
345    Node n = r.getFirstChild();
346    boolean ws = false;
347    while (n != null) {
348      if (n.getNodeType() == Node.TEXT_NODE) {
349        String t = n.getTextContent().trim();
350        if (Utilities.noString(t))
351          ws = true;
352        else {
353          if (ws)
354            s.append(" ");
355          ws = false;
356          s.append(t);
357        }
358      }
359      if (n.getNodeType() == Node.ELEMENT_NODE) {
360        if (ws)
361          s.append(" ");
362        ws = false;
363        s.append(htmlToXmlEscapedPlainText((Element) n));
364        if (r.getNodeName().equals("br") || r.getNodeName().equals("p"))
365          s.append("\r\n");
366      }
367      n = n.getNextSibling();      
368    }
369    return s.toString();
370  }
371
372  public static String htmlToXmlEscapedPlainText(String definition) throws ParserConfigurationException, SAXException, IOException  {
373    return htmlToXmlEscapedPlainText(parseToDom("<div>"+definition+"</div>").getDocumentElement());
374  }
375
376  public static String elementToString(Element el) {
377    if (el == null)
378      return "";
379    Document document = el.getOwnerDocument();
380    DOMImplementationLS domImplLS = (DOMImplementationLS) document
381        .getImplementation();
382    LSSerializer serializer = domImplLS.createLSSerializer();
383    return serializer.writeToString(el);
384  }
385
386  public static String getNamedChildValue(Element element, String name) {
387    Element e = getNamedChild(element, name);
388    return e == null ? null : e.getAttribute("value");
389  }
390
391  public static void setNamedChildValue(Element element, String name, String value) throws FHIRException  {
392    Element e = getNamedChild(element, name);
393    if (e == null)
394      throw new FHIRException("unable to find element "+name);
395    e.setAttribute("value", value);
396  }
397
398
399        public static void getNamedChildrenWithWildcard(Element focus, String name, List<Element> children) {
400    Element c = getFirstChild(focus);
401    while (c != null) {
402        String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 
403      if (name.equals(n) || (name.endsWith("[x]") && n.startsWith(name.substring(0, name.length()-3))))
404        children.add(c);
405      c = getNextSibling(c);
406    }
407  }
408
409        public static void getNamedChildrenWithTails(Element focus, String name, List<Element> children, Set<String> typeTails) {
410    Element c = getFirstChild(focus);
411    while (c != null) {
412      String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 
413      if (n.equals(name) || (!n.equals("responseCode") && (n.startsWith(name) && typeTails.contains(n.substring(name.length())))))
414        children.add(c);
415      c = getNextSibling(c);
416    }
417  }
418        
419  public static boolean hasNamedChild(Element e, String name) {
420    Element c = getFirstChild(e);
421    while (c != null && !name.equals(c.getLocalName()) && !name.equals(c.getNodeName()))
422      c = getNextSibling(c);
423    return c != null;
424  }
425
426  public static Document parseToDom(String content) throws ParserConfigurationException, SAXException, IOException  {
427    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
428    factory.setNamespaceAware(false);
429    DocumentBuilder builder = factory.newDocumentBuilder();
430    return builder.parse(new ByteArrayInputStream(content.getBytes()));
431  }
432
433  public static Document parseFileToDom(String filename) throws ParserConfigurationException, SAXException, IOException  {
434    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
435    factory.setNamespaceAware(false);
436    DocumentBuilder builder = factory.newDocumentBuilder();
437    //FIXME resource leak
438    return builder.parse(new FileInputStream(filename));
439  }
440
441  public static Element getLastChild(Element e) {
442    if (e == null)
443      return null;
444    Node n = e.getLastChild();
445    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
446      n = n.getPreviousSibling();
447    return (Element) n;
448  }
449
450  public static Element getPrevSibling(Element e) {
451    Node n = e.getPreviousSibling();
452    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
453      n = n.getPreviousSibling();
454    return (Element) n;
455  }
456
457  public static String getNamedChildAttribute(Element element, String name, String aname) {
458    Element e = getNamedChild(element, name);
459    return e == null ? null : e.getAttribute(aname);
460  }
461
462  public static void writeDomToFile(Document doc, String filename) throws TransformerException {
463    TransformerFactory transformerFactory = TransformerFactory.newInstance();
464    Transformer transformer = transformerFactory.newTransformer();
465    DOMSource source = new DOMSource(doc);
466    StreamResult streamResult =  new StreamResult(new File(filename));
467    transformer.transform(source, streamResult);    
468  }
469
470  public static String getXsiType(org.w3c.dom.Element element) {
471    Attr a = element.getAttributeNodeNS("http://www.w3.org/2001/XMLSchema-instance", "type");
472    return (a == null ? null : a.getTextContent());
473    
474  }
475
476        public static String getDirectText(org.w3c.dom.Element node) {
477    Node n = node.getFirstChild();
478    StringBuilder b = new StringBuilder();
479    while (n != null) {
480        if (n.getNodeType() == Node.TEXT_NODE) 
481                b.append(n.getTextContent());
482        n = n.getNextSibling();
483    }
484          return b.toString().trim();
485        }
486
487        
488}