001/**
002 * The contents of this file are subject to the Mozilla Public License Version 1.1
003 * (the "License"); you may not use this file except in compliance with the License.
004 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
005 * Software distributed under the License is distributed on an "AS IS" basis,
006 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
007 * specific language governing rights and limitations under the License.
008 *
009 * The Original Code is "XMLParser.java".  Description:
010 * "Parses and encodes HL7 messages in XML form, according to HL7's normative XML encoding
011 * specification."
012 *
013 * The Initial Developer of the Original Code is University Health Network. Copyright (C)
014 * 2002.  All Rights Reserved.
015 *
016 * Contributor(s): ______________________________________.
017 *
018 * Alternatively, the contents of this file may be used under the terms of the
 * GNU General Public License (the "GPL"), in which case the provisions of the GPL are
020 * applicable instead of those above.  If you wish to allow use of your version of this
021 * file only under the terms of the GPL and not to allow others to use your version
022 * of this file under the MPL, indicate your decision by deleting  the provisions above
023 * and replace  them with the notice and other provisions required by the GPL License.
024 * If you do not delete the provisions above, a recipient may use your version of
025 * this file under either the MPL or the GPL.
026 */
027
028package ca.uhn.hl7v2.parser;
029
030import java.util.HashSet;
031import java.util.Set;
032import java.util.regex.Matcher;
033import java.util.regex.Pattern;
034
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037import org.w3c.dom.DOMException;
038import org.w3c.dom.Document;
039import org.w3c.dom.Element;
040import org.w3c.dom.Node;
041import org.w3c.dom.NodeList;
042
043import ca.uhn.hl7v2.ErrorCode;
044import ca.uhn.hl7v2.HL7Exception;
045import ca.uhn.hl7v2.HapiContext;
046import ca.uhn.hl7v2.model.Composite;
047import ca.uhn.hl7v2.model.DataTypeException;
048import ca.uhn.hl7v2.model.GenericComposite;
049import ca.uhn.hl7v2.model.GenericMessage;
050import ca.uhn.hl7v2.model.GenericPrimitive;
051import ca.uhn.hl7v2.model.Message;
052import ca.uhn.hl7v2.model.Primitive;
053import ca.uhn.hl7v2.model.Segment;
054import ca.uhn.hl7v2.model.Type;
055import ca.uhn.hl7v2.model.Varies;
056import ca.uhn.hl7v2.util.Terser;
057import ca.uhn.hl7v2.util.XMLUtils;
058
059/**
060 * Parses and encodes HL7 messages in XML form, according to HL7's normative XML encoding
061 * specification. This is an abstract class that handles datatype and segment parsing/encoding, but
062 * not the parsing/encoding of entire messages. To use the XML parser, you should create a subclass
063 * for a certain message structure. This subclass must be able to identify the Segment objects that
064 * correspond to various Segment nodes in an XML document, and call the methods <code>
065 * parse(Segment segment, ElementNode segmentNode)</code> and
066 * <code>encode(Segment segment, ElementNode segmentNode)
067 * </code> as appropriate. XMLParser uses the Xerces parser, which must be installed in your
068 * classpath.
069 * 
070 * @see ParserConfiguration for configuration options which may affect parser encoding and decoding behaviour
071 * @author Bryan Tripp, Shawn Bellina
072 */
073public abstract class XMLParser extends Parser {
074
075        private static final String ESCAPE_ATTRNAME = "V";
076        private static final String ESCAPE_NODENAME = "escape";
077        private static final Logger log = LoggerFactory.getLogger(XMLParser.class);
078    protected static final String NS = "urn:hl7-org:v2xml";
079    private static final Pattern NS_PATTERN = Pattern.compile("xmlns(.*)=\"" + NS + "\"");
080
081        private String textEncoding;
082
083        
084        private boolean disableWhitespaceTrimming;
085
086
087        /** Constructor */
088        public XMLParser() {
089                super();
090        }
091
092    /**
093     *
094     * @param context the HAPI context
095     */
096        public XMLParser(HapiContext context) {
097                super(context);
098        }
099
100        /**
101         * Constructor
102         * 
103         * @param theFactory custom factory to use for model class lookup
104         */
105        public XMLParser(ModelClassFactory theFactory) {
106                super(theFactory);
107
108        }
109
110        /**
111         * Returns a String representing the encoding of the given message, if the encoding is
112         * recognized. For example if the given message appears to be encoded using HL7 2.x XML rules
113         * then "XML" would be returned. If the encoding is not recognized then null is returned. That
114         * this method returns a specific encoding does not guarantee that the message is correctly
115         * encoded (e.g. well formed XML) - just that it is not encoded using any other encoding than
116         * the one returned. Returns null if the encoding is not recognized.
117         */
118        public String getEncoding(String message) {
119                return EncodingDetector.isXmlEncoded(message) ? getDefaultEncoding() : null;
120        }
121
122        /**
123         * @return the preferred encoding of this Parser
124         */
125        public String getDefaultEncoding() {
126                return "XML";
127        }
128
129        /**
130         * Sets the <i>keepAsOriginalNodes<i>
131         * 
132         * The nodes whose names match the <i>keepAsOriginalNodes<i> will be kept as original, meaning
133         * that no white space treaming will occur on them
134     *
135     * @param keepAsOriginalNodes of the nodes to be kept as original
136     * @deprecated Use {@link ParserConfiguration#setXmlDisableWhitespaceTrimmingOnNodeNames(Set)} instead. That method works exactly the same as this one but has been renamed for a more clear meaning. 
137         */
138        @Deprecated()
139        public void setKeepAsOriginalNodes(String[] keepAsOriginalNodes) {
140                getParserConfiguration().setXmlDisableWhitespaceTrimmingOnNodeNames(keepAsOriginalNodes);
141        }
142
143        /**
144         * Sets the <i>keepAsOriginalNodes<i>
145         * 
146         * @deprecated Use {@link ParserConfiguration#getXmlDisableWhitespaceTrimmingOnNodeNames()} instead
147         */
148        @Deprecated
149        public String[] getKeepAsOriginalNodes() {
150                return getParserConfiguration().getXmlDisableWhitespaceTrimmingOnNodeNames().toArray(new String[getParserConfiguration().getXmlDisableWhitespaceTrimmingOnNodeNames().size()]);
151        }
152
153        /**
154         * <p>
155         * Creates and populates a Message object from an XML Document that contains an XML-encoded HL7
156         * message.
157         * </p>
158         * <p>
159         * The easiest way to implement this method for a particular message structure is as follows:
160         * <ol>
161         * <li>Create an instance of the Message type you are going to handle with your subclass of
162         * XMLParser</li>
163         * <li>Go through the given Document and find the Elements that represent the top level of each
164         * message segment.</li>
165         * <li>For each of these segments, call
166         * <code>parse(Segment segmentObject, Element segmentElement)</code>, providing the appropriate
167         * Segment from your Message object, and the corresponding Element.</li>
168         * </ol>
169         * At the end of this process, your Message object should be populated with data from the XML
170         * Document.
171         * </p>
172         *
173     * @param xmlMessage DOM message object to be parsed
174     * @param version HL7 version
175         * @throws HL7Exception if the message is not correctly formatted.
176         * @throws EncodingNotSupportedException if the message encoded is not supported by this parser.
177         */
178        public abstract Message parseDocument(Document xmlMessage, String version) throws HL7Exception;
179
180        /**
181         * <p>
182         * Parses a message string and returns the corresponding Message object. This method checks that
183         * the given message string is XML encoded, creates an XML Document object (using Xerces) from
184         * the given String, and calls the abstract method <code>parse(Document XMLMessage)</code>
185         * </p>
186         */
187        protected Message doParse(String message, String version) throws HL7Exception {
188                Message m;
189
190                // parse message string into a DOM document
191                Document doc;
192                doc = parseStringIntoDocument(message);
193                m = parseDocument(doc, version);
194
195                return m;
196        }
197
198        /**
199         * Parses a string containing an XML document into a Document object.
200         * 
201         * Note that this method is synchronized currently, as the XML parser is not thread safe
202         * 
203         * @throws HL7Exception
204         */
205        protected synchronized Document parseStringIntoDocument(String message) throws HL7Exception {
206                try {
207                        return XMLUtils.parse(message);
208                } catch (Exception e) {
209                        throw new HL7Exception("Exception parsing XML", e);
210                }
211        }
212
213        /**
214         * Formats a Message object into an HL7 message string using the given encoding.
215         * 
216         * @throws HL7Exception if the data fields in the message do not permit encoding (e.g. required
217         *             fields are null)
218         * @throws EncodingNotSupportedException if the requested encoding is not supported by this
219         *             parser.
220         */
221        protected String doEncode(Message source, String encoding) throws HL7Exception {
222                if (!encoding.equals("XML"))
223                        throw new EncodingNotSupportedException("XMLParser supports only XML encoding");
224                return encode(source);
225        }
226
227        /**
228         * Formats a Message object into an HL7 message string using this parser's default encoding (XML
229         * encoding). This method calls the abstract method <code>encodeDocument(...)</code> in order to
230         * obtain XML Document object representation of the Message, then serializes it to a String.
231         * 
232         * @throws HL7Exception if the data fields in the message do not permit encoding (e.g. required
233         *             fields are null)
234         */
235        protected String doEncode(Message source) throws HL7Exception {
236                if (source instanceof GenericMessage) {
237                        throw new HL7Exception(
238                                        "Can't XML-encode a GenericMessage.  Message must have a recognized structure.");
239                }
240
241                Document doc = encodeDocument(source);
242                // Element documentElement = doc.getDocumentElement();
243                // if (!documentElement.hasAttribute("xmlns"))
244                // documentElement.setAttribute("xmlns", "urn:hl7-org:v2xml");
245                try {
246                        return XMLUtils.serialize(doc, getParserConfiguration().isPrettyPrintWhenEncodingXml());
247                } catch (Exception e) {
248                        throw new HL7Exception("Exception serializing XML document to string", e);
249                }
250        }
251
252        /**
253         * <p>
254         * Creates an XML Document that corresponds to the given Message object.
255         * </p>
256         * <p>
257         * If you are implementing this method, you should create an XML Document, and insert XML
258         * Elements into it that correspond to the groups and segments that belong to the message type
259         * that your subclass of XMLParser supports. Then, for each segment in the message, call the
260         * method <code>encode(Segment segmentObject, Element segmentElement)</code> using the Element
261         * for that segment and the corresponding Segment object from the given Message.
262         * </p>
263     *
264     * @param source message
265     * @return the DOM document object of the encoded message
266         */
267        public abstract Document encodeDocument(Message source) throws HL7Exception;
268
269
270    protected void assertNamespaceURI(String ns) throws HL7Exception {
271        if (!NS.equals(ns)) {
272            throw new HL7Exception("Namespace URI must be " + NS);
273        }
274    }
275
276        /**
277         * Populates the given Segment object with data from the given XML Element.
278         *
279     * @param segmentObject the segment to parse into
280     * @param segmentElement the DOM element to be parsed
281         * @throws HL7Exception if the XML Element does not have the correct name and structure for the
282         *             given Segment, or if there is an error while setting individual field values.
283         */
284        public void parse(Segment segmentObject, Element segmentElement) throws HL7Exception {
285                Set<String> done = new HashSet<String>();
286
287                NodeList all = segmentElement.getChildNodes();
288                for (int i = 0; i < all.getLength(); i++) {
289                        String elementName = all.item(i).getNodeName();
290
291                        if (all.item(i).getNodeType() == Node.ELEMENT_NODE && !done.contains(elementName)) {
292                assertNamespaceURI(all.item(i).getNamespaceURI());
293                                done.add(elementName);
294
295                                int index = elementName.indexOf('.');
296                                if (index >= 0 && elementName.length() > index) { // properly formatted element
297                                        String fieldNumString = elementName.substring(index + 1);
298                                        int fieldNum = Integer.parseInt(fieldNumString);
299                                        parseReps(segmentObject, segmentElement, elementName, fieldNum);
300                                } else {
301                                        log.debug("Child of segment {} doesn't look like a field {}",
302                                                        segmentObject.getName(), elementName);
303                                }
304                        }
305                }
306
307                // set data type of OBX-5
308                if (segmentObject.getClass().getName().contains("OBX")) {
309                        Varies.fixOBX5(segmentObject, getFactory(), getHapiContext().getParserConfiguration());
310                }
311        }
312
313        private void parseReps(Segment segmentObject, Element segmentElement, String fieldName,
314                        int fieldNum) throws HL7Exception {
315
316                NodeList reps = segmentElement.getElementsByTagName(fieldName);
317                for (int i = 0; i < reps.getLength(); i++) {
318                        parse(segmentObject.getField(fieldNum, i), (Element) reps.item(i));
319                }
320        }
321
322        /**
323         * Populates the given Element with data from the given Segment, by inserting Elements
324         * corresponding to the Segment's fields, their components, etc. Returns true if there is at
325         * least one data value in the segment.
326     *
327     * @param segmentObject the segment to be encoded
328     * @param segmentElement the DOM element to encode into
329     * @return true if there is at least one data value in the segment
330     * @throws HL7Exception if an erro occurred while encoding
331         */
332        public boolean encode(Segment segmentObject, Element segmentElement) throws HL7Exception {
333                boolean hasValue = false;
334                int n = segmentObject.numFields();
335                for (int i = 1; i <= n; i++) {
336                        String name = makeElementName(segmentObject, i);
337                        Type[] reps = segmentObject.getField(i);
338                        for (Type rep : reps) {
339                                Element newNode = segmentElement.getOwnerDocument().createElement(name);
340                                boolean componentHasValue = encode(rep, newNode);
341                                if (componentHasValue) {
342                                        try {
343                                                segmentElement.appendChild(newNode);
344                                        } catch (DOMException e) {
345                                                throw new HL7Exception("DOMException encoding Segment: ", e);
346                                        }
347                                        hasValue = true;
348                                }
349                        }
350                }
351                return hasValue;
352        }
353
354        /**
355         * Populates the given Type object with data from the given XML Element.
356     *
357     * @param datatypeObject the type to parse into
358     * @param datatypeElement the DOM element to be parsed
359     * @throws DataTypeException if the data did not match the expected type rules
360         */
361        public void parse(Type datatypeObject, Element datatypeElement) throws HL7Exception {
362                if (datatypeObject instanceof Varies) {
363                        parseVaries((Varies) datatypeObject, datatypeElement);
364                } else if (datatypeObject instanceof Primitive) {
365                        parsePrimitive((Primitive) datatypeObject, datatypeElement);
366                } else if (datatypeObject instanceof Composite) {
367                        parseComposite((Composite) datatypeObject, datatypeElement);
368                }
369        }
370
371        /**
372         * Parses an XML element into a Varies by determining whether the element is primitive or
373         * composite, calling setData() on the Varies with a new generic primitive or composite as
374         * appropriate, and then calling parse again with the new Type object.
375         */
376        private void parseVaries(Varies datatypeObject, Element datatypeElement)
377                        throws HL7Exception {
378                // figure out what data type it holds
379                // short nodeType = datatypeElement.getFirstChild().getNodeType();
380                if (!hasChildElement(datatypeElement)) {
381                        // it's a primitive
382                        datatypeObject.setData(new GenericPrimitive(datatypeObject.getMessage()));
383                } else {
384                        // it's a composite ... almost know what type, except that we don't have the version
385                        // here
386                        datatypeObject.setData(new GenericComposite(datatypeObject.getMessage()));
387                }
388                parse(datatypeObject.getData(), datatypeElement);
389        }
390
391        /** Returns true if any of the given element's children are (non-escape) elements */
392        private boolean hasChildElement(Element e) {
393                NodeList children = e.getChildNodes();
394                boolean hasElement = false;
395                int c = 0;
396                while (c < children.getLength() && !hasElement) {
397                        if (children.item(c).getNodeType() == Node.ELEMENT_NODE
398                                        && !ESCAPE_NODENAME.equals(children.item(c).getNodeName())) {
399                                hasElement = true;
400                        }
401                        c++;
402                }
403                return hasElement;
404        }
405
406        /**
407         * Parses a primitive type by filling it with text child, if any. If the datatype element
408         * contains escape elements, resolve them properly.
409         */
410        private void parsePrimitive(Primitive datatypeObject, Element datatypeElement)
411                        throws HL7Exception {
412                NodeList children = datatypeElement.getChildNodes();
413                StringBuilder builder = new StringBuilder();
414                for (int c = 0; c < children.getLength(); c++) {
415                        Node child = children.item(c);
416                        try {
417                                if (child.getNodeType() == Node.TEXT_NODE) {
418                                        String value = child.getNodeValue();
419                                        if (value != null && value.length() > 0) {
420                                                if (keepAsOriginal(child.getParentNode())) {
421                                                        builder.append(value);
422                                                } else {
423                                                        builder.append(removeWhitespace(value));
424                                                }
425                                        }
426                                        // Check for formatting elements
427                                } else if (child.getNodeType() == Node.ELEMENT_NODE
428                                                && ESCAPE_NODENAME.equals(child.getLocalName())) {
429                    assertNamespaceURI(child.getNamespaceURI());
430                                        EncodingCharacters ec = EncodingCharacters.getInstance(datatypeObject
431                                                        .getMessage());
432                                        Element elem = (Element) child;
433                                        String attr = elem.getAttribute(ESCAPE_ATTRNAME).trim();
434                                        if (attr != null && attr.length() > 0) {
435                                                builder.append(ec.getEscapeCharacter()).append(attr)
436                                                                .append(ec.getEscapeCharacter());
437                                        }
438                                }
439                        } catch (Exception e) {
440                                log.error("Error parsing primitive value from TEXT_NODE", e);
441                        }
442
443                }
444                datatypeObject.setValue(builder.toString());
445        }
446
447        /**
448         * Checks if <code>Node</code> content should be kept as original (ie.: whitespaces won't be
449         * removed)
450         * 
451         * @param node The target <code>Node</code>
452         * @return boolean <code>true</code> if whitespaces should not be removed from node content,
453         *         <code>false</code> otherwise
454         */
455        protected boolean keepAsOriginal(Node node) {
456                if (getParserConfiguration().isXmlDisableWhitespaceTrimmingOnAllNodes()) {
457                        return true;
458                }
459                return (node.getNodeName() != null) && getParserConfiguration().getXmlDisableWhitespaceTrimmingOnNodeNames().contains(node.getNodeName());
460        }
461
462        /**
463         * Removes all unnecessary whitespace from the given String (intended to be used with Primitive
464         * values). This includes leading and trailing whitespace, and repeated space characters.
465         * Carriage returns, line feeds, and tabs are replaced with spaces.
466         */
467        protected String removeWhitespace(String s) {
468                if (this.disableWhitespaceTrimming) {
469                        return s;
470                }
471                
472                s = s.replace('\r', ' ');
473                s = s.replace('\n', ' ');
474                s = s.replace('\t', ' ');
475
476                boolean repeatedSpacesExist = true;
477                while (repeatedSpacesExist) {
478                        int loc = s.indexOf("  ");
479                        if (loc < 0) {
480                                repeatedSpacesExist = false;
481                        } else {
482                                StringBuilder buf = new StringBuilder();
483                                buf.append(s.substring(0, loc));
484                                buf.append(" ");
485                                buf.append(s.substring(loc + 2));
486                                s = buf.toString();
487                        }
488                }
489                return s.trim();
490        }
491
492        /**
493         * Populates a Composite type by looping through it's children, finding corresponding Elements
494         * among the children of the given Element, and calling parse(Type, Element) for each.
495         */
496        private void parseComposite(Composite datatypeObject, Element datatypeElement)
497                        throws HL7Exception {
498                if (datatypeObject instanceof GenericComposite) { // elements won't be named
499                                                                                                                        // GenericComposite.x
500                        NodeList children = datatypeElement.getChildNodes();
501                        int compNum = 0;
502                        for (int i = 0; i < children.getLength(); i++) {
503                                if (children.item(i).getNodeType() == Node.ELEMENT_NODE) {
504                                        Element nextElement = (Element) children.item(i);
505                    assertNamespaceURI(nextElement.getNamespaceURI());
506                                        String localName = nextElement.getLocalName();
507                                        int dotIndex = localName.indexOf(".");
508                                        if (dotIndex > -1) {
509                                                compNum = Integer.parseInt(localName.substring(dotIndex + 1)) - 1;
510                                        } else {
511                                                log.debug(
512                                                                "Datatype element {} doesn't have a valid numbered name, usgin default index of {}",
513                                                                datatypeElement.getLocalName(), compNum);
514                                        }
515                                        Type nextComponent = datatypeObject.getComponent(compNum);
516                                        parse(nextComponent, nextElement);
517                                        compNum++;
518                                }
519                        }
520                } else {
521                        Type[] children = datatypeObject.getComponents();
522                        for (int i = 0; i < children.length; i++) {
523                                NodeList matchingElements = datatypeElement.getElementsByTagNameNS(NS, makeElementName(
524                                                datatypeObject, i + 1));
525                                if (matchingElements.getLength() > 0) {
526                                        parse(children[i], (Element) matchingElements.item(0));
527                                }
528                        }
529                        
530                        int nextExtraCmpIndex = 0;
531                        boolean foundExtraComponent;
532                        do {
533                                foundExtraComponent = false;
534                                NodeList matchingElements = datatypeElement.getElementsByTagNameNS(NS, makeElementName(datatypeObject, children.length + nextExtraCmpIndex + 1));
535                                if (matchingElements.getLength() > 0) {
536                                        parse(datatypeObject.getExtraComponents().getComponent(nextExtraCmpIndex), (Element) matchingElements.item(0));
537                                        foundExtraComponent = true;
538                                }
539                                nextExtraCmpIndex++;
540                        } while (foundExtraComponent);
541                        
542                        
543                }
544        }
545
546        /** Returns the expected XML element name for the given child of the given Segment */
547        private String makeElementName(Segment s, int child) {
548                return s.getName() + "." + child;
549        }
550
551        /** Returns the expected XML element name for the given child of the given Composite */
552        private String makeElementName(Composite composite, int child) {
553                return composite.getName() + "." + child;
554        }
555
556        /**
557         * Populates the given Element with data from the given Type, by inserting Elements
558         * corresponding to the Type's components and values. Returns true if the given type contains a
559         * value (i.e. for Primitives, if getValue() doesn't return null, and for Composites, if at
560         * least one underlying Primitive doesn't return null).
561         */
562        private boolean encode(Type datatypeObject, Element datatypeElement) throws DataTypeException {
563                boolean hasData = false;
564                if (datatypeObject instanceof Varies) {
565                        hasData = encodeVaries((Varies) datatypeObject, datatypeElement);
566                } else if (datatypeObject instanceof Primitive) {
567                        hasData = encodePrimitive((Primitive) datatypeObject, datatypeElement);
568                } else if (datatypeObject instanceof Composite) {
569                        hasData = encodeComposite((Composite) datatypeObject, datatypeElement);
570                }
571                return hasData;
572        }
573
574        /**
575         * Encodes a Varies type by extracting it's data field and encoding that. Returns true if the
576         * data field (or one of its components) contains a value.
577         */
578        private boolean encodeVaries(Varies datatypeObject, Element datatypeElement)
579                        throws DataTypeException {
580                boolean hasData = false;
581                if (datatypeObject.getData() != null) {
582                        hasData = encode(datatypeObject.getData(), datatypeElement);
583                }
584                return hasData;
585        }
586
587        /**
588         * Encodes a Primitive in XML by adding it's value as a child of the given Element. Detects
589         * escape character and creates proper <escape> elements in the DOM tree. Returns true if the
590         * given Primitive contains a value.
591         */
592        private boolean encodePrimitive(Primitive datatypeObject, Element datatypeElement)
593                        throws DataTypeException {
594                String value = datatypeObject.getValue();
595                boolean hasValue = (value != null && value.length() > 0);
596                if (hasValue) {
597                        try {
598                                EncodingCharacters ec = EncodingCharacters.getInstance(datatypeObject.getMessage());
599                                char esc = ec.getEscapeCharacter();
600                                int pos;
601                                int oldpos = 0;
602                                boolean escaping = false;
603
604                                // Find next escape character
605                                while ((pos = value.indexOf(esc, oldpos)) >= 0) {
606
607                                        // string until next escape character
608                                        String v = value.substring(oldpos, pos);
609                                        if (!escaping) {
610                                                // currently in "text mode", so create textnode from it
611                                                if (v.length() > 0)
612                                                        datatypeElement.appendChild(datatypeElement.getOwnerDocument()
613                                                                        .createTextNode(v));
614                                                escaping = true;
615                                        } else {
616                                                if (v.startsWith(".") || "H".equals(v) || "N".equals(v)) {
617                                                        // currently in "escape mode", so create escape element from it
618                                                        Element escape = datatypeElement.getOwnerDocument().createElement(
619                                                                        ESCAPE_NODENAME);
620                                                        escape.setAttribute(ESCAPE_ATTRNAME, v);
621                                                        datatypeElement.appendChild(escape);
622                                                        escaping = false;
623                                                } else {
624                                                        // no proper escape sequence, assume text
625                                                        datatypeElement.appendChild(datatypeElement.getOwnerDocument()
626                                                                        .createTextNode(esc + v));
627                                                }
628                                        }
629                                        oldpos = pos + 1;
630                                }
631                                // create text from the remainder
632                                if (oldpos <= value.length()) {
633
634                                        StringBuilder sb = new StringBuilder();
635                                        // If we are in escaping mode, there appears no closing escape character,
636                                        // so we treat the string as text
637                                        if (escaping)
638                                                sb.append(esc);
639
640                                        sb.append(value.substring(oldpos));
641                                        datatypeElement.appendChild(datatypeElement.getOwnerDocument().createTextNode(
642                                                        sb.toString()));
643                                }
644
645                        } catch (Exception e) {
646                                throw new DataTypeException("Exception encoding Primitive: ", e);
647                        }
648
649                }
650                return hasValue;
651        }
652
653        /**
654         * Encodes a Composite in XML by looping through it's components, creating new children for each
655         * of them (with the appropriate names) and populating them by calling encode(Type, Element)
656         * using these children. Returns true if at least one component contains a value.
657         */
658        private boolean encodeComposite(Composite datatypeObject, Element datatypeElement)
659                        throws DataTypeException {
660                Type[] components = datatypeObject.getComponents();
661                boolean hasValue = false;
662                for (int i = 0; i < components.length; i++) {
663                        String name = makeElementName(datatypeObject, i + 1);
664                        Element newNode = datatypeElement.getOwnerDocument().createElement(name);
665                        boolean componentHasValue = encode(components[i], newNode);
666                        if (componentHasValue) {
667                                try {
668                                        datatypeElement.appendChild(newNode);
669                                } catch (DOMException e) {
670                                        throw new DataTypeException("DOMException encoding Composite: ", e);
671                                }
672                                hasValue = true;
673                        }
674                }
675                return hasValue;
676        }
677
678        /**
679         * <p>
680         * Returns a minimal amount of data from a message string, including only the data needed to
681         * send a response to the remote system. This includes the following fields:
682         * <ul>
683         * <li>field separator</li>
684         * <li>encoding characters</li>
685         * <li>processing ID</li>
686         * <li>message control ID</li>
687         * </ul>
688         * This method is intended for use when there is an error parsing a message, (so the Message
689         * object is unavailable) but an error message must be sent back to the remote system including
690         * some of the information in the inbound message. This method parses only that required
691         * information, hopefully avoiding the condition that caused the original error.
692         * </p>
693         */
694        public Segment getCriticalResponseData(String message) throws HL7Exception {
695                String version = getVersion(message);
696                Segment criticalData = Parser.makeControlMSH(version, getFactory());
697
698                Terser.set(criticalData, 1, 0, 1, 1, parseLeaf(message, "MSH.1", 0));
699                Terser.set(criticalData, 2, 0, 1, 1, parseLeaf(message, "MSH.2", 0));
700                Terser.set(criticalData, 10, 0, 1, 1, parseLeaf(message, "MSH.10", 0));
701                String procID = parseLeaf(message, "MSH.11", 0);
702                if (procID == null || procID.length() == 0) {
703                        procID = parseLeaf(message, "PT.1", message.indexOf("MSH.11"));
704                        // this field is a composite in later versions
705                }
706                Terser.set(criticalData, 11, 0, 1, 1, procID);
707
708                return criticalData;
709        }
710
711        /**
712         * For response messages, returns the value of MSA-2 (the message ID of the message sent by the
713         * sending system). This value may be needed prior to main message parsing, so that
714         * (particularly in a multi-threaded scenario) the message can be routed to the thread that sent
715         * the request. We need this information first so that any parse exceptions are thrown to the
716         * correct thread. Implementers of Parsers should take care to make the implementation of this
717         * method very fast and robust. Returns null if MSA-2 can not be found (e.g. if the message is
718         * not a response message). Trims whitespace from around the MSA-2 field.
719         */
720        public String getAckID(String message) {
721                String ackID = null;
722                try {
723                        ackID = parseLeaf(message, "msa.2", 0).trim();
724                } catch (HL7Exception e) { /* OK ... assume it isn't a response message */
725                }
726                return ackID;
727        }
728
729        public String getVersion(String message) throws HL7Exception {
730        String version = parseLeaf(message, "MSH.12", 0);
731        if (version == null || version.trim().length() == 0) {
732            version = parseLeaf(message, "VID.1", message.indexOf("MSH.12"));
733        }
734        return version;     
735        }
736
737        /**
738         * Attempts to retrieve the value of a leaf tag without using DOM or SAX. This method searches
739         * the given message string for the given tag name, and returns everything after the given tag
740         * and before the start of the next tag. Whitespace is stripped. This is intended only for lead
741         * nodes, as the value is considered to end at the start of the next tag, regardless of whether
742         * it is the matching end tag or some other nested tag.
743         * 
744         * @param message a string message in XML form
745         * @param tagName the name of the XML tag, e.g. "MSA.2"
746         * @param startAt the character location at which to start searching
747         * @throws HL7Exception if the tag can not be found
748         */
749        protected static String parseLeaf(String message, String tagName, int startAt) throws HL7Exception {
750
751        // Workaround #176: XML may include explicit namespaces. It would be more stable to use some
752        // kind of pull parser for this method instead of manually digging for tags in the XML structure.
753        String prefix = "";
754        Matcher m = NS_PATTERN.matcher(message);
755        if (m.find()) {
756            String ns = m.group(1);
757            if (ns != null && ns.length() > 0) {
758                prefix = ns.substring(1) + ":";
759            }
760        }
761
762                int tagStart = message.indexOf("<" + prefix + tagName, startAt);
763                if (tagStart < 0)
764                        tagStart = message.indexOf("<" + prefix + tagName.toUpperCase(), startAt);
765                int valStart = message.indexOf(">", tagStart) + 1;
766                int valEnd = message.indexOf("<", valStart);
767
768        String value;
769                if (tagStart >= 0 && valEnd >= valStart) {
770                        value = message.substring(valStart, valEnd);
771                } else {
772                        throw new HL7Exception("Couldn't find " + tagName + " in message beginning: "
773                                        + message.substring(0, Math.min(150, message.length())),
774                                        ErrorCode.REQUIRED_FIELD_MISSING);
775                }
776
777                // Escape codes, as defined at http://hdf.ncsa.uiuc.edu/HDF5/XML/xml_escape_chars.htm
778                value = value.replaceAll("&quot;", "\"");
779                value = value.replaceAll("&apos;", "'");
780                value = value.replaceAll("&amp;", "&");
781                value = value.replaceAll("&lt;", "<");
782                value = value.replaceAll("&gt;", ">");
783
784                return value;
785        }
786
787        /**
788         * Throws unsupported operation exception
789         * 
790         * @throws UnsupportedOperationException
791         */
792        @Override
793        public String doEncode(Segment structure, EncodingCharacters encodingCharacters)
794                        throws HL7Exception {
795                throw new UnsupportedOperationException("Not supported yet.");
796        }
797
798        /**
799         * Throws unsupported operation exception
800         * 
801         * @throws UnsupportedOperationException
802         */
803        @Override
804        protected Message doParseForSpecificPackage(String theMessage, String theVersion,
805                        String thePackageName) throws HL7Exception {
806                throw new UnsupportedOperationException("Not supported yet.");
807        }
808
809        /**
810         * Throws unsupported operation exception
811         * 
812         * @throws UnsupportedOperationException
813         */
814        @Override
815        public String doEncode(Type type, EncodingCharacters encodingCharacters) throws HL7Exception {
816                throw new UnsupportedOperationException("Not supported yet.");
817        }
818
819        /**
820         * Throws unsupported operation exception
821         * 
822         * @throws UnsupportedOperationException
823         */
824        @Override
825        public void parse(Type type, String string, EncodingCharacters encodingCharacters)
826                        throws HL7Exception {
827                throw new UnsupportedOperationException("Not supported yet.");
828        }
829
830        /**
831         * Throws unsupported operation exception
832         * 
833         * @throws UnsupportedOperationException
834         */
835        @Override
836        public void parse(Segment segment, String string, EncodingCharacters encodingCharacters)
837                        throws HL7Exception {
838                throw new UnsupportedOperationException("Not supported yet.");
839        }
840
841        /**
842         * Returns the text encoding to be used in generating new messages. Note that this affects
843         * encoding to string only, not parsing.
844         * 
845         * @return text encoding
846         */
847        public String getTextEncoding() {
848                return textEncoding;
849        }
850
851        /**
852         * Sets the text encoding to be used in generating new messages. Note that this affects encoding
853         * to string only, not parsing.
854         * 
855         * @param textEncoding The encoding. Default is the platform default.
856         */
857        public void setTextEncoding(String textEncoding) {
858                this.textEncoding = textEncoding;
859        }
860
861}