001/**
002The contents of this file are subject to the Mozilla Public License Version 1.1
003(the "License"); you may not use this file except in compliance with the License.
004You may obtain a copy of the License at http://www.mozilla.org/MPL/
005Software distributed under the License is distributed on an "AS IS" basis,
006WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
007specific language governing rights and limitations under the License.
008
009The Initial Developer of the Original Code is University Health Network. Copyright (C)
0102001.  All Rights Reserved.
011
012Contributor(s): ______________________________________.
013
014Alternatively, the contents of this file may be used under the terms of the
015GNU General Public License (the "GPL"), in which case the provisions of the GPL are
016applicable instead of those above.  If you wish to allow use of your version of this
017file only under the terms of the GPL and not to allow others to use your version
018of this file under the MPL, indicate your decision by deleting  the provisions above
019and replace  them with the notice and other provisions required by the GPL License.
020If you do not delete the provisions above, a recipient may use your version of
021this file under either the MPL or the GPL.
022
023*/
024package ca.uhn.hl7v2.parser;
025
026import java.io.File;
027import java.io.FileReader;
028import java.util.ArrayList;
029import java.util.HashSet;
030import java.util.List;
031import java.util.Set;
032
033import org.slf4j.Logger;
034import org.slf4j.LoggerFactory;
035import org.w3c.dom.DOMException;
036import org.w3c.dom.Document;
037import org.w3c.dom.Element;
038import org.w3c.dom.Node;
039import org.w3c.dom.NodeList;
040
041import ca.uhn.hl7v2.HL7Exception;
042import ca.uhn.hl7v2.HapiContext;
043import ca.uhn.hl7v2.model.Group;
044import ca.uhn.hl7v2.model.Message;
045import ca.uhn.hl7v2.model.Segment;
046import ca.uhn.hl7v2.model.Structure;
047import ca.uhn.hl7v2.util.XMLUtils;
048import ca.uhn.hl7v2.validation.impl.NoValidation;
049import ca.uhn.hl7v2.validation.impl.ValidationContextFactory;
050
051/**
052 * <p>A default XMLParser.  This class assigns segment elements (in an XML-encoded message) 
053 * to Segment objects (in a Message object) using the name of a segment and the names 
054 * of any groups in which the segment is nested.  The names of group classes must correspond
055 * to the names of group elements (they must be identical except that a dot in the element 
056 * name, following the message name, is replaced with an underscore, in order to consitute a 
057 * valid class name). </p>
058 * <p>At the time of writing, the group names in the XML spec are changing.  Many of the group 
059 * names have been automatically generated based on the group contents.  However, these automatic 
060 * names are gradually being replaced with manually assigned names.  This process is expected to 
061 * be complete by November 2002.  As a result, mismatches are likely.  Messages could be  
062 * transformed prior to parsing (using XSLT) as a work-around.  Alternatively the group class names 
063 * could be changed to reflect updates in the XML spec.  Ultimately, HAPI group classes will be 
064 * changed to correspond with the official group names, once these are all assigned.  </p>
065 * 
066 * @see ParserConfiguration for configuration options which may affect parser encoding and decoding behaviour
067 * @author Bryan Tripp
068 */
069public class DefaultXMLParser extends XMLParser {
070
071    private static final Logger log = LoggerFactory.getLogger(DefaultXMLParser.class);
072
073    private static final Set<String> ourForceGroupNames;
074    
075    static {
076        ourForceGroupNames = new HashSet<String>();
077        ourForceGroupNames.add("DIET");
078    }
079    
080    public DefaultXMLParser() {
081        super();
082    }
083    
084    public DefaultXMLParser(HapiContext context) {
085                super(context);
086        }
087
088        /** 
089     * Creates a new instance of DefaultXMLParser 
090     *  
091     * @param theFactory custom factory to use for model class lookup 
092     */
093    public DefaultXMLParser(ModelClassFactory theFactory) {
094        super(theFactory);
095    }
096    
097    /**
098     * <p>Creates an XML Document that corresponds to the given Message object. </p>
099     * <p>If you are implementing this method, you should create an XML Document, and insert XML Elements
100     * into it that correspond to the groups and segments that belong to the message type that your subclass
101     * of XMLParser supports.  Then, for each segment in the message, call the method
102     * <code>encode(Segment segmentObject, Element segmentElement)</code> using the Element for
103     * that segment and the corresponding Segment object from the given Message.</p>
104     */
105    public Document encodeDocument(Message source) throws HL7Exception {
106        String messageClassName = source.getClass().getName();
107        String messageName = messageClassName.substring(messageClassName.lastIndexOf('.') + 1);
108        try {
109            Document doc = XMLUtils.emptyDocument(messageName);
110            //Element root = doc.createElement(messageName);
111            //doc.appendChild(root);
112            encode(source, doc.getDocumentElement());
113            return doc;
114        } catch (Exception e) {
115            throw new HL7Exception(
116                "Can't create XML document - " + e.getClass().getName(), e);
117        }
118    }
119
120    /**
121     * Copies data from a group object into the corresponding group element, creating any 
122     * necessary child nodes.  
123     */
124    private void encode(Group groupObject, Element groupElement) throws HL7Exception {
125        String[] childNames = groupObject.getNames();
126        String messageName = groupObject.getMessage().getName();
127        
128        try {
129                for (String name : childNames) {
130                Structure[] reps = groupObject.getAll(name);
131                for (Structure rep : reps) {
132                    String elementName = makeGroupElementName(messageName, name);
133                                        Element childElement;
134                                        try {
135                                                childElement = groupElement.getOwnerDocument().createElement(elementName);
136                                } catch (DOMException e) {
137                                    throw new HL7Exception(
138                                        "Can't encode element " + elementName + " in group " + groupObject.getClass().getName(), e);
139                                }
140                    groupElement.appendChild(childElement);
141                    if (rep instanceof Group) {
142                        encode((Group) rep, childElement);
143                    }
144                    else if (rep instanceof Segment) {
145                        encode((Segment) rep, childElement);
146                    }
147                                }
148            }
149        } catch (DOMException e) {
150            throw new HL7Exception(
151                "Can't encode group " + groupObject.getClass().getName(), e);
152        }
153    }
154
155
156    /**
157     * <p>Creates and populates a Message object from an XML Document that contains an XML-encoded HL7 message.</p>
158     * <p>The easiest way to implement this method for a particular message structure is as follows:
159     * <ol><li>Create an instance of the Message type you are going to handle with your subclass
160     * of XMLParser</li>
161     * <li>Go through the given Document and find the Elements that represent the top level of
162     * each message segment. </li>
163     * <li>For each of these segments, call <code>parse(Segment segmentObject, Element segmentElement)</code>,
164     * providing the appropriate Segment from your Message object, and the corresponding Element.</li></ol>
165     * At the end of this process, your Message object should be populated with data from the XML
166     * Document.</p>
167     * @throws HL7Exception if the message is not correctly formatted.
168     * @throws EncodingNotSupportedException if the message encoded
169     *     is not supported by this parser.
170     */
171    public Message parseDocument(Document xmlMessage, String version) throws HL7Exception {
172
173        assertNamespaceURI(xmlMessage.getDocumentElement().getNamespaceURI());
174
175        Message message = instantiateMessage(xmlMessage.getDocumentElement().getLocalName(), version, true);
176        // Note: this will change in future to reuse the Parser's/HapiContext's
177        // ValidationContext.
178        message.setValidationContext(getValidationContext());
179        parse(message, xmlMessage.getDocumentElement());
180        return message;
181    }
182
183    /**
184     * Populates the given group object with data from the given group element, ignoring 
185     * any unrecognized nodes.  
186     */
187    private void parse(Group groupObject, Element groupElement) throws HL7Exception {
188        String[] childNames = groupObject.getNames();
189        String messageName = groupObject.getMessage().getName();
190        
191        NodeList allChildNodes = groupElement.getChildNodes();
192        List<String> unparsedElementList = new ArrayList<String>();
193        for (int i = 0; i < allChildNodes.getLength(); i++) {
194            Node node = allChildNodes.item(i);
195            String name = node.getLocalName();
196            if (node.getNodeType() == Node.ELEMENT_NODE && !unparsedElementList.contains(name)) {
197                assertNamespaceURI(node.getNamespaceURI());
198                unparsedElementList.add(name);                
199            }
200        }
201        
202        //we're not too fussy about order here (all occurrences get parsed as repetitions) ... 
203        for (String nextChildName : childNames) {
204            String childName = nextChildName;
205            if(groupObject.isGroup(nextChildName)) {
206                childName = makeGroupElementName(groupObject.getMessage().getName(), nextChildName);
207            }
208                        unparsedElementList.remove(childName);
209            
210            // 4 char segment names are second occurrences of a segment within a single message
211            // structure. e.g. the second PID segment in an A17 patient swap message is known
212            // to hapi's code represenation as PID2
213            if (nextChildName.length() == 4 && Character.isDigit(nextChildName.charAt(3))) {
214                log.trace("Skipping rep segment: {}", nextChildName);
215            } else {   
216                parseReps(groupElement, groupObject, messageName, nextChildName, nextChildName);
217            }
218        }
219        
220        for (String segName : unparsedElementList) {
221            String segIndexName = groupObject.addNonstandardSegment(segName);
222            parseReps(groupElement, groupObject, messageName, segName, segIndexName);
223        }
224    }
225    
226    //param childIndexName may have an integer on the end if >1 sibling with same name (e.g. NTE2) 
227    private void parseReps(Element groupElement, Group groupObject, 
228            String messageName, String childName, String childIndexName) throws HL7Exception {
229        
230        String groupName = makeGroupElementName(messageName, childName);
231        List<Element> reps = getChildElementsByTagName(groupElement, groupName);
232        log.trace("# of elements matching {}: {}", groupName, reps.size());
233
234                if (groupObject.isRepeating(childIndexName)) {
235                        for (int i = 0; i < reps.size(); i++) {
236                                parseRep(reps.get(i), groupObject.get(childIndexName, i));
237                        }                                       
238                } else {
239                        if (reps.size() > 0) {
240                                parseRep(reps.get(0), groupObject.get(childIndexName, 0));                              
241                        }
242
243//                      if (reps.size() > 1) {                       
244//                              String newIndexName = groupObject.addNonstandardSegment(childName);                     
245//                              for (int i = 1; i < reps.size(); i++) {
246//                                      parseRep((Element) reps.get(i), groupObject.get(newIndexName, i-1));
247//                              }                                                               
248//                      }
249                        if (reps.size() > 1) {
250                                String newIndexName;
251                                int i=1;
252                                try     {
253                                        for (i = 1; i < reps.size(); i++) {
254                                                newIndexName = childName+(i+1);
255                                                Structure st = groupObject.get(newIndexName);
256                                                parseRep(reps.get(i), st);
257                                        }
258                                } catch(Throwable t) {
259                                        log.info("Issue Parsing: " + t);
260                                        newIndexName = groupObject.addNonstandardSegment(childName);
261                                        for (int j = i; j < reps.size(); j++) {
262                                                parseRep(reps.get(j), groupObject.get(newIndexName, j-i));
263                                        }
264                                }
265                        }
266                        
267                }
268    }
269    
270    private void parseRep(Element theElem, Structure theObj) throws HL7Exception {
271                if (theObj instanceof Group) {
272                        parse((Group) theObj, theElem);
273                }
274                else if (theObj instanceof Segment) {
275                        parse((Segment) theObj, theElem);
276                }                
277                log.trace("Parsed element: {}", theElem.getNodeName());         
278    }
279    
280    //includes direct children only
281    private List<Element> getChildElementsByTagName(Element theElement, String theName) throws HL7Exception {
282        List<Element> result = new ArrayList<Element>(10);
283        NodeList children = theElement.getChildNodes();
284        
285        for (int i = 0; i < children.getLength(); i++) {
286                Node child = children.item(i);
287                if (child.getNodeType() == Node.ELEMENT_NODE && child.getLocalName().equals(theName)) {
288                assertNamespaceURI(child.getNamespaceURI());
289                        result.add((Element)child);
290                }
291        }
292        
293        return result; 
294    }
295    
296    /** 
297     * Given the name of a group element in an XML message, returns the corresponding 
298     * group class name.  This name is identical except in order to be a valid class 
299     * name, the dot character immediately following the message name is replaced with 
300     * an underscore.  For example, there is a group element called ADT_A01.INSURANCE and the 
301     * corresponding group Class is called ADT_A01_INSURANCE. 
302     */
303//    protected static String makeGroupClassName(String elementName) {
304//        return elementName.replace('.', '_');
305//    }
306
307    /** 
308     * Given the name of a message and a Group class, returns the corresponding group element name in an 
309     * XML-encoded message.  This is the message name and group name separated by a dot. For example, 
310     * ADT_A01.INSURANCE.
311     * 
312     * If it looks like a segment name (i.e. has 3 characters), no change is made. 
313     */
314    protected static String makeGroupElementName(String messageName, String className) {
315        String ret;
316        
317        if (className.length() > 4 || ourForceGroupNames.contains(className)) {
318            StringBuilder elementName = new StringBuilder();
319            elementName.append(messageName);
320            elementName.append('.');
321            elementName.append(className);
322            ret = elementName.toString();
323        } else if (className.length() == 4) {
324            // It is not clear why this case is needed.. We should figure out
325                // why it was added, since removing it or optimizing its use would
326                // prevent the need for "ourForGroupNames" above
327                ret = className.substring(0,3);
328        } else {
329            ret = className;
330        }
331        
332        return ret;
333    }
334
335    /** Test harness */
336    public static void main(String args[]) {
337        if (args.length != 1) {
338            System.out.println("Usage: DefaultXMLParser pipe_encoded_file");
339            System.exit(1);
340        }
341
342        //read and parse message from file 
343        try {
344            File messageFile = new File(args[0]);
345            long fileLength = messageFile.length();
346            FileReader r = new FileReader(messageFile);
347            char[] cbuf = new char[(int) fileLength];
348            System.out.println("Reading message file ... " + r.read(cbuf) + " of " + fileLength + " chars");
349            r.close();
350            String messString = String.valueOf(cbuf);
351
352            Parser inParser = null;
353            Parser outParser = null;
354            PipeParser pp = new PipeParser();
355            ca.uhn.hl7v2.parser.XMLParser xp = new DefaultXMLParser();
356            System.out.println("Encoding: " + pp.getEncoding(messString));
357            if (pp.getEncoding(messString) != null) {
358                inParser = pp;
359                outParser = xp;
360            }
361            else if (xp.getEncoding(messString) != null) {
362                inParser = xp;
363                outParser = pp;
364            }
365
366            Message mess = inParser.parse(messString);
367            System.out.println("Got message of type " + mess.getClass().getName());
368
369            String otherEncoding = outParser.encode(mess);
370            System.out.println(otherEncoding);
371        }
372        catch (Exception e) {
373            e.printStackTrace();
374        }
375    }
376
377    /**
378     * {@inheritDoc}
379     */
380        @Override
381        public void parse(Message theMessage, String theString) throws HL7Exception {
382                Document doc = parseStringIntoDocument(theString);
383        parse(theMessage, doc.getDocumentElement());
384
385        applySuperStructureName(theMessage);
386        }
387
388    /**
389     * Convenience factory method which returns an instance that has a 
390     * {@link NoValidation NoValidation validation context}. 
391     */
392    public static DefaultXMLParser getInstanceWithNoValidation() {
393        DefaultXMLParser retVal = new DefaultXMLParser();
394        retVal.setValidationContext(ValidationContextFactory.noValidation());
395        return retVal;
396    }
397
398
399}