001package ca.uhn.hl7v2.parser;
002
003import java.util.Collections;
004import java.util.HashSet;
005import java.util.Set;
006
007import ca.uhn.hl7v2.HapiContext;
008import ca.uhn.hl7v2.model.GenericMessage;
009import ca.uhn.hl7v2.model.Varies;
010import ca.uhn.hl7v2.util.Terser;
011import ca.uhn.hl7v2.util.idgenerator.FileBasedHiLoGenerator;
012import ca.uhn.hl7v2.util.idgenerator.IDGenerator;
013import ca.uhn.hl7v2.validation.ValidationContext;
014
015/**
016 * Contains
017 * 
018 * @see HapiContext#getParserConfiguration()
019 * 
020 */
021public class ParserConfiguration {
022
023        /**
024         * @link {@link UnexpectedSegmentBehaviourEnum#ADD_INLINE}
025         */
026        // NB if you change the default, edit the javadoc for the enum itself
027        public static final UnexpectedSegmentBehaviourEnum DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR = UnexpectedSegmentBehaviourEnum.ADD_INLINE;
028
029        private boolean allowUnknownVersions;
030        private boolean escapeSubcomponentDelimiterInPrimitive = false;
031        private IDGenerator idGenerator = new FileBasedHiLoGenerator();
032        private String myDefaultObx2Type;
033        private boolean myEncodeEmptyMandatorySegments = true;
034        private Set<String> myForcedEncode = new HashSet<String>();
035        private String myInvalidObx2Type;
036        private UnexpectedSegmentBehaviourEnum myUnexpectedSegmentBehaviour;
037        private boolean nonGreedyMode = false;
038        private boolean prettyPrintWhenEncodingXml = true;
039        private boolean validating = true;
040
041        /**
042         * <p>
043         * Forces the parser to encode certain segments/fields, even if they contain
044         * no content. This method may be called multiple times with multiple path
045         * definitions, and each path definition contains the path to the segment or
046         * field which needs to be forced.
047         * </p>
048         * <p>
049         * Path definitions are similar in format to {@link Terser Terser} paths.
050         * They contain a slash-separated lookup path to reach a given segment, and
051         * optionally a field number. The following are examples of paths which
052         * could be added here, as well as the sample output for an otherwise empty
053         * ORU^R01 message:
054         * </p>
055         * <table cellpadding="2" cellspacing="2" border="0">
056         * <thead>
057         * <tr>
058         * <th style="background: #FFA0FF;">Forced Encode Path</th>
059         * <th style="background: #FFA0FF;">Encode Output</th>
060         * </tr>
061         * </thead>
062         * <tr>
063         * <td>None (for illustration purposes)</td>
064         * <td style=" font-family: monospace;">
065         * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4</td>
066         * </tr>
067         * <tr>
068         * <td style="background: #E0E0E0;">PATIENT_RESULT/ORDER_OBSERVATION/ORC</td>
069         * <td style="background: #E0E0E0; font-family: monospace;">
070         * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
071         * ORC|</td>
072         * </tr>
073         * <tr>
074         * <td>PATIENT_RESULT/ORDER_OBSERVATION/ORC-4</td>
075         * <td style=" font-family: monospace;">
076         * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
077         * ORC||||</td>
078         * </tr>
079         * <tr>
080         * <td style="background: #E0E0E0;">PATIENT_RESULT/ORDER_OBSERVATION/ORC-4-2
081         * </td>
082         * <td style="background: #E0E0E0; font-family: monospace;">
083         * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
084         * ORC||||^</td>
085         * </tr>
086         * </table>
087         * <p>
088         * While empty segments do not generally have any meaning according to HL7,
089         * this may be useful when transmitting to systems which rely on segments
090         * being received even if they have no content.
091         * </p>
092         * <p>
093         * Note that this configuration item currently only applies to
094         * {@link PipeParser}
095         * </p>
096         *
097     * @param theForcedEncode path definition
098         * @since 2.0
099         */
100        public void addForcedEncode(String theForcedEncode) {
101                if (theForcedEncode == null) {
102                        throw new NullPointerException("forced encode may not be null");
103                }
104
105                int lastSlashIndex = theForcedEncode.lastIndexOf('/');
106                lastSlashIndex = Math.max(lastSlashIndex, 0);
107
108                if (lastSlashIndex == 0) {
109                        if (!theForcedEncode.matches("[A-Z0-9]{3}(-[0-9]+){0,2}$")) {
110                                throw new IllegalArgumentException("Definition must end with a segment name or field lookup, e.g. MSH or MSH-2");
111                        }
112                } else {
113                        if (lastSlashIndex == theForcedEncode.length() || !theForcedEncode.substring(lastSlashIndex + 1).matches("[A-Z0-9]{3}(-[0-9]+){0,2}$")) {
114                                throw new IllegalArgumentException("Definition must end with a segment name or field lookup, e.g. MSH or MSH-2");
115                        }
116                }
117                myForcedEncode.add(theForcedEncode);
118        }
119
120        boolean determineForcedEncodeIncludesTerserPath(String theTerserPath) {
121                for (String next : getForcedEncode()) {
122                        if (next.startsWith(theTerserPath)) {
123                                return true;
124                        }
125                }
126                return false;
127        }
128
129        int determineForcedFieldNumForTerserPath(String theCurrentTerserPath) {
130                int forceUpToFieldNum = 0;
131                for (String nextPath : getForcedEncode()) {
132                        if (nextPath.startsWith(theCurrentTerserPath) && nextPath.length() > theCurrentTerserPath.length()) {
133                                int endOfFieldDef = nextPath.indexOf('-', theCurrentTerserPath.length() + 1);
134                                if (endOfFieldDef == -1) {
135                                        endOfFieldDef = nextPath.length();
136                                }
137                                String fieldNumString = nextPath.substring(theCurrentTerserPath.length() + 1, endOfFieldDef);
138                                forceUpToFieldNum = Math.max(forceUpToFieldNum, Integer.parseInt(fieldNumString));
139                        }
140                }
141                return forceUpToFieldNum;
142        }
143
144        /**
145         * Returns the default datatype ("ST", "NM", etc) for an OBX segment with a
146         * missing OBX-2 value
147         * 
148         * @return Returns the default datatype ("ST", "NM", etc) for an OBX segment
149         *         with a missing OBX-2 value
150         * @see #setDefaultObx2Type(String)
151         */
152        public String getDefaultObx2Type() {
153                return myDefaultObx2Type;
154        }
155
156        /**
157         * @return Returns the forced encode strings added by
158         *         {@link #addForcedEncode(String)}
159         * 
160         * @see #addForcedEncode(String)
161         * @since 1.3
162         */
163        public Set<String> getForcedEncode() {
164                return Collections.unmodifiableSet(myForcedEncode);
165        }
166
167        /**
168         * @return the ID Generator to be used for generating IDs for new messages
169         */
170        public IDGenerator getIdGenerator() {
171                return idGenerator;
172        }
173
174        /**
175         * Returns the value provides a default datatype ("ST", "NM", etc) for an
176         * OBX segment with an invalid OBX-2 value.
177         * 
178         * @return Returns the value provides a default datatype ("ST", "NM", etc)
179         *         for an OBX segment with an invalid OBX-2 value.
180         * @see #setInvalidObx2Type(String)
181         */
182        public String getInvalidObx2Type() {
183                return myInvalidObx2Type;
184        }
185
186        /**
187         * Returns the behaviour to use when parsing a message and a nonstandard
188         * segment is found. Default is
189         * {@link #DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR}
190     *
191     * @return the behaviour to use when a nonstandard egment is found
192         */
193        public UnexpectedSegmentBehaviourEnum getUnexpectedSegmentBehaviour() {
194                if (myUnexpectedSegmentBehaviour == null) {
195                        myUnexpectedSegmentBehaviour = DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR;
196                }
197                return myUnexpectedSegmentBehaviour;
198        }
199
200        /**
201         * If set to <code>true</code> (default is <code>false</code>) the parser
202         * will allow messages to parse, even if they contain a version which is not
203         * known to the parser. When operating in this mode, if a message arrives
204         * with an unknown version string, the parser will attempt to parse it using
205         * a {@link GenericMessage Generic Message} class instead of a specific HAPI
206         * structure class. Default is <code>false</code>.
207     *
208     * @return true if parsing messages with unknown versions is allowed
209         */
210        public boolean isAllowUnknownVersions() {
211                return this.allowUnknownVersions;
212        }
213
214        /**
215     * Returns <code>true</code> if empty segments should still be encoded
216     * if they are mandatory within their message structure.  Default is <code>false</code>.
217         * @return <code>true</code> if empty segments should still be encoded
218     *
219         * @see #setEncodeEmptyMandatoryFirstSegments(boolean)
220         */
221        public boolean isEncodeEmptyMandatorySegments() {
222                return myEncodeEmptyMandatorySegments;
223        }
224
225        /**
226     * Returns code>true</code> if subcomponent delimiters in OBX-5 shall be
227     *         ignored. Default is <code>false</code>.
228         * @return <code>true</code> if subcomponent delimiters in OBX-5 shall be
229         *         ignored
230         */
231        public boolean isEscapeSubcomponentDelimiterInPrimitive() {
232                return escapeSubcomponentDelimiterInPrimitive;
233        }
234
235        /**
236         * Returns <code>true</code> if the parser should parse in non-greedy mode. Default
237         * is <code>false</code>
238         * 
239         * @see #setNonGreedyMode(boolean) for an explanation of non-greedy mode
240         */
241        public boolean isNonGreedyMode() {
242                return nonGreedyMode;
243        }
244
245        /**
246         * If set to <code>true</code> (which is the default), {@link XMLParser XML Parsers}
247         * will attempt to pretty-print the XML they generate. This means the messages will look
248         * nicer to humans, but may take up slightly more space/bandwidth.
249         */
250        public boolean isPrettyPrintWhenEncodingXml() {
251                return prettyPrintWhenEncodingXml;
252        }
253
254        /**
255     * Returns <code>true</code> if the parser validates using a configured
256     *         {@link ValidationContext}. Default is <code>true</code>.
257         * @return <code>true</code> if the parser validates using a configured
258         *         {@link ValidationContext}
259         */
260        public boolean isValidating() {
261                return validating;
262        }
263
264        /**
265         * Removes a forced encode entry
266         *
267     * @param theForcedEncode path definition to be removed
268         * @see #addForcedEncode(String)
269         * @since 1.3
270         */
271        public void removeForcedEncode(String theForcedEncode) {
272                if (theForcedEncode == null) {
273                        throw new NullPointerException("forced encode may not be null");
274                }
275
276                myForcedEncode.remove(theForcedEncode);
277        }
278
279        /**
280         * If set to <code>true</code> (default is <code>false</code>) the parser
281         * will allow messages to parse, even if they contain a version which is not
282         * known to the parser. When operating in this mode, if a message arrives
283         * with an unknown version string, the parser will attempt to parse it using
284         * a {@link GenericMessage Generic Message} class instead of a specific HAPI
285         * structure class.
286     *
287     * @param theAllowUnknownVersions true if parsing unknown versions shall be allowed
288         */
289        public void setAllowUnknownVersions(boolean theAllowUnknownVersions) {
290                allowUnknownVersions = theAllowUnknownVersions;
291        }
292
293        /**
294         * <p>
295         * If this property is set, the value provides a default datatype ("ST",
296         * "NM", etc) for an OBX segment with a missing OBX-2 value. This is useful
297         * when parsing messages from systems which do not correctly populate OBX-2.
298         * </p>
299         * <p>
300         * For example, if this property is set to "ST", and the following OBX
301         * segment is encountered:
302         * 
303         * <pre>
304         * OBX|||||This is a value
305         * </pre>
306         * 
307         * It will be parsed as though it had read:
308         * 
309         * <pre>
310         * OBX||ST|||This is a value
311         * </pre>
312         * 
313         * </p>
314         * <p>
315         * Note that this configuration can also be set globally using the system
316         * property {@link Varies#DEFAULT_OBX2_TYPE_PROP}, but any value provided to
317         * {@link ParserConfiguration} takes priority over the system property.
318         * </p>
319         * 
320         * @param theDefaultObx2Type
321         *            If this property is set, the value provides a default datatype
322         *            ("ST", "NM", etc) for an OBX segment with a missing OBX-2
323         *            value
324         * @see #setInvalidObx2Type(String)
325         * @see Varies#INVALID_OBX2_TYPE_PROP
326         */
327        public void setDefaultObx2Type(String theDefaultObx2Type) {
328                myDefaultObx2Type = theDefaultObx2Type;
329        }
330
331        /**
332         * <p>
333         * If set to <code>true</code> (default is <code>true</code>), when encoding
334         * a group using the PipeParser where the first segment is required, but no
335         * data has been populated in that segment, the empty segment is now still
336         * encoded if needed as a blank segment in order to give parsers a hint
337         * about which group subsequent segments are in. This helps to ensure that
338         * messages can be "round tripped", meaning that a message which is parsed,
339         * encoded, and then re-parsed should contain exactly the same structure
340         * from beginning to end.
341         * </p>
342         * <p>
343         * </p>
344         * For example, in an ORU^R01 message with a populated OBX segment, but no
345         * data in the mandatory OBR segment which begins the ORDER_OBSERVATION
346         * group the message would still contain an empty OBR segment when encoded:
347         * 
348         * <pre>
349         *      MSH|^~\&|REG|W|||201103230042||ORU^R01|32153168|P|2.5
350         *      OBR|
351         *      OBX||ST|||Value Data
352         * </pre>
353         * 
354         * Previously, the following encoding would have occurred, which would have
355         * incorrectly been parsed as having a custom OBX segment instead of having
356         * a normal ORDER_OBSERVATION group:
357         * 
358         * <pre>
359         *      MSH|^~\&|REG|W|||201103230042||ORU^R01|32153168|P|2.5
360         *      OBX||ST|||Value Data
361         * </pre>
362         * 
363         * @param theEncodeEmptyMandatorySegments
364         *            If set to <code>true</code> (default is <code>true</code>),
365         *            when encoding a group using the PipeParser where the first
366         *            segment is required, but no data has been populated in that
367         *            segment, the empty segment is now still encoded if needed as a
368         *            blank segment in order to give parsers a hint about which
369         *            group subsequent segments are in
370         */
371        public void setEncodeEmptyMandatoryFirstSegments(boolean theEncodeEmptyMandatorySegments) {
372                myEncodeEmptyMandatorySegments = theEncodeEmptyMandatorySegments;
373        }
374
375        /**
376         * Set to <code>true</code> if subcomponent delimiters in OBX-5 shall be
377         * ignored
378     * @param escapeSubcomponentDelimiterInPrimitive boolean flag to enable or disable this behavior
379         */
380        public void setEscapeSubcomponentDelimiterInPrimitive(boolean escapeSubcomponentDelimiterInPrimitive) {
381                this.escapeSubcomponentDelimiterInPrimitive = escapeSubcomponentDelimiterInPrimitive;
382        }
383
384        /**
385         * @param idGenerator
386         *            the {@link IDGenerator} to be used for generating IDs for new
387         *            messages, preferable initialized using the methods described
388         *            in IDGeneratorFactory.
389         * 
390         * @see IDGenerator
391         */
392        public void setIdGenerator(IDGenerator idGenerator) {
393                this.idGenerator = idGenerator;
394        }
395
396        /**
397         * <p>
398         * If this property is set, the value provides a default datatype ("ST",
399         * "NM", etc) for an OBX segment with an invalid OBX-2 value. This is useful
400         * when parsing messages from systems which do not correctly populate OBX-2.
401         * </p>
402         * <p>
403         * For example, if this property is set to "ST", and the following OBX
404         * segment is encountered:
405         * 
406         * <pre>
407         * OBX||INVALID|||This is a value
408         * </pre>
409         * 
410         * It will be parsed as though it had read:
411         * 
412         * <pre>
413         * OBX||ST|||This is a value
414         * </pre>
415         * 
416         * </p>
417         * <p>
418         * Note that this configuration can also be set globally using the system
419         * property {@link Varies#INVALID_OBX2_TYPE_PROP}, but any value provided to
420         * {@link ParserConfiguration} takes priority over the system property.
421         * </p>
422         * 
423         * @param theInvalidObx2Type
424         *            If this property is set, the value provides a default datatype
425         *            ("ST", "NM", etc) for an OBX segment with an invalid OBX-2
426         *            value. This is useful when parsing messages from systems which
427         *            do not correctly populate OBX-2.
428         * @see ParserConfiguration#setDefaultObx2Type(String)
429         * @see Varies#DEFAULT_OBX2_TYPE_PROP
430         */
431        public void setInvalidObx2Type(String theInvalidObx2Type) {
432                myInvalidObx2Type = theInvalidObx2Type;
433        }
434
435        /**
436         * If set to <code>true</code> (default is <code>false</code>), pipe parser will be
437         * put in non-greedy mode. This setting applies only to {@link PipeParser Pipe Parsers} and
438         * will have no effect on {@link XMLParser XML Parsers}.
439         * 
440         * <p>
441         * In non-greedy mode, if the message structure being parsed has an ambiguous
442         * choice of where to put a segment because there is a segment matching the
443         * current segment name in both a later position in the message, and
444         * in an earlier position as a part of a repeating group, the earlier
445         * position will be chosen.
446         * </p>
447         * <p>
448         * This is perhaps best explained with an example. Consider the following structure:
449         * </p>
450         * <pre>
451         * MSH
452         * GROUP_1 (start)
453         * {
454         *    AAA
455         *    BBB
456         *    GROUP_2 (start)
457         *    {
458         *       AAA
459         *    }
460         *    GROUP_2 (end)
461         * }
462         * GROUP_1 (end)
463         * </pre>
464         * <p>
465         * </p>
466         * For the above example, consider a message containing the following segments:<br/>
467         * <code>MSH<br/>
468         * AAA<br/>
469         * BBB<br/>
470         * AAA</code>
471         * </p>
472         * <p>
473         * In this example, when the second AAA segment is encountered, there are two
474         * possible choices. It would be placed in GROUP_2, or it could be placed in 
475         * a second repetition of GROUP_1. By default it will be placed in GROUP_2, but
476         * in non-greedy mode it will be put in a new repetition of GROUP_1.
477         * </p>
478         * <p>
479         * This mode is useful for example when parsing OML^O21 messages containing
480         * multiple orders.
481         * </p>
482         */
483        public void setNonGreedyMode(boolean theNonGreedyMode) {
484                nonGreedyMode = theNonGreedyMode;
485        }
486
487        /**
488         * If set to <code>true</code> (which is the default), {@link XMLParser XML Parsers}
489         * will attempt to pretty-print the XML they generate. This means the messages will look
490         * nicer to humans, but may take up slightly more space/bandwidth.
491         */
492        public void setPrettyPrintWhenEncodingXml(boolean thePrettyPrintWhenEncodingXml) {
493                prettyPrintWhenEncodingXml = thePrettyPrintWhenEncodingXml;
494        }
495
496        /**
497         * Sets the behaviour to use when parsing a message and a nonstandard
498         * segment is found
499     *
500     * @param theUnexpectedSegmentBehaviour behaviour to use when a nonstandard segment is found
501     */
502        public void setUnexpectedSegmentBehaviour(UnexpectedSegmentBehaviourEnum theUnexpectedSegmentBehaviour) {
503                if (theUnexpectedSegmentBehaviour == null) {
504                        throw new NullPointerException("UnexpectedSegmentBehaviour can not be null");
505                }
506                myUnexpectedSegmentBehaviour = theUnexpectedSegmentBehaviour;
507        }
508
509        /**
510         * Determines whether the parser validates using a configured
511         * {@link ValidationContext} or not. This allows to disable message
512         * validation although a validation context is defined.
513         * 
514         * @param validating
515         *            <code>true</code> if parser shall validate, <code>false</code>
516         *            if not
517         */
518        public void setValidating(boolean validating) {
519                this.validating = validating;
520        }
521
522}