001package ca.uhn.hl7v2.parser; 002 003import java.util.Collections; 004import java.util.HashSet; 005import java.util.Set; 006 007import ca.uhn.hl7v2.HapiContext; 008import ca.uhn.hl7v2.model.GenericMessage; 009import ca.uhn.hl7v2.model.Varies; 010import ca.uhn.hl7v2.util.Terser; 011import ca.uhn.hl7v2.util.idgenerator.FileBasedHiLoGenerator; 012import ca.uhn.hl7v2.util.idgenerator.IDGenerator; 013import ca.uhn.hl7v2.validation.ValidationContext; 014 015/** 016 * Contains 017 * 018 * @see HapiContext#getParserConfiguration() 019 * 020 */ 021public class ParserConfiguration { 022 023 /** 024 * @link {@link UnexpectedSegmentBehaviourEnum#ADD_INLINE} 025 */ 026 // NB if you change the default, edit the javadoc for the enum itself 027 public static final UnexpectedSegmentBehaviourEnum DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR = UnexpectedSegmentBehaviourEnum.ADD_INLINE; 028 029 private boolean allowUnknownVersions; 030 private boolean escapeSubcomponentDelimiterInPrimitive = false; 031 private IDGenerator idGenerator = new FileBasedHiLoGenerator(); 032 private String myDefaultObx2Type; 033 private boolean myEncodeEmptyMandatorySegments = true; 034 private Set<String> myForcedEncode = new HashSet<String>(); 035 private String myInvalidObx2Type; 036 private UnexpectedSegmentBehaviourEnum myUnexpectedSegmentBehaviour; 037 private boolean nonGreedyMode = false; 038 private boolean prettyPrintWhenEncodingXml = true; 039 private boolean validating = true; 040 041 /** 042 * <p> 043 * Forces the parser to encode certain segments/fields, even if they contain 044 * no content. This method may be called multiple times with multiple path 045 * definitions, and each path definition contains the path to the segment or 046 * field which needs to be forced. 047 * </p> 048 * <p> 049 * Path definitions are similar in format to {@link Terser Terser} paths. 050 * They contain a slash-separated lookup path to reach a given segment, and 051 * optionally a field number. The following are examples of paths which 052 * could be added here, as well as the sample output for an otherwise empty 053 * ORU^R01 message: 054 * </p> 055 * <table cellpadding="2" cellspacing="2" border="0"> 056 * <thead> 057 * <tr> 058 * <th style="background: #FFA0FF;">Forced Encode Path</th> 059 * <th style="background: #FFA0FF;">Encode Output</th> 060 * </tr> 061 * </thead> 062 * <tr> 063 * <td>None (for illustration purposes)</td> 064 * <td style=" font-family: monospace;"> 065 * MSH|^~\&|||||||ORU^R01^ORU_R01||T|2.4</td> 066 * </tr> 067 * <tr> 068 * <td style="background: #E0E0E0;">PATIENT_RESULT/ORDER_OBSERVATION/ORC</td> 069 * <td style="background: #E0E0E0; font-family: monospace;"> 070 * MSH|^~\&|||||||ORU^R01^ORU_R01||T|2.4<br> 071 * ORC|</td> 072 * </tr> 073 * <tr> 074 * <td>PATIENT_RESULT/ORDER_OBSERVATION/ORC-4</td> 075 * <td style=" font-family: monospace;"> 076 * MSH|^~\&|||||||ORU^R01^ORU_R01||T|2.4<br> 077 * ORC||||</td> 078 * </tr> 079 * <tr> 080 * <td style="background: #E0E0E0;">PATIENT_RESULT/ORDER_OBSERVATION/ORC-4-2 081 * </td> 082 * <td style="background: #E0E0E0; font-family: monospace;"> 083 * MSH|^~\&|||||||ORU^R01^ORU_R01||T|2.4<br> 084 * ORC||||^</td> 085 * </tr> 086 * </table> 087 * <p> 088 * While empty segments do not generally have any meaning according to HL7, 089 * this may be useful when transmitting to systems which rely on segments 090 * being received even if they have no content. 091 * </p> 092 * <p> 093 * Note that this configuration item currently only applies to 094 * {@link PipeParser} 095 * </p> 096 * 097 * @param theForcedEncode path definition 098 * @since 2.0 099 */ 100 public void addForcedEncode(String theForcedEncode) { 101 if (theForcedEncode == null) { 102 throw new NullPointerException("forced encode may not be null"); 103 } 104 105 int lastSlashIndex = theForcedEncode.lastIndexOf('/'); 106 lastSlashIndex = Math.max(lastSlashIndex, 0); 107 108 if (lastSlashIndex == 0) { 109 if (!theForcedEncode.matches("[A-Z0-9]{3}(-[0-9]+){0,2}$")) { 110 throw new IllegalArgumentException("Definition must end with a segment name or field lookup, e.g. MSH or MSH-2"); 111 } 112 } else { 113 if (lastSlashIndex == theForcedEncode.length() || !theForcedEncode.substring(lastSlashIndex + 1).matches("[A-Z0-9]{3}(-[0-9]+){0,2}$")) { 114 throw new IllegalArgumentException("Definition must end with a segment name or field lookup, e.g. MSH or MSH-2"); 115 } 116 } 117 myForcedEncode.add(theForcedEncode); 118 } 119 120 boolean determineForcedEncodeIncludesTerserPath(String theTerserPath) { 121 for (String next : getForcedEncode()) { 122 if (next.startsWith(theTerserPath)) { 123 return true; 124 } 125 } 126 return false; 127 } 128 129 int determineForcedFieldNumForTerserPath(String theCurrentTerserPath) { 130 int forceUpToFieldNum = 0; 131 for (String nextPath : getForcedEncode()) { 132 if (nextPath.startsWith(theCurrentTerserPath) && nextPath.length() > theCurrentTerserPath.length()) { 133 int endOfFieldDef = nextPath.indexOf('-', theCurrentTerserPath.length() + 1); 134 if (endOfFieldDef == -1) { 135 endOfFieldDef = nextPath.length(); 136 } 137 String fieldNumString = nextPath.substring(theCurrentTerserPath.length() + 1, endOfFieldDef); 138 forceUpToFieldNum = Math.max(forceUpToFieldNum, Integer.parseInt(fieldNumString)); 139 } 140 } 141 return forceUpToFieldNum; 142 } 143 144 /** 145 * Returns the default datatype ("ST", "NM", etc) for an OBX segment with a 146 * missing OBX-2 value 147 * 148 * @return Returns the default datatype ("ST", "NM", etc) for an OBX segment 149 * with a missing OBX-2 value 150 * @see #setDefaultObx2Type(String) 151 */ 152 public String getDefaultObx2Type() { 153 return myDefaultObx2Type; 154 } 155 156 /** 157 * @return Returns the forced encode strings added by 158 * {@link #addForcedEncode(String)} 159 * 160 * @see #addForcedEncode(String) 161 * @since 1.3 162 */ 163 public Set<String> getForcedEncode() { 164 return Collections.unmodifiableSet(myForcedEncode); 165 } 166 167 /** 168 * @return the ID Generator to be used for generating IDs for new messages 169 */ 170 public IDGenerator getIdGenerator() { 171 return idGenerator; 172 } 173 174 /** 175 * Returns the value provides a default datatype ("ST", "NM", etc) for an 176 * OBX segment with an invalid OBX-2 value. 177 * 178 * @return Returns the value provides a default datatype ("ST", "NM", etc) 179 * for an OBX segment with an invalid OBX-2 value. 180 * @see #setInvalidObx2Type(String) 181 */ 182 public String getInvalidObx2Type() { 183 return myInvalidObx2Type; 184 } 185 186 /** 187 * Returns the behaviour to use when parsing a message and a nonstandard 188 * segment is found. Default is 189 * {@link #DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR} 190 * 191 * @return the behaviour to use when a nonstandard egment is found 192 */ 193 public UnexpectedSegmentBehaviourEnum getUnexpectedSegmentBehaviour() { 194 if (myUnexpectedSegmentBehaviour == null) { 195 myUnexpectedSegmentBehaviour = DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR; 196 } 197 return myUnexpectedSegmentBehaviour; 198 } 199 200 /** 201 * If set to <code>true</code> (default is <code>false</code>) the parser 202 * will allow messages to parse, even if they contain a version which is not 203 * known to the parser. When operating in this mode, if a message arrives 204 * with an unknown version string, the parser will attempt to parse it using 205 * a {@link GenericMessage Generic Message} class instead of a specific HAPI 206 * structure class. Default is <code>false</code>. 207 * 208 * @return true if parsing messages with unknown versions is allowed 209 */ 210 public boolean isAllowUnknownVersions() { 211 return this.allowUnknownVersions; 212 } 213 214 /** 215 * Returns <code>true</code> if empty segments should still be encoded 216 * if they are mandatory within their message structure. Default is <code>false</code>. 217 * @return <code>true</code> if empty segments should still be encoded 218 * 219 * @see #setEncodeEmptyMandatoryFirstSegments(boolean) 220 */ 221 public boolean isEncodeEmptyMandatorySegments() { 222 return myEncodeEmptyMandatorySegments; 223 } 224 225 /** 226 * Returns code>true</code> if subcomponent delimiters in OBX-5 shall be 227 * ignored. Default is <code>false</code>. 228 * @return <code>true</code> if subcomponent delimiters in OBX-5 shall be 229 * ignored 230 */ 231 public boolean isEscapeSubcomponentDelimiterInPrimitive() { 232 return escapeSubcomponentDelimiterInPrimitive; 233 } 234 235 /** 236 * Returns <code>true</code> if the parser should parse in non-greedy mode. Default 237 * is <code>false</code> 238 * 239 * @see #setNonGreedyMode(boolean) for an explanation of non-greedy mode 240 */ 241 public boolean isNonGreedyMode() { 242 return nonGreedyMode; 243 } 244 245 /** 246 * If set to <code>true</code> (which is the default), {@link XMLParser XML Parsers} 247 * will attempt to pretty-print the XML they generate. This means the messages will look 248 * nicer to humans, but may take up slightly more space/bandwidth. 249 */ 250 public boolean isPrettyPrintWhenEncodingXml() { 251 return prettyPrintWhenEncodingXml; 252 } 253 254 /** 255 * Returns <code>true</code> if the parser validates using a configured 256 * {@link ValidationContext}. Default is <code>true</code>. 257 * @return <code>true</code> if the parser validates using a configured 258 * {@link ValidationContext} 259 */ 260 public boolean isValidating() { 261 return validating; 262 } 263 264 /** 265 * Removes a forced encode entry 266 * 267 * @param theForcedEncode path definition to be removed 268 * @see #addForcedEncode(String) 269 * @since 1.3 270 */ 271 public void removeForcedEncode(String theForcedEncode) { 272 if (theForcedEncode == null) { 273 throw new NullPointerException("forced encode may not be null"); 274 } 275 276 myForcedEncode.remove(theForcedEncode); 277 } 278 279 /** 280 * If set to <code>true</code> (default is <code>false</code>) the parser 281 * will allow messages to parse, even if they contain a version which is not 282 * known to the parser. When operating in this mode, if a message arrives 283 * with an unknown version string, the parser will attempt to parse it using 284 * a {@link GenericMessage Generic Message} class instead of a specific HAPI 285 * structure class. 286 * 287 * @param theAllowUnknownVersions true if parsing unknown versions shall be allowed 288 */ 289 public void setAllowUnknownVersions(boolean theAllowUnknownVersions) { 290 allowUnknownVersions = theAllowUnknownVersions; 291 } 292 293 /** 294 * <p> 295 * If this property is set, the value provides a default datatype ("ST", 296 * "NM", etc) for an OBX segment with a missing OBX-2 value. This is useful 297 * when parsing messages from systems which do not correctly populate OBX-2. 298 * </p> 299 * <p> 300 * For example, if this property is set to "ST", and the following OBX 301 * segment is encountered: 302 * 303 * <pre> 304 * OBX|||||This is a value 305 * </pre> 306 * 307 * It will be parsed as though it had read: 308 * 309 * <pre> 310 * OBX||ST|||This is a value 311 * </pre> 312 * 313 * </p> 314 * <p> 315 * Note that this configuration can also be set globally using the system 316 * property {@link Varies#DEFAULT_OBX2_TYPE_PROP}, but any value provided to 317 * {@link ParserConfiguration} takes priority over the system property. 318 * </p> 319 * 320 * @param theDefaultObx2Type 321 * If this property is set, the value provides a default datatype 322 * ("ST", "NM", etc) for an OBX segment with a missing OBX-2 323 * value 324 * @see #setInvalidObx2Type(String) 325 * @see Varies#INVALID_OBX2_TYPE_PROP 326 */ 327 public void setDefaultObx2Type(String theDefaultObx2Type) { 328 myDefaultObx2Type = theDefaultObx2Type; 329 } 330 331 /** 332 * <p> 333 * If set to <code>true</code> (default is <code>true</code>), when encoding 334 * a group using the PipeParser where the first segment is required, but no 335 * data has been populated in that segment, the empty segment is now still 336 * encoded if needed as a blank segment in order to give parsers a hint 337 * about which group subsequent segments are in. This helps to ensure that 338 * messages can be "round tripped", meaning that a message which is parsed, 339 * encoded, and then re-parsed should contain exactly the same structure 340 * from beginning to end. 341 * </p> 342 * <p> 343 * </p> 344 * For example, in an ORU^R01 message with a populated OBX segment, but no 345 * data in the mandatory OBR segment which begins the ORDER_OBSERVATION 346 * group the message would still contain an empty OBR segment when encoded: 347 * 348 * <pre> 349 * MSH|^~\&|REG|W|||201103230042||ORU^R01|32153168|P|2.5 350 * OBR| 351 * OBX||ST|||Value Data 352 * </pre> 353 * 354 * Previously, the following encoding would have occurred, which would have 355 * incorrectly been parsed as having a custom OBX segment instead of having 356 * a normal ORDER_OBSERVATION group: 357 * 358 * <pre> 359 * MSH|^~\&|REG|W|||201103230042||ORU^R01|32153168|P|2.5 360 * OBX||ST|||Value Data 361 * </pre> 362 * 363 * @param theEncodeEmptyMandatorySegments 364 * If set to <code>true</code> (default is <code>true</code>), 365 * when encoding a group using the PipeParser where the first 366 * segment is required, but no data has been populated in that 367 * segment, the empty segment is now still encoded if needed as a 368 * blank segment in order to give parsers a hint about which 369 * group subsequent segments are in 370 */ 371 public void setEncodeEmptyMandatoryFirstSegments(boolean theEncodeEmptyMandatorySegments) { 372 myEncodeEmptyMandatorySegments = theEncodeEmptyMandatorySegments; 373 } 374 375 /** 376 * Set to <code>true</code> if subcomponent delimiters in OBX-5 shall be 377 * ignored 378 * @param escapeSubcomponentDelimiterInPrimitive boolean flag to enable or disable this behavior 379 */ 380 public void setEscapeSubcomponentDelimiterInPrimitive(boolean escapeSubcomponentDelimiterInPrimitive) { 381 this.escapeSubcomponentDelimiterInPrimitive = escapeSubcomponentDelimiterInPrimitive; 382 } 383 384 /** 385 * @param idGenerator 386 * the {@link IDGenerator} to be used for generating IDs for new 387 * messages, preferable initialized using the methods described 388 * in IDGeneratorFactory. 389 * 390 * @see IDGenerator 391 */ 392 public void setIdGenerator(IDGenerator idGenerator) { 393 this.idGenerator = idGenerator; 394 } 395 396 /** 397 * <p> 398 * If this property is set, the value provides a default datatype ("ST", 399 * "NM", etc) for an OBX segment with an invalid OBX-2 value. This is useful 400 * when parsing messages from systems which do not correctly populate OBX-2. 401 * </p> 402 * <p> 403 * For example, if this property is set to "ST", and the following OBX 404 * segment is encountered: 405 * 406 * <pre> 407 * OBX||INVALID|||This is a value 408 * </pre> 409 * 410 * It will be parsed as though it had read: 411 * 412 * <pre> 413 * OBX||ST|||This is a value 414 * </pre> 415 * 416 * </p> 417 * <p> 418 * Note that this configuration can also be set globally using the system 419 * property {@link Varies#INVALID_OBX2_TYPE_PROP}, but any value provided to 420 * {@link ParserConfiguration} takes priority over the system property. 421 * </p> 422 * 423 * @param theInvalidObx2Type 424 * If this property is set, the value provides a default datatype 425 * ("ST", "NM", etc) for an OBX segment with an invalid OBX-2 426 * value. This is useful when parsing messages from systems which 427 * do not correctly populate OBX-2. 428 * @see ParserConfiguration#setDefaultObx2Type(String) 429 * @see Varies#DEFAULT_OBX2_TYPE_PROP 430 */ 431 public void setInvalidObx2Type(String theInvalidObx2Type) { 432 myInvalidObx2Type = theInvalidObx2Type; 433 } 434 435 /** 436 * If set to <code>true</code> (default is <code>false</code>), pipe parser will be 437 * put in non-greedy mode. This setting applies only to {@link PipeParser Pipe Parsers} and 438 * will have no effect on {@link XMLParser XML Parsers}. 439 * 440 * <p> 441 * In non-greedy mode, if the message structure being parsed has an ambiguous 442 * choice of where to put a segment because there is a segment matching the 443 * current segment name in both a later position in the message, and 444 * in an earlier position as a part of a repeating group, the earlier 445 * position will be chosen. 446 * </p> 447 * <p> 448 * This is perhaps best explained with an example. Consider the following structure: 449 * </p> 450 * <pre> 451 * MSH 452 * GROUP_1 (start) 453 * { 454 * AAA 455 * BBB 456 * GROUP_2 (start) 457 * { 458 * AAA 459 * } 460 * GROUP_2 (end) 461 * } 462 * GROUP_1 (end) 463 * </pre> 464 * <p> 465 * </p> 466 * For the above example, consider a message containing the following segments:<br/> 467 * <code>MSH<br/> 468 * AAA<br/> 469 * BBB<br/> 470 * AAA</code> 471 * </p> 472 * <p> 473 * In this example, when the second AAA segment is encountered, there are two 474 * possible choices. It would be placed in GROUP_2, or it could be placed in 475 * a second repetition of GROUP_1. By default it will be placed in GROUP_2, but 476 * in non-greedy mode it will be put in a new repetition of GROUP_1. 477 * </p> 478 * <p> 479 * This mode is useful for example when parsing OML^O21 messages containing 480 * multiple orders. 481 * </p> 482 */ 483 public void setNonGreedyMode(boolean theNonGreedyMode) { 484 nonGreedyMode = theNonGreedyMode; 485 } 486 487 /** 488 * If set to <code>true</code> (which is the default), {@link XMLParser XML Parsers} 489 * will attempt to pretty-print the XML they generate. This means the messages will look 490 * nicer to humans, but may take up slightly more space/bandwidth. 491 */ 492 public void setPrettyPrintWhenEncodingXml(boolean thePrettyPrintWhenEncodingXml) { 493 prettyPrintWhenEncodingXml = thePrettyPrintWhenEncodingXml; 494 } 495 496 /** 497 * Sets the behaviour to use when parsing a message and a nonstandard 498 * segment is found 499 * 500 * @param theUnexpectedSegmentBehaviour behaviour to use when a nonstandard segment is found 501 */ 502 public void setUnexpectedSegmentBehaviour(UnexpectedSegmentBehaviourEnum theUnexpectedSegmentBehaviour) { 503 if (theUnexpectedSegmentBehaviour == null) { 504 throw new NullPointerException("UnexpectedSegmentBehaviour can not be null"); 505 } 506 myUnexpectedSegmentBehaviour = theUnexpectedSegmentBehaviour; 507 } 508 509 /** 510 * Determines whether the parser validates using a configured 511 * {@link ValidationContext} or not. This allows to disable message 512 * validation although a validation context is defined. 513 * 514 * @param validating 515 * <code>true</code> if parser shall validate, <code>false</code> 516 * if not 517 */ 518 public void setValidating(boolean validating) { 519 this.validating = validating; 520 } 521 522}