001/** 002The contents of this file are subject to the Mozilla Public License Version 1.1 003(the "License"); you may not use this file except in compliance with the License. 004You may obtain a copy of the License at http://www.mozilla.org/MPL/ 005Software distributed under the License is distributed on an "AS IS" basis, 006WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the 007specific language governing rights and limitations under the License. 008 009The Original Code is "Escape.java". Description: 010"Handles "escaping" and "unescaping" of text according to the HL7 escape sequence rules 011defined in section 2.10 of the standard (version 2.4)" 012 013The Initial Developer of the Original Code is University Health Network. Copyright (C) 0142001. All Rights Reserved. 015 016Contributor(s): Mark Lee (Skeva Technologies); Elmar Hinz 017 018Alternatively, the contents of this file may be used under the terms of the 019GNU General Public License (the ?GPL?), in which case the provisions of the GPL are 020applicable instead of those above. If you wish to allow use of your version of this 021file only under the terms of the GPL and not to allow others to use your version 022of this file under the MPL, indicate your decision by deleting the provisions above 023and replace them with the notice and other provisions required by the GPL License. 024If you do not delete the provisions above, a recipient may use your version of 025this file under either the MPL or the GPL. 026 */ 027package ca.uhn.hl7v2.parser; 028 029import java.util.Collections; 030import java.util.LinkedHashMap; 031import java.util.Map; 032 033/** 034 * Handles "escaping" and "unescaping" of text according to the HL7 escape 035 * sequence rules defined in section 2.10 of the standard (version 2.4). 036 * Currently, escape sequences for multiple character sets are unsupported. The 037 * highlighting, hexademical, and locally defined escape sequences are also 038 * unsupported. 039 * 040 * @author Bryan Tripp 041 * @author Mark Lee (Skeva Technologies) 042 * @author Elmar Hinz 043 * @author Christian Ohr 044 */ 045public class Escape { 046 047 /** 048 * limits the size of variousEncChars to 1000, can be overridden by system property. 049 */ 050 private static Map<EncodingCharacters, EncLookup> variousEncChars = Collections.synchronizedMap(new LinkedHashMap<EncodingCharacters, EncLookup>(5, 0.75f, true) { 051 052 private static final long serialVersionUID = 1L; 053 final int maxSize = new Integer(System.getProperty(Escape.class.getName() + ".maxSize", "1000")); 054 055 @Override 056 protected boolean removeEldestEntry(Map.Entry<EncodingCharacters, EncLookup> eldest) { 057 return this.size() > maxSize; 058 } 059 }); 060 061 /** Creates a new instance of Escape */ 062 public Escape() { 063 } 064 065 /** 066 * @param text string to be escaped 067 * @param encChars encoding characters to be used 068 * @return the escaped string 069 */ 070 public static String escape(String text, EncodingCharacters encChars) { 071 EncLookup esc = getEscapeSequences(encChars); 072 int textLength = text.length(); 073 074 StringBuilder result = new StringBuilder(textLength); 075 for (int i = 0; i < textLength; i++) { 076 boolean charReplaced = false; 077 char c = text.charAt(i); 078 079 FORENCCHARS: 080 for (int j = 0; j < 6; j++) { 081 if (text.charAt(i) == esc.characters[j]) { 082 083 // Formatting escape sequences such as \.br\ should be left alone 084 if (j == 4) { 085 086 if (i+1 < textLength) { 087 088 // Check for \.br\ 089 char nextChar = text.charAt(i + 1); 090 switch (nextChar) { 091 case '.': 092 case 'C': 093 case 'M': 094 case 'X': 095 case 'Z': 096 { 097 int nextEscapeIndex = text.indexOf(esc.characters[j], i + 1); 098 if (nextEscapeIndex > 0) { 099 result.append(text.substring(i, nextEscapeIndex + 1)); 100 charReplaced = true; 101 i = nextEscapeIndex; 102 break FORENCCHARS; 103 } 104 break; 105 } 106 case 'H': 107 case 'N': 108 { 109 if (i+2 < textLength && text.charAt(i+2) == '\\') { 110 int nextEscapeIndex = i + 2; 111 if (nextEscapeIndex > 0) { 112 result.append(text.substring(i, nextEscapeIndex + 1)); 113 charReplaced = true; 114 i = nextEscapeIndex; 115 break FORENCCHARS; 116 } 117 } 118 break; 119 } 120 } 121 122 } 123 124 } 125 126 result.append(esc.encodings[j]); 127 charReplaced = true; 128 break; 129 } 130 } 131 if (!charReplaced) { 132 result.append(c); 133 } 134 } 135 return result.toString(); 136 } 137 138 /** 139 * @param text string to be unescaped 140 * @param encChars encoding characters to be used 141 * @return the unescaped string 142 */ 143 public static String unescape(String text, EncodingCharacters encChars) { 144 145 // If the escape char isn't found, we don't need to look for escape sequences 146 char escapeChar = encChars.getEscapeCharacter(); 147 boolean foundEscapeChar = false; 148 for (int i = 0; i < text.length(); i++) { 149 if (text.charAt(i) == escapeChar) { 150 foundEscapeChar = true; 151 break; 152 } 153 } 154 if (!foundEscapeChar) { 155 return text; 156 } 157 158 int textLength = text.length(); 159 StringBuilder result = new StringBuilder(textLength + 20); 160 EncLookup esc = getEscapeSequences(encChars); 161 char escape = esc.characters[4]; 162 int encodingsCount = esc.characters.length; 163 int i = 0; 164 while (i < textLength) { 165 char c = text.charAt(i); 166 if (c != escape) { 167 result.append(c); 168 i++; 169 } else { 170 boolean foundEncoding = false; 171 172 // Test against the standard encodings 173 for (int j = 0; j < encodingsCount; j++) { 174 String encoding = esc.encodings[j]; 175 int encodingLength = encoding.length(); 176 if ((i + encodingLength <= textLength) && text.substring(i, i + encodingLength) 177 .equals(encoding)) { 178 result.append(esc.characters[j]); 179 i += encodingLength; 180 foundEncoding = true; 181 break; 182 } 183 } 184 185 if (!foundEncoding) { 186 187 // If we haven't found this, there is one more option. Escape sequences of /.XXXXX/ are 188 // formatting codes. They should be left intact 189 if (i + 1 < textLength) { 190 char nextChar = text.charAt(i + 1); 191 switch (nextChar) { 192 case '.': 193 case 'C': 194 case 'M': 195 case 'X': 196 case 'Z': 197 { 198 int closingEscape = text.indexOf(escape, i + 1); 199 if (closingEscape > 0) { 200 String substring = text.substring(i, closingEscape + 1); 201 result.append(substring); 202 i += substring.length(); 203 } else { 204 i++; 205 } 206 break; 207 } 208 case 'H': 209 case 'N': 210 { 211 int closingEscape = text.indexOf(escape, i + 1); 212 if (closingEscape == i + 2) { 213 String substring = text.substring(i, closingEscape + 1); 214 result.append(substring); 215 i += substring.length(); 216 } else { 217 i++; 218 } 219 break; 220 } 221 default: 222 { 223 i++; 224 } 225 } 226 227 } else { 228 i++; 229 } 230 } 231 232 233 } 234 } 235 return result.toString(); 236 } 237 238 /** 239 * Returns a HashTable with escape sequences as keys, and corresponding 240 * Strings as values. 241 */ 242 private static EncLookup getEscapeSequences(EncodingCharacters encChars) { 243 EncLookup escapeSequences = variousEncChars.get(encChars); 244 if (escapeSequences == null) { 245 // this means we haven't got the sequences for these encoding 246 // characters yet - let's make them 247 escapeSequences = new EncLookup(encChars); 248 variousEncChars.put(encChars, escapeSequences); 249 } 250 return escapeSequences; 251 } 252 253 254 255 256 /** 257 * A performance-optimized replacement for using when 258 * mapping from HL7 special characters to their respective 259 * encodings 260 * 261 * @author Christian Ohr 262 */ 263 private static class EncLookup { 264 265 char[] characters = new char[6]; 266 String[] encodings = new String[6]; 267 268 EncLookup(EncodingCharacters ec) { 269 characters[0] = ec.getFieldSeparator(); 270 characters[1] = ec.getComponentSeparator(); 271 characters[2] = ec.getSubcomponentSeparator(); 272 characters[3] = ec.getRepetitionSeparator(); 273 characters[4] = ec.getEscapeCharacter(); 274 characters[5] = '\r'; 275 char[] codes = {'F', 'S', 'T', 'R', 'E'}; 276 for (int i = 0; i < codes.length; i++) { 277 StringBuilder seq = new StringBuilder(); 278 seq.append(ec.getEscapeCharacter()); 279 seq.append(codes[i]); 280 seq.append(ec.getEscapeCharacter()); 281 encodings[i] = seq.toString(); 282 } 283 encodings[5] = "\\X000d\\"; 284 } 285 } 286}