001/**
002The contents of this file are subject to the Mozilla Public License Version 1.1 
003(the "License"); you may not use this file except in compliance with the License. 
004You may obtain a copy of the License at http://www.mozilla.org/MPL/ 
005Software distributed under the License is distributed on an "AS IS" basis, 
006WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the 
007specific language governing rights and limitations under the License. 
008
009The Original Code is "Escape.java".  Description: 
010"Handles "escaping" and "unescaping" of text according to the HL7 escape sequence rules
011defined in section 2.10 of the standard (version 2.4)" 
012
013The Initial Developer of the Original Code is University Health Network. Copyright (C) 
0142001.  All Rights Reserved. 
015
016Contributor(s): Mark Lee (Skeva Technologies); Elmar Hinz 
017
018Alternatively, the contents of this file may be used under the terms of the 
019GNU General Public License (the "GPL"), in which case the provisions of the GPL are 
020applicable instead of those above.  If you wish to allow use of your version of this 
021file only under the terms of the GPL and not to allow others to use your version 
022of this file under the MPL, indicate your decision by deleting  the provisions above 
023and replace  them with the notice and other provisions required by the GPL License.  
024If you do not delete the provisions above, a recipient may use your version of 
025this file under either the MPL or the GPL. 
026 */
027package ca.uhn.hl7v2.parser;
028
029import java.util.Collections;
030import java.util.LinkedHashMap;
031import java.util.Map;
032
033/**
034 * Handles "escaping" and "unescaping" of text according to the HL7 escape
035 * sequence rules defined in section 2.10 of the standard (version 2.4).
036 * Currently, escape sequences for multiple character sets are unsupported. The
037 * highlighting, hexademical, and locally defined escape sequences are also
038 * unsupported.
039 * 
040 * @author Bryan Tripp
041 * @author Mark Lee (Skeva Technologies)
042 * @author Elmar Hinz
043 * @author Christian Ohr
044 */
045public class Escape {
046
047    /**
048     * limits the size of variousEncChars to 1000, can be overridden by system property.
049     */
050    private static Map<EncodingCharacters, EncLookup> variousEncChars = Collections.synchronizedMap(new LinkedHashMap<EncodingCharacters, EncLookup>(5, 0.75f, true) {
051
052        private static final long serialVersionUID = 1L;
053        final int maxSize = new Integer(System.getProperty(Escape.class.getName() + ".maxSize", "1000"));
054
055        @Override
056        protected boolean removeEldestEntry(Map.Entry<EncodingCharacters, EncLookup> eldest) {
057            return this.size() > maxSize;
058        }
059    });
060
061    /** Creates a new instance of Escape */
062    public Escape() {
063    }
064
065    /**
066     * @param text string to be escaped
067     * @param encChars encoding characters to be used
068     * @return the escaped string
069     */
070    public static String escape(String text, EncodingCharacters encChars) {
071        EncLookup esc = getEscapeSequences(encChars);
072        int textLength = text.length();
073
074        StringBuilder result = new StringBuilder(textLength);
075        for (int i = 0; i < textLength; i++) {
076            boolean charReplaced = false;
077            char c = text.charAt(i);
078
079            FORENCCHARS:
080                        for (int j = 0; j < 6; j++) {
081                if (text.charAt(i) == esc.characters[j]) {
082
083                                        // Formatting escape sequences such as \.br\ should be left alone
084                                        if (j == 4) {
085                                                
086                                                if (i+1 < textLength) {
087                                                        
088                                                        // Check for \.br\
089                                                        char nextChar = text.charAt(i + 1);
090                                                        switch (nextChar) {
091                                                        case '.':
092                                                        case 'C':
093                                                        case 'M':
094                                                        case 'X':
095                                                        case 'Z':
096                                                        {
097                                                                int nextEscapeIndex = text.indexOf(esc.characters[j], i + 1);
098                                                                if (nextEscapeIndex > 0) {
099                                                                        result.append(text.substring(i, nextEscapeIndex + 1));
100                                                                        charReplaced = true;
101                                                                        i = nextEscapeIndex;
102                                                                        break FORENCCHARS;
103                                                                }
104                                                                break;
105                                                        }
106                                                        case 'H':
107                                                        case 'N':
108                                                        {
109                                                                if (i+2 < textLength && text.charAt(i+2) == '\\') {
110                                                                        int nextEscapeIndex = i + 2;
111                                                                        if (nextEscapeIndex > 0) {
112                                                                                result.append(text.substring(i, nextEscapeIndex + 1));
113                                                                                charReplaced = true;
114                                                                                i = nextEscapeIndex;
115                                                                                break FORENCCHARS;
116                                                                        }
117                                                                }
118                                                                break;
119                                                        }
120                                                        }
121                                                        
122                                                }
123                                                
124                                        }
125
126                    result.append(esc.encodings[j]);
127                    charReplaced = true;
128                    break;
129                }
130            }
131            if (!charReplaced) {
132                result.append(c);
133            }
134        }
135        return result.toString();
136    }
137
138    /**
139     * @param text string to be unescaped
140     * @param encChars encoding characters to be used
141     * @return the unescaped string
142     */
143    public static String unescape(String text, EncodingCharacters encChars) {
144
145        // If the escape char isn't found, we don't need to look for escape sequences
146        char escapeChar = encChars.getEscapeCharacter();
147        boolean foundEscapeChar = false;
148        for (int i = 0; i < text.length(); i++) {
149            if (text.charAt(i) == escapeChar) {
150                foundEscapeChar = true;
151                break;
152            }
153        }
154        if (!foundEscapeChar) {
155            return text;
156        }
157
158        int textLength = text.length();
159        StringBuilder result = new StringBuilder(textLength + 20);
160        EncLookup esc = getEscapeSequences(encChars);
161        char escape = esc.characters[4];
162        int encodingsCount = esc.characters.length;
163        int i = 0;
164        while (i < textLength) {
165            char c = text.charAt(i);
166            if (c != escape) {
167                result.append(c);
168                i++;
169            } else {
170                boolean foundEncoding = false;
171
172                                // Test against the standard encodings
173                                for (int j = 0; j < encodingsCount; j++) {
174                    String encoding = esc.encodings[j];
175                                        int encodingLength = encoding.length();
176                                        if ((i + encodingLength <= textLength) && text.substring(i, i + encodingLength)
177                            .equals(encoding)) {
178                        result.append(esc.characters[j]);
179                        i += encodingLength;
180                        foundEncoding = true;
181                        break;
182                    }
183                }
184
185                if (!foundEncoding) {
186                                        
187                                        // If we haven't found this, there is one more option. Escape sequences of /.XXXXX/ are
188                                        // formatting codes. They should be left intact
189                                        if (i + 1 < textLength) {
190                                                char nextChar = text.charAt(i + 1);
191                                                switch (nextChar) {
192                                                        case '.':
193                                                        case 'C':
194                                                        case 'M':
195                                                        case 'X':
196                                                        case 'Z':
197                                                        {
198                                                                int closingEscape = text.indexOf(escape, i + 1);
199                                                                if (closingEscape > 0) {
200                                                                        String substring = text.substring(i, closingEscape + 1);
201                                                                        result.append(substring);
202                                                                        i += substring.length();
203                                                                } else {
204                                                                        i++;
205                                                                }
206                                                                break;
207                                                        }
208                                                        case 'H':
209                                                        case 'N':
210                                                        {
211                                                                int closingEscape = text.indexOf(escape, i + 1);
212                                                                if (closingEscape == i + 2) {
213                                                                        String substring = text.substring(i, closingEscape + 1);
214                                                                        result.append(substring);
215                                                                        i += substring.length();
216                                                                } else {
217                                                                        i++;
218                                                                }
219                                                                break;
220                                                        }
221                                                        default:
222                                                        {
223                                                                i++;
224                                                        }
225                                                }
226                                                
227                                        } else {
228                                                i++;
229                                        }
230                }
231
232
233            }
234        }
235        return result.toString();
236    }
237
238    /**
239     * Returns a HashTable with escape sequences as keys, and corresponding
240     * Strings as values.
241     */
242    private static EncLookup getEscapeSequences(EncodingCharacters encChars) {
243        EncLookup escapeSequences = variousEncChars.get(encChars);
244        if (escapeSequences == null) {
245            // this means we haven't got the sequences for these encoding
246            // characters yet - let's make them
247            escapeSequences = new EncLookup(encChars);
248            variousEncChars.put(encChars, escapeSequences);
249        }
250        return escapeSequences;
251    }
252
253
254
255
256    /**
257     * A performance-optimized replacement for using when
258     * mapping from HL7 special characters to their respective
259     * encodings
260     *
261     * @author Christian Ohr
262     */
263    private static class EncLookup {
264
265        char[] characters = new char[6];
266        String[] encodings = new String[6];
267
268        EncLookup(EncodingCharacters ec) {
269            characters[0] = ec.getFieldSeparator();
270            characters[1] = ec.getComponentSeparator();
271            characters[2] = ec.getSubcomponentSeparator();
272            characters[3] = ec.getRepetitionSeparator();
273            characters[4] = ec.getEscapeCharacter();
274            characters[5] = '\r';
275            char[] codes = {'F', 'S', 'T', 'R', 'E'};
276            for (int i = 0; i < codes.length; i++) {
277                StringBuilder seq = new StringBuilder();
278                seq.append(ec.getEscapeCharacter());
279                seq.append(codes[i]);
280                seq.append(ec.getEscapeCharacter());
281                encodings[i] = seq.toString();
282            }
283            encodings[5] = "\\X000d\\";
284        }
285    }
286}