/**
The contents of this file are subject to the Mozilla Public License Version 1.1
(the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
specific language governing rights and limitations under the License.

The Initial Developer of the Original Code is University Health Network. Copyright (C)
2001. All Rights Reserved.

Contributor(s): Jens Kristian Villadsen from Cetrea A/S

Alternatively, the contents of this file may be used under the terms of the
GNU General Public License (the "GPL"), in which case the provisions of the GPL are
applicable instead of those above. If you wish to allow use of your version of this
file only under the terms of the GPL and not to allow others to use your version
of this file under the MPL, indicate your decision by deleting the provisions above
and replace them with the notice and other provisions required by the GPL License.
If you do not delete the provisions above, a recipient may use your version of
this file under either the MPL or the GPL.

*/

package ca.uhn.hl7v2.llp;

import static ca.uhn.hl7v2.llp.MinLLPReader.*;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.nio.charset.Charset;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ca.uhn.hl7v2.HL7Exception;
import ca.uhn.hl7v2.parser.EncodingNotSupportedException;
import ca.uhn.hl7v2.preparser.PreParser;

/**
 * Charset-aware MLLP stream reader.
 * <p>
 * Each message is first copied, as raw bytes, out of its MLLP frame. The
 * charset named in MSH-18 of the first segment is then used to decode those
 * bytes; US-ASCII is used when MSH-18 cannot be read or is not supported.
 * <p>
 * The public mutating methods synchronize on this instance.
 *
 * @see ExtendedMinLowerLayerProtocol
 * @author Jens Kristian Villadsen from Cetrea A/S
 */
public class ExtendedMinLLPReader implements HL7Reader
{

    private static final Logger log = LoggerFactory.getLogger(ExtendedMinLLPReader.class);

    /** Charset used whenever MSH-18 is absent, unparseable or unsupported. */
    private static final Charset DEFAULT_CHARSET = Charset.forName("US-ASCII");

    private InputStream inputStream;
    private Charset myLastCharset;

    /**
     * Creates an ExtendedMinLLPReader with no input stream;
     * {@link #setInputStream(InputStream)} must be called before
     * {@link #getMessage()}.
     */
    public ExtendedMinLLPReader()
    {
        super();
    }

    /**
     * Creates an ExtendedMinLLPReader which reads from the given InputStream.
     *
     * @param in the stream carrying MLLP-framed messages
     * @throws IOException declared for compatibility with {@link #setInputStream(InputStream)}
     */
    public ExtendedMinLLPReader(InputStream in) throws IOException
    {
        setInputStream(in);
    }

    /**
     * Closes the input stream this reader was given, if any. Calling this
     * before a stream has been set, or before any message has been read, is
     * a no-op rather than an error.
     *
     * @throws java.io.IOException if closing the stream fails
     */
    public synchronized void close() throws java.io.IOException
    {
        if (inputStream != null)
        {
            inputStream.close();
        }
    }

    /**
     * Determines the charset of a message from MSH-18 of its first segment.
     * <p>
     * Bytes are collected up to (not including) the first segment terminator,
     * end-of-message marker or end of stream. That first segment is decoded as
     * UTF-16 when it starts with a UTF-16 byte order mark and as US-ASCII
     * otherwise, which is enough to read the MSH-18 value.
     *
     * @param in stream positioned at the first byte of the message payload
     * @return the charset named by MSH-18, or US-ASCII when it cannot be determined
     * @throws IOException if reading from <code>in</code> fails
     */
    private Charset getCharacterEncoding(InputStream in) throws IOException
    {
        ByteArrayOutputStream firstSegment = new ByteArrayOutputStream();
        int next = in.read();
        while (next != -1 && next != END_MESSAGE && next != LAST_CHARACTER)
        {
            firstSegment.write(next);
            next = in.read();
        }

        byte[] bytes = firstSegment.toByteArray();
        if (bytes.length == 0)
        {
            // Nothing precedes the first terminator, so there is no MSH segment to inspect.
            log.debug("First segment is empty, so MSH-18 is unavailable. Defaulting to US-ASCII");
            return DEFAULT_CHARSET;
        }

        boolean hasTwoBytes = bytes.length >= 2;
        boolean bigEndianBom = hasTwoBytes && bytes[0] == (byte) 0xFE && bytes[1] == (byte) 0xFF;
        boolean littleEndianBom = hasTwoBytes && bytes[0] == (byte) 0xFF && bytes[1] == (byte) 0xFE;

        String firstLine;
        if (bigEndianBom || littleEndianBom)
        {
            // The scan above stops on a single byte, so it may have split a
            // two-byte code unit. Decode whole code units only.
            int wholeUnits = bytes.length & ~1;
            firstLine = new String(bytes, 0, wholeUnits, "UTF-16");
        }
        else
        {
            firstLine = new String(bytes, "US-ASCII");
        }

        String[] fields;
        try
        {
            fields = PreParser.getFields(firstLine, "MSH-18(0)");
        }
        catch (HL7Exception e)
        {
            log.warn("Failed to parse MSH segment. Defaulting to US-ASCII", e);
            return DEFAULT_CHARSET;
        }

        String rawValue = (fields == null || fields.length == 0) ? null : fields[0];
        String charset = stripNonLowAscii(rawValue);

        Charset javaCs;
        try
        {
            javaCs = CharSetUtil.convertHL7CharacterEncodingToCharSetvalue(charset);
        }
        catch (EncodingNotSupportedException e)
        {
            // The MSH-18 value fills the placeholder; the trailing exception is logged as the cause.
            log.warn("Invalid or unsupported charset in MSH-18: \"{}\". Defaulting to US-ASCII", charset, e);
            return DEFAULT_CHARSET;
        }
        log.debug("Detected MSH-18 value \"{}\" so using charset {}", charset, javaCs.displayName());
        return javaCs;
    }

    /**
     * Returns the given string with every character outside the range
     * 1..126 removed.
     *
     * @param theString the string to filter, may be null
     * @return the filtered string; empty when <code>theString</code> is null
     */
    private String stripNonLowAscii(String theString)
    {
        if (theString == null)
        {
            return "";
        }

        StringBuilder kept = new StringBuilder(theString.length());
        for (int i = 0; i < theString.length(); i++)
        {
            char ch = theString.charAt(i);
            if (ch > 0 && ch < 127)
            {
                kept.append(ch);
            }
        }
        return kept.toString();
    }

    /**
     * @return the charset used to decode the most recently read message, or
     *         null if no message has been read yet
     */
    public Charset getLastCharset()
    {
        return myLastCharset;
    }

    /**
     * Reads the next MLLP-framed message from the input stream and decodes it
     * using the charset detected from its MSH-18 field.
     *
     * @return the message text without the MLLP framing characters, or null if
     *         the socket timed out before the first byte of a message arrived
     * @throws LLPException if the bytes read violate the MLLP framing
     * @throws IOException if reading fails or the end of the stream is reached
     */
    public synchronized String getMessage() throws LLPException, IOException
    {
        ByteArrayOutputStream payload = verifyAndCopyToOutputStream(this.inputStream);
        if (payload == null)
        {
            return null;
        }

        byte[] messageBytes = payload.toByteArray();
        myLastCharset = getCharacterEncoding(new ByteArrayInputStream(messageBytes));

        InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream(messageBytes), myLastCharset);
        try
        {
            StringBuilder message = new StringBuilder(messageBytes.length);
            char[] chunk = new char[1024];
            int count = reader.read(chunk);
            while (count != -1)
            {
                message.append(chunk, 0, count);
                count = reader.read(chunk);
            }
            return message.toString();
        }
        finally
        {
            reader.close();
        }
    }

    /**
     * Sets the InputStream from which to read messages. The InputStream must
     * be set before any calls to {@link #getMessage()}.
     *
     * @param in the stream carrying MLLP-framed messages
     * @throws IOException never thrown by this implementation; declared by the signature
     */
    public synchronized void setInputStream(InputStream in) throws IOException
    {
        this.inputStream = in;
    }

    /**
     * Reads one MLLP frame from the stream, checks its framing and returns
     * the raw payload bytes between the start and end markers.
     *
     * @param stream the stream to read from
     * @return the payload bytes, or null if a socket timeout occurred while
     *         waiting for the first byte
     * @throws IOException if reading fails; a {@link SocketException} is
     *         thrown when the stream has ended before a frame starts
     * @throws LLPException if the start marker is missing, the stream ends
     *         mid-message, or the end marker is not followed by a carriage return
     */
    private ByteArrayOutputStream verifyAndCopyToOutputStream(InputStream stream) throws IOException, LLPException
    {
        int c;
        try
        {
            c = stream.read();
        }
        catch (SocketException e)
        {
            log.info("SocketException on read() attempt. Socket appears to have been closed: {}", e.getMessage());
            throw e;
        }
        catch (SocketTimeoutException e)
        {
            // A timeout before any byte arrives means "no message yet", not an error.
            log.debug("SocketTimeoutException on read() attempt.");
            return null;
        }

        // trying to read when there is no data (stream may have been closed at other end)
        if (c == -1)
        {
            log.info("End of input stream reached.");
            throw new SocketException("End of input stream reached");
        }
        LowerLayerProtocol.logCharacterReceived(c);

        if (c != START_MESSAGE)
        {
            throw new LLPException("Message violates the minimal lower layer protocol: "
                    + "no start of message indicator received. Received: " + c);
        }

        ByteArrayOutputStream payload = new ByteArrayOutputStream();
        while (true)
        {
            c = stream.read();
            if (c == -1)
            {
                throw new LLPException("Message violates the minimal lower protocol: "
                        + "message terminated without a terminating character.");
            }
            LowerLayerProtocol.logCharacterReceived(c);

            if (c != END_MESSAGE)
            {
                // ordinary payload byte
                payload.write(c);
                continue;
            }

            // the end marker must be followed by a carriage return
            c = stream.read();
            if (c >= 0)
            {
                LowerLayerProtocol.logCharacterReceived(c);
            }
            if (c != LAST_CHARACTER)
            {
                throw new LLPException("Message violates the minimal lower layer protocol: "
                        + "message terminator not followed by a return character.");
            }
            return payload;
        }
    }
}