001/**
002The contents of this file are subject to the Mozilla Public License Version 1.1 
003(the "License"); you may not use this file except in compliance with the License. 
004You may obtain a copy of the License at http://www.mozilla.org/MPL/ 
005Software distributed under the License is distributed on an "AS IS" basis, 
006WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the 
007specific language governing rights and limitations under the License. 
008
009The Initial Developer of the Original Code is University Health Network. Copyright (C) 
0102001.  All Rights Reserved. 
011
012Contributor(s): Jens Kristian Villadsen from Cetrea A/S
013
014Alternatively, the contents of this file may be used under the terms of the 
015GNU General Public License (the "GPL"), in which case the provisions of the GPL are 
016applicable instead of those above.  If you wish to allow use of your version of this 
017file only under the terms of the GPL and not to allow others to use your version 
018of this file under the MPL, indicate your decision by deleting  the provisions above 
019and replace  them with the notice and other provisions required by the GPL License.  
020If you do not delete the provisions above, a recipient may use your version of 
021this file under either the MPL or the GPL. 
022
023*/
024
025package ca.uhn.hl7v2.llp;
026
027import static ca.uhn.hl7v2.llp.MinLLPReader.*;
028
029import java.io.ByteArrayInputStream;
030import java.io.ByteArrayOutputStream;
031import java.io.IOException;
032import java.io.InputStream;
033import java.io.InputStreamReader;
034import java.net.SocketException;
035import java.net.SocketTimeoutException;
036import java.nio.charset.Charset;
037
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041import ca.uhn.hl7v2.HL7Exception;
042import ca.uhn.hl7v2.parser.EncodingNotSupportedException;
043import ca.uhn.hl7v2.preparser.PreParser;
044
045/**
046 * Charset-aware MLLP stream reader
047 * 
048 * @see ExtendedMinLowerLayerProtocol
049 * @author Jens Kristian Villadsen from Cetrea A/S
050 */
051public class ExtendedMinLLPReader implements HL7Reader
052{
053
054        private static final Logger log = LoggerFactory.getLogger(ExtendedMinLLPReader.class);
055
056        private InputStream inputStream;
057        private Charset myLastCharset;
058        private InputStreamReader myReader;
059
060        /**
061         * Creates a MinLLPReader with no setup - setInputStream must be set later.
062         */
063        public ExtendedMinLLPReader()
064        {
065                super();
066        }
067
068        /**
069         * Creates a MinLLPReader which reads from the given InputStream. The stream is assumed to be an ASCII bit stream.
070         */
071        public ExtendedMinLLPReader(InputStream in) throws IOException
072        {
073                setInputStream(in);
074        }
075
076        /**
077         * Closes the underlying BufferedReader.
078         */
079        public synchronized void close() throws java.io.IOException
080        {
081                myReader.close();
082        }
083
084        private Charset getCharacterEncoding(InputStream in) throws IOException
085        {
086                ByteArrayOutputStream bos = new ByteArrayOutputStream();
087                int next = in.read();
088                while((next != -1 || bos.size() == 0) && next != END_MESSAGE && next != LAST_CHARACTER)
089                {
090                        bos.write(next);
091                        next = in.read();
092                }
093                bos.flush();
094                
095                try
096                {
097                        String firstLine;
098                        if ((bos.toByteArray()[0] == -2 && bos.toByteArray()[1] == -1) ||
099                                        bos.toByteArray()[1] == -2 && bos.toByteArray()[0] == -1) {
100                                
101                                // if the string is little endian, then we will be missing the second byte of the 
102                                // last character (a "\r"), so add it manually
103                                if (bos.toByteArray()[1] == -2 && bos.toByteArray()[0] == -1) {
104                                        bos.write(0);
105                                }
106                                
107                                firstLine = bos.toString("UTF-16");
108                        } else {
109                                firstLine = bos.toString("US-ASCII");
110                        }
111                        
112                        String[] fields;
113                        try {
114                                fields = PreParser.getFields(firstLine, "MSH-18(0)");
115                        } catch (HL7Exception e) {
116                                log.warn("Failed to parse MSH segment. Defaulting to US-ASCII", e);
117                                return Charset.forName("US-ASCII");
118                        }
119                        String charset = stripNonLowAscii(fields[0]);
120                        Charset javaCs;
121                        try {
122                                javaCs = CharSetUtil.convertHL7CharacterEncodingToCharSetvalue(charset);
123                        } catch (EncodingNotSupportedException e) {
124                                log.warn("Invalid or unsupported charset in MSH-18: \"{}\". Defaulting to US-ASCII", e);
125                                return Charset.forName("US-ASCII");
126                        }                       
127                        log.debug("Detected MSH-18 value \"{}\" so using charset {}", charset, javaCs.displayName());                   
128                        return javaCs;
129                }
130//              catch(Exception e)
131//              {
132//                      log.warn("Nonvalid charset - defaulting to US-ASCII", e);
133//              }
134                finally
135                {
136                        bos.close();
137                }
138        }
139
140        private String stripNonLowAscii(String theString) {
141                if (theString == null) return "";
142                StringBuilder b = new StringBuilder();
143                
144                for (int i = 0; i < theString.length(); i++) {
145                        char next = theString.charAt(i);
146                        if (next > 0 && next < 127) {
147                                b.append(next);
148                        }
149                }
150                
151                return b.toString();
152        }
153
154        /**
155         * @return the lastCharset
156         */
157        public Charset getLastCharset() {
158                return myLastCharset;
159        }
160
161        public synchronized String getMessage() throws LLPException, IOException
162        {
163                ByteArrayOutputStream baos = verifyAndCopyToOutputStream(this.inputStream);
164
165                if(baos == null)
166                        return null;
167                
168                byte[] byteArray = baos.toByteArray();
169                myLastCharset = getCharacterEncoding(new ByteArrayInputStream(byteArray));
170
171                myReader = new InputStreamReader(new ByteArrayInputStream(byteArray), myLastCharset);
172                baos.close();
173
174                StringBuilder s_buffer = new StringBuilder();
175
176                int c = myReader.read();
177                while(c != -1)
178                {
179                        s_buffer.append((char) c);
180                        c = myReader.read();
181                }
182                return s_buffer.toString();
183        }
184
185        /**
186         * Sets the InputStream from which to read messages. The InputStream must be set before any calls to <code>getMessage()</code>.
187         */
188        public synchronized void setInputStream(InputStream in) throws IOException
189        {
190                this.inputStream = in;
191        }
192
193        private ByteArrayOutputStream verifyAndCopyToOutputStream(InputStream stream) throws IOException, LLPException
194        {
195                ByteArrayOutputStream bos = new ByteArrayOutputStream();
196                boolean end_of_message = false;
197
198                int c;
199                try
200                {
201                        c = stream.read();
202                }
203                catch(SocketException e)
204                {
205                        log.info("SocketException on read() attempt.  Socket appears to have been closed: {}", e.getMessage());
206                        throw e;
207                }
208                catch(SocketTimeoutException e)
209                {
210                        log.debug("SocketTimeoutException on read() attempt.");
211                        return null;
212                }
213                // trying to read when there is no data (stream may have been closed at other end)
214                if(c == -1)
215                {
216                        log.info("End of input stream reached.");
217                        throw new SocketException("End of input stream reached");
218                }
219                LowerLayerProtocol.logCharacterReceived(c);
220
221                if(c != START_MESSAGE)
222                {
223                        throw new LLPException("Message violates the " + "minimal lower layer protocol: no start of message indicator " + "received. Received: " + c);
224                }
225
226                while(!end_of_message)
227                {
228                        c = stream.read();
229
230                        if(c == -1)
231                        {
232                                throw new LLPException("Message violates the " + "minimal lower protocol: message terminated without " + "a terminating character.");
233                        }
234                        LowerLayerProtocol.logCharacterReceived(c);
235
236                        if(c == END_MESSAGE)
237                        {
238                                // subsequent character should be a carriage return
239                                c = stream.read();
240                                if(c >= 0)
241                                        LowerLayerProtocol.logCharacterReceived(c);
242                                if(c != LAST_CHARACTER)
243                                {
244                                        throw new LLPException("Message " + "violates the minimal lower layer protocol: " + "message terminator not followed by a return " + "character.");
245                                }
246                                end_of_message = true;
247                        }
248                        else
249                        {
250                                // the character wasn't the end of message, append it to the message
251                                bos.write(c);
252                        }
253                }
254
255                bos.flush();
256                return bos;
257        }
258}