001/*
002 * Hl7InputStreamReader.java
003 */
004
005package ca.uhn.hl7v2.util;
006
007import java.io.BufferedReader;
008import java.io.FileNotFoundException;
009import java.io.IOException;
010import java.io.InputStream;
011import java.io.InputStreamReader;
012import java.io.PushbackReader;
013import java.io.Reader;
014import java.util.ArrayList;
015import java.util.List;
016import java.util.regex.Matcher;
017import java.util.regex.Pattern;
018
019import org.slf4j.Logger;
020import org.slf4j.LoggerFactory;
021
022/**
023 * Reads HL7 messages from an InputStream
024 * 
025 * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by $Author: jamesagnew $
026 * @deprecated see {@link Hl7InputStreamMessageIterator} or
027 *             {@link Hl7InputStreamMessageStringIterator}
028 */
029public class Hl7InputStreamReader {
030
031        private static final Logger ourLog = LoggerFactory.getLogger(Hl7InputStreamReader.class);
032
033        /**
034         * Reads HL7 messages from an InputStream and outputs an array of HL7 message strings
035         * 
036         * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by $Author: jamesagnew $
037         */
038        public static String[] read(InputStream theMsgInputStream) throws FileNotFoundException,
039                        IOException {
040                BufferedReader in = null;
041                try {
042                        in = new BufferedReader(new CommentFilterReader(
043                                        new InputStreamReader(theMsgInputStream)));
044
045                        StringBuffer rawMsgBuffer = new StringBuffer();
046                        int c = 0;
047                        while ((c = in.read()) >= 0) {
048                                rawMsgBuffer.append((char) c);
049                        }
050
051                        String[] messages = getHL7Messages(rawMsgBuffer.toString());
052                        ourLog.info(messages.length + " messages sent.");
053                        return messages;
054                } finally {
055                        if (in != null)
056                                in.close();
057                }
058
059        }
060
061        /**
062         * Given a string that contains HL7 messages, and possibly other junk, returns an array of the
063         * HL7 messages. An attempt is made to recognize segments even if there is other content between
064         * segments, for example if a log file logs segments individually with timestamps between them.
065         * 
066         * @param theSource a string containing HL7 messages
067         * @return the HL7 messages contained in theSource
068         */
069        private static String[] getHL7Messages(String theSource) {
070                List<String> messages = new ArrayList<String>(20);
071                Pattern startPattern = Pattern.compile("^MSH", Pattern.MULTILINE);
072                Matcher startMatcher = startPattern.matcher(theSource);
073
074                while (startMatcher.find()) {
075                        String messageExtent = getMessageExtent(theSource.substring(startMatcher.start()),
076                                        startPattern);
077
078                        char fieldDelim = messageExtent.charAt(3);
079                        Pattern segmentPattern = Pattern.compile("^[A-Z]{3}\\" + fieldDelim + ".*$",
080                                        Pattern.MULTILINE);
081                        Matcher segmentMatcher = segmentPattern.matcher(messageExtent);
082                        StringBuffer msg = new StringBuffer();
083                        while (segmentMatcher.find()) {
084                                msg.append(segmentMatcher.group().trim());
085                                msg.append('\r');
086                        }
087                        messages.add(msg.toString());
088                }
089                return messages.toArray(new String[0]);
090        }
091
092        /**
093         * Given a string that contains at least one HL7 message, returns the smallest string that
094         * contains the first of these messages.
095         */
096        private static String getMessageExtent(String theSource, Pattern theStartPattern) {
097                Matcher startMatcher = theStartPattern.matcher(theSource);
098                if (!startMatcher.find()) {
099                        throw new IllegalArgumentException(theSource + "does not contain message start pattern"
100                                        + theStartPattern.toString());
101                }
102
103                int start = startMatcher.start();
104                int end = theSource.length();
105                if (startMatcher.find()) {
106                        end = startMatcher.start();
107                }
108
109                return theSource.substring(start, end).trim();
110        }
111
112        /**
113         * TODO: this code is copied from HAPI ... should make it part of HAPI public API instead
114         * Removes C and C++ style comments from a reader stream. C style comments are distinguished
115         * from URL protocol delimiters by the preceding colon in the latter.
116         */
117        private static class CommentFilterReader extends PushbackReader {
118
119                private final char[] startCPPComment = { '/', '*' };
120                private final char[] endCPPComment = { '*', '/' };
121                private final char[] startCComment = { '/', '/' };
122                private final char[] endCComment = { '\n' };
123                private final char[] protocolDelim = { ':', '/', '/' };
124
125                public CommentFilterReader(Reader in) {
126                        super(in, 5);
127                }
128
129                /**
130                 * Returns the next character, not including comments.
131                 */
132                public int read() throws IOException {
133                        if (atSequence(protocolDelim)) {
134                                // proceed normally
135                        } else if (atSequence(startCPPComment)) {
136                                // skip() doesn't seem to work for some reason
137                                while (!atSequence(endCPPComment))
138                                        super.read();
139                                for (int i = 0; i < endCPPComment.length; i++)
140                                        super.read();
141                        } else if (atSequence(startCComment)) {
142                                while (!atSequence(endCComment))
143                                        super.read();
144                                for (int i = 0; i < endCComment.length; i++)
145                                        super.read();
146                        }
147                        int ret = super.read();
148                        if (ret == 65535)
149                                ret = -1;
150                        return ret;
151                }
152
153                public int read(char[] cbuf, int off, int len) throws IOException {
154                        int i = -1;
155                        boolean done = false;
156                        while (++i < len) {
157                                int next = read();
158                                if (next == 65535 || next == -1) { // Pushback causes -1 to convert to 65535
159                                        done = true;
160                                        break;
161                                }
162                                cbuf[off + i] = (char) next;
163                        }
164                        if (i == 0 && done)
165                                i = -1;
166                        return i;
167                }
168
169                /**
170                 * Tests incoming data for match with char sequence, resets reader when done.
171                 */
172                private boolean atSequence(char[] sequence) throws IOException {
173                        boolean result = true;
174                        int i = -1;
175                        int[] data = new int[sequence.length];
176                        while (++i < sequence.length && result == true) {
177                                data[i] = super.read();
178                                if ((char) data[i] != sequence[i])
179                                        result = false; // includes case where end of stream reached
180                        }
181                        for (int j = i - 1; j >= 0; j--) {
182                                this.unread(data[j]);
183                        }
184                        return result;
185                }
186        }
187
188}