/*
 * Hl7InputStreamReader.java
 */

package ca.uhn.hl7v2.util;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Reads HL7 messages from an InputStream
 *
 * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by $Author: jamesagnew $
 * @deprecated see {@link Hl7InputStreamMessageIterator} or
 *             {@link Hl7InputStreamMessageStringIterator}
 */
public class Hl7InputStreamReader {

    private static final Logger ourLog = LoggerFactory.getLogger(Hl7InputStreamReader.class);

    /** Matches the "MSH" that opens a message at the beginning of any line. */
    private static final Pattern MESSAGE_START = Pattern.compile("^MSH", Pattern.MULTILINE);

    /**
     * Reads HL7 messages from an InputStream and outputs an array of HL7 message strings.
     * <p>
     * The whole stream is read into memory, with comments filtered out on the way, before it is
     * split into messages. No charset is passed to the {@link InputStreamReader}, so the
     * platform default charset is used for decoding. The stream is closed before this method
     * returns.
     *
     * @param theMsgInputStream the stream to read from
     * @return the HL7 messages found in the stream, each with its segments terminated by a
     *         carriage return; empty if none were found
     * @throws FileNotFoundException retained in the signature for source compatibility
     * @throws IOException if reading from or closing the stream fails
     */
    public static String[] read(InputStream theMsgInputStream) throws FileNotFoundException,
            IOException {
        BufferedReader in = null;
        try {
            in = new BufferedReader(new CommentFilterReader(
                    new InputStreamReader(theMsgInputStream)));

            StringBuilder rawMsgBuffer = new StringBuilder();
            int c;
            while ((c = in.read()) >= 0) {
                rawMsgBuffer.append((char) c);
            }

            String[] messages = getHL7Messages(rawMsgBuffer.toString());
            ourLog.info("{} messages sent.", messages.length);
            return messages;
        } finally {
            if (in != null) {
                in.close();
            }
        }
    }

    /**
     * Given a string that contains HL7 messages, and possibly other junk, returns an array of the
     * HL7 messages. An attempt is made to recognize segments even if there is other content
     * between segments, for example if a log file logs segments individually with timestamps
     * between them.
     * <p>
     * A message extends from a line starting with "MSH" up to the next such line (or the end of
     * the input). The character following "MSH" is taken as the field delimiter, and every line
     * within the extent that starts with a three-character segment ID followed by that delimiter
     * is kept as a segment.
     *
     * @param theSource a string containing HL7 messages
     * @return the HL7 messages contained in theSource
     */
    private static String[] getHL7Messages(String theSource) {
        List<String> messages = new ArrayList<String>(20);
        Matcher startMatcher = MESSAGE_START.matcher(theSource);

        // Walk the message starts with one matcher so that each message is copied out of the
        // source exactly once, instead of re-copying the remainder of the input per message.
        boolean found = startMatcher.find();
        while (found) {
            int start = startMatcher.start();
            found = startMatcher.find();
            int end = found ? startMatcher.start() : theSource.length();
            String messageExtent = theSource.substring(start, end).trim();

            if (messageExtent.length() <= 3) {
                // A bare "MSH" with nothing after it carries no field delimiter; not a message.
                continue;
            }

            char fieldDelim = messageExtent.charAt(3);
            // Segment IDs may contain digits after the first letter (PV1, NK1, IN1, ...).
            // The delimiter is quoted so that any character is matched literally.
            Pattern segmentPattern = Pattern.compile(
                    "^[A-Z][A-Z0-9]{2}" + Pattern.quote(String.valueOf(fieldDelim)) + ".*$",
                    Pattern.MULTILINE);
            Matcher segmentMatcher = segmentPattern.matcher(messageExtent);
            StringBuilder msg = new StringBuilder();
            while (segmentMatcher.find()) {
                msg.append(segmentMatcher.group().trim());
                msg.append('\r');
            }
            messages.add(msg.toString());
        }
        return messages.toArray(new String[0]);
    }

    /**
     * TODO: this code is copied from HAPI ... should make it part of HAPI public API instead
     * Removes C and C++ style comments from a reader stream. C style comments are distinguished
     * from URL protocol delimiters by the preceding colon in the latter.
     * <p>
     * A line comment is removed together with the newline that ends it. A comment that is still
     * open when the stream ends is dropped up to the end of the stream.
     */
    private static class CommentFilterReader extends PushbackReader {

        // C style block comment: /* ... */
        private static final char[] BLOCK_COMMENT_START = { '/', '*' };
        private static final char[] BLOCK_COMMENT_END = { '*', '/' };
        // C++ style line comment: // ... up to and including the newline
        private static final char[] LINE_COMMENT_START = { '/', '/' };
        private static final char[] LINE_COMMENT_END = { '\n' };
        // "://" as found in URLs; the slashes here must not be taken for a line comment
        private static final char[] PROTOCOL_DELIM = { ':', '/', '/' };

        /** Number of upcoming characters to hand out without looking for comments. */
        private int literalCharsPending = 0;

        public CommentFilterReader(Reader in) {
            // Pushback capacity must cover the longest sequence tested by atSequence (3 chars).
            super(in, 5);
        }

        /**
         * Returns the next character, not including comments.
         *
         * @return the next character outside of any comment, or -1 at the end of the stream
         */
        @Override
        public int read() throws IOException {
            if (literalCharsPending > 0) {
                // Still inside a "://" recognised by an earlier call: do not filter.
                literalCharsPending--;
                return super.read();
            }

            // Loop so that a comment directly followed by another comment is removed as well.
            boolean scanning = true;
            while (scanning) {
                if (atSequence(PROTOCOL_DELIM)) {
                    // Return the ':' now and let the two slashes after it pass through on the
                    // following calls, otherwise they would be read as a line comment.
                    literalCharsPending = PROTOCOL_DELIM.length - 1;
                    scanning = false;
                } else if (atSequence(BLOCK_COMMENT_START)) {
                    skipPast(BLOCK_COMMENT_END);
                } else if (atSequence(LINE_COMMENT_START)) {
                    skipPast(LINE_COMMENT_END);
                } else {
                    scanning = false;
                }
            }
            return super.read();
        }

        /**
         * Fills the buffer one filtered character at a time via {@link #read()}.
         *
         * @return the number of characters stored, or -1 if the end of the stream was reached
         *         before any character could be stored (0 if len is 0)
         */
        @Override
        public int read(char[] cbuf, int off, int len) throws IOException {
            int count = 0;
            while (count < len) {
                int next = read();
                if (next < 0) {
                    return count == 0 ? -1 : count;
                }
                cbuf[off + count] = (char) next;
                count++;
            }
            return count;
        }

        /**
         * Discards characters up to and including the given terminator. Stops quietly if the
         * stream ends first, so an unterminated comment cannot make the reader spin forever.
         */
        private void skipPast(char[] terminator) throws IOException {
            while (!atSequence(terminator)) {
                if (super.read() < 0) {
                    return;
                }
            }
            for (int i = 0; i < terminator.length; i++) {
                super.read();
            }
        }

        /**
         * Tests incoming data for match with char sequence, resets reader when done.
         *
         * @return true if the upcoming characters equal the sequence; false on the first
         *         mismatch or if the stream ends first
         */
        private boolean atSequence(char[] sequence) throws IOException {
            int[] data = new int[sequence.length];
            int count = 0;
            boolean result = true;
            while (result && count < sequence.length) {
                int next = super.read();
                if (next < 0) {
                    // End of stream. It is not recorded, so it is never pushed back: unread()
                    // would narrow -1 to the character U+FFFF and inject it into the data.
                    result = false;
                } else {
                    data[count] = next;
                    if ((char) next != sequence[count]) {
                        result = false;
                    }
                    count++;
                }
            }
            // Push back in reverse so the characters come out again in their original order.
            for (int j = count - 1; j >= 0; j--) {
                unread(data[j]);
            }
            return result;
        }
    }

}