001/*- 002 * #%L 003 * HAPI FHIR - Core Library 004 * %% 005 * Copyright (C) 2014 - 2023 Smile CDR, Inc. 006 * %% 007 * Licensed under the Apache License, Version 2.0 (the "License"); 008 * you may not use this file except in compliance with the License. 009 * You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 * #L% 019 */ 020package ca.uhn.fhir.context.phonetic; 021 022import org.apache.commons.codec.EncoderException; 023import org.apache.commons.codec.StringEncoder; 024import org.apache.commons.lang3.StringUtils; 025import org.slf4j.Logger; 026import org.slf4j.LoggerFactory; 027 028import java.util.StringJoiner; 029 030public class ApacheEncoder implements IPhoneticEncoder { 031 private static final Logger ourLog = LoggerFactory.getLogger(ApacheEncoder.class); 032 033 private final String myName; 034 private final StringEncoder myStringEncoder; 035 036 public ApacheEncoder(String theName, StringEncoder theStringEncoder) { 037 myName = theName; 038 myStringEncoder = theStringEncoder; 039 } 040 041 @Override 042 public String name() { 043 return myName; 044 } 045 046 @Override 047 public String encode(String theString) { 048 try { 049 // If the string contains a space, encode alpha parts separately so, for example, numbers are preserved in address lines. 050 if (theString.contains(" ")) { 051 return encodeStringWithSpaces(theString); 052 } 053 return myStringEncoder.encode(theString); 054 } catch (EncoderException e) { 055 ourLog.error("Failed to encode string " + theString, e); 056 return theString; 057 } 058 } 059 060 private String encodeStringWithSpaces(String theString) throws EncoderException { 061 StringJoiner joiner = new StringJoiner(" "); 062 063 // This sub-stack holds the alpha parts 064 StringJoiner alphaJoiner = new StringJoiner(" "); 065 066 for (String part : theString.split("[\\s\\W]+")) { 067 if (StringUtils.isAlpha(part)) { 068 alphaJoiner.add(part); 069 } else { 070 // Once we hit a non-alpha part, encode all the alpha parts together as a single string 071 // This is to allow encoders like METAPHONE to match Hans Peter to Hanspeter 072 alphaJoiner = encodeAlphaParts(joiner, alphaJoiner); 073 joiner.add(part); 074 } 075 } 076 encodeAlphaParts(joiner, alphaJoiner); 077 078 return joiner.toString(); 079 } 080 081 private StringJoiner encodeAlphaParts(StringJoiner theJoiner, StringJoiner theAlphaJoiner) throws EncoderException { 082 // Encode the alpha parts as a single string and then flush the alpha encoder 083 if (theAlphaJoiner.length() > 0) { 084 theJoiner.add(myStringEncoder.encode(theAlphaJoiner.toString())); 085 theAlphaJoiner = new StringJoiner(" "); 086 } 087 return theAlphaJoiner; 088 } 089}