001/*- 002 * #%L 003 * HAPI FHIR - Core Library 004 * %% 005 * Copyright (C) 2014 - 2023 Smile CDR, Inc. 006 * %% 007 * Licensed under the Apache License, Version 2.0 (the "License"); 008 * you may not use this file except in compliance with the License. 009 * You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 * #L% 019 */ 020package ca.uhn.fhir.util; 021 022import ca.uhn.fhir.context.phonetic.ApacheEncoder; 023import ca.uhn.fhir.context.phonetic.IPhoneticEncoder; 024import ca.uhn.fhir.context.phonetic.NumericEncoder; 025import ca.uhn.fhir.context.phonetic.PhoneticEncoderEnum; 026import org.apache.commons.codec.language.Caverphone1; 027import org.apache.commons.codec.language.Caverphone2; 028import org.apache.commons.codec.language.ColognePhonetic; 029import org.apache.commons.codec.language.DoubleMetaphone; 030import org.apache.commons.codec.language.MatchRatingApproachEncoder; 031import org.apache.commons.codec.language.Metaphone; 032import org.apache.commons.codec.language.Nysiis; 033import org.apache.commons.codec.language.RefinedSoundex; 034import org.apache.commons.codec.language.Soundex; 035import org.apache.commons.lang3.EnumUtils; 036import org.slf4j.Logger; 037import org.slf4j.LoggerFactory; 038 039public final class PhoneticEncoderUtil { 040 041 // embedded class only for parameter returns 042 private static class ParsedValues { 043 private final Integer maxCodeLength; 044 private final String encoderString; 045 046 public ParsedValues(String theString, Integer theMaxCode) { 047 maxCodeLength = theMaxCode; 048 encoderString = theString; 049 } 050 051 public Integer getMaxCodeLength() { 052 return maxCodeLength; 053 } 054 055 public String getEncoderString() { 056 return encoderString; 057 } 058 } 059 060 private static final Logger ourLog = LoggerFactory.getLogger(PhoneticEncoderUtil.class); 061 062 private PhoneticEncoderUtil() { 063 } 064 065 /** 066 * Creates the phonetic encoder wrapper from 067 * an input string. 068 * 069 * <p> 070 * String must be in the format of... 071 * </p> 072 * 073 * PhoneticEncoderEnum(MAX_LENGTH) 074 * 075 * @return The IPhoneticEncoder 076 */ 077 public static IPhoneticEncoder getEncoder(String theString) { 078 ParsedValues values = parseIntValue(theString); 079 String encoderType = values.getEncoderString(); 080 Integer encoderMaxString = values.getMaxCodeLength(); 081 082 IPhoneticEncoder encoder = getEncoderFromString(encoderType, encoderMaxString); 083 if (encoder != null) { 084 return encoder; 085 } 086 else { 087 ourLog.warn("Invalid phonetic param string " + theString); 088 return null; 089 } 090 } 091 092 private static ParsedValues parseIntValue(String theString) { 093 String encoderType = null; 094 Integer encoderMaxString = null; 095 096 int braceIndex = theString.indexOf("("); 097 if (braceIndex != -1) { 098 int len = theString.length(); 099 if (theString.charAt(len - 1) == ')') { 100 encoderType = theString.substring(0, braceIndex); 101 String num = theString.substring(braceIndex + 1, len - 1); 102 try { 103 encoderMaxString = Integer.parseInt(num); 104 } catch (NumberFormatException ex) { 105 // invalid number parse error 106 } 107 108 if (encoderMaxString == null 109 || encoderMaxString < 0) { 110 // parse error 111 ourLog.error("Invalid encoder max character length: " + num); 112 encoderType = null; 113 } 114 } 115 // else - parse error 116 } 117 else { 118 encoderType = theString; 119 } 120 121 return new ParsedValues(encoderType, encoderMaxString); 122 } 123 124 private static IPhoneticEncoder getEncoderFromString(String theName, Integer theMax) { 125 IPhoneticEncoder encoder = null; 126 PhoneticEncoderEnum enumVal = EnumUtils.getEnum(PhoneticEncoderEnum.class, theName); 127 128 if (enumVal != null) { 129 switch (enumVal) { 130 case CAVERPHONE1: 131 Caverphone1 caverphone1 = new Caverphone1(); 132 encoder = new ApacheEncoder(theName, caverphone1); 133 break; 134 case CAVERPHONE2: 135 Caverphone2 caverphone2 = new Caverphone2(); 136 encoder = new ApacheEncoder(theName, caverphone2); 137 break; 138 case COLOGNE: 139 ColognePhonetic colognePhonetic = new ColognePhonetic(); 140 encoder = new ApacheEncoder(theName, colognePhonetic); 141 break; 142 case DOUBLE_METAPHONE: 143 DoubleMetaphone doubleMetaphone = new DoubleMetaphone(); 144 if (theMax != null) { 145 doubleMetaphone.setMaxCodeLen(theMax); 146 } 147 encoder = new ApacheEncoder(theName, doubleMetaphone); 148 break; 149 case MATCH_RATING_APPROACH: 150 MatchRatingApproachEncoder matchRatingApproachEncoder = new MatchRatingApproachEncoder(); 151 encoder = new ApacheEncoder(theName, matchRatingApproachEncoder); 152 break; 153 case METAPHONE: 154 Metaphone metaphone = new Metaphone(); 155 if (theMax != null) { 156 metaphone.setMaxCodeLen(theMax); 157 } 158 encoder = new ApacheEncoder(theName, metaphone); 159 break; 160 case NYSIIS: 161 Nysiis nysiis = new Nysiis(); 162 encoder = new ApacheEncoder(theName, nysiis); 163 break; 164 case NYSIIS_LONG: 165 Nysiis nysiis1_long = new Nysiis(false); 166 encoder = new ApacheEncoder(theName, nysiis1_long); 167 break; 168 case REFINED_SOUNDEX: 169 RefinedSoundex refinedSoundex = new RefinedSoundex(); 170 encoder = new ApacheEncoder(theName, refinedSoundex); 171 break; 172 case SOUNDEX: 173 Soundex soundex = new Soundex(); 174 // soundex has deprecated setting the max size 175 encoder = new ApacheEncoder(theName, soundex); 176 break; 177 case NUMERIC: 178 encoder = new NumericEncoder(); 179 break; 180 default: 181 // we don't ever expect to be here 182 // this log message is purely for devs who update this 183 // enum, but not this method 184 ourLog.error("Unhandled PhoneticParamEnum value " + enumVal.name()); 185 break; 186 } 187 } 188 return encoder; 189 } 190}