001/*-
002 * #%L
003 * HAPI FHIR - Core Library
004 * %%
005 * Copyright (C) 2014 - 2023 Smile CDR, Inc.
006 * %%
007 * Licensed under the Apache License, Version 2.0 (the "License");
008 * you may not use this file except in compliance with the License.
009 * You may obtain a copy of the License at
010 *
011 *      http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 * #L%
019 */
020package ca.uhn.fhir.util;
021
022import ca.uhn.fhir.context.phonetic.ApacheEncoder;
023import ca.uhn.fhir.context.phonetic.IPhoneticEncoder;
024import ca.uhn.fhir.context.phonetic.NumericEncoder;
025import ca.uhn.fhir.context.phonetic.PhoneticEncoderEnum;
026import org.apache.commons.codec.language.Caverphone1;
027import org.apache.commons.codec.language.Caverphone2;
028import org.apache.commons.codec.language.ColognePhonetic;
029import org.apache.commons.codec.language.DoubleMetaphone;
030import org.apache.commons.codec.language.MatchRatingApproachEncoder;
031import org.apache.commons.codec.language.Metaphone;
032import org.apache.commons.codec.language.Nysiis;
033import org.apache.commons.codec.language.RefinedSoundex;
034import org.apache.commons.codec.language.Soundex;
035import org.apache.commons.lang3.EnumUtils;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038
039public final class PhoneticEncoderUtil {
040
041        // embedded class only for parameter returns
042        private static class ParsedValues {
043                private final Integer maxCodeLength;
044                private final String encoderString;
045
046                public ParsedValues(String theString, Integer theMaxCode) {
047                        maxCodeLength = theMaxCode;
048                        encoderString = theString;
049                }
050
051                public Integer getMaxCodeLength() {
052                        return maxCodeLength;
053                }
054
055                public String getEncoderString() {
056                        return encoderString;
057                }
058        }
059
060        private static final Logger ourLog = LoggerFactory.getLogger(PhoneticEncoderUtil.class);
061
062        private PhoneticEncoderUtil() {
063        }
064
065        /**
066         * Creates the phonetic encoder wrapper from
067         * an input string.
068         *
069         * <p>
070         * String must be in the format of...
071         *      </p>
072         *
073         * PhoneticEncoderEnum(MAX_LENGTH)
074         *
075         * @return The IPhoneticEncoder
076         */
077        public static IPhoneticEncoder getEncoder(String theString) {
078                ParsedValues values = parseIntValue(theString);
079                String encoderType = values.getEncoderString();
080                Integer encoderMaxString = values.getMaxCodeLength();
081
082                IPhoneticEncoder encoder = getEncoderFromString(encoderType, encoderMaxString);
083                if (encoder != null) {
084                        return encoder;
085                }
086                else {
087                        ourLog.warn("Invalid phonetic param string " + theString);
088                        return null;
089                }
090        }
091
092        private static ParsedValues parseIntValue(String theString) {
093                String encoderType = null;
094                Integer encoderMaxString = null;
095
096                int braceIndex = theString.indexOf("(");
097                if (braceIndex != -1) {
098                        int len = theString.length();
099                        if (theString.charAt(len - 1) == ')') {
100                                encoderType = theString.substring(0, braceIndex);
101                                String num = theString.substring(braceIndex + 1, len - 1);
102                                try {
103                                        encoderMaxString = Integer.parseInt(num);
104                                } catch (NumberFormatException ex) {
105                                        // invalid number parse error
106                                }
107
108                                if (encoderMaxString == null
109                                                || encoderMaxString < 0) {
110                                        // parse error
111                                        ourLog.error("Invalid encoder max character length: " + num);
112                                        encoderType = null;
113                                }
114                        }
115                        // else - parse error
116                }
117                else {
118                        encoderType = theString;
119                }
120
121                return new ParsedValues(encoderType, encoderMaxString);
122        }
123
124        private static IPhoneticEncoder getEncoderFromString(String theName, Integer theMax) {
125                IPhoneticEncoder encoder = null;
126                PhoneticEncoderEnum enumVal = EnumUtils.getEnum(PhoneticEncoderEnum.class, theName);
127
128                if (enumVal != null) {
129                        switch (enumVal) {
130                                case CAVERPHONE1:
131                                        Caverphone1 caverphone1 = new Caverphone1();
132                                        encoder = new ApacheEncoder(theName, caverphone1);
133                                        break;
134                                case CAVERPHONE2:
135                                        Caverphone2 caverphone2 = new Caverphone2();
136                                        encoder = new ApacheEncoder(theName, caverphone2);
137                                        break;
138                                case COLOGNE:
139                                        ColognePhonetic colognePhonetic = new ColognePhonetic();
140                                        encoder = new ApacheEncoder(theName, colognePhonetic);
141                                        break;
142                                case DOUBLE_METAPHONE:
143                                        DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
144                                        if (theMax != null) {
145                                                doubleMetaphone.setMaxCodeLen(theMax);
146                                        }
147                                        encoder = new ApacheEncoder(theName, doubleMetaphone);
148                                        break;
149                                case MATCH_RATING_APPROACH:
150                                        MatchRatingApproachEncoder matchRatingApproachEncoder = new MatchRatingApproachEncoder();
151                                        encoder = new ApacheEncoder(theName, matchRatingApproachEncoder);
152                                        break;
153                                case METAPHONE:
154                                        Metaphone metaphone = new Metaphone();
155                                        if (theMax != null) {
156                                                metaphone.setMaxCodeLen(theMax);
157                                        }
158                                        encoder = new ApacheEncoder(theName, metaphone);
159                                        break;
160                                case NYSIIS:
161                                        Nysiis nysiis = new Nysiis();
162                                        encoder = new ApacheEncoder(theName, nysiis);
163                                        break;
164                                case NYSIIS_LONG:
165                                        Nysiis nysiis1_long = new Nysiis(false);
166                                        encoder = new ApacheEncoder(theName, nysiis1_long);
167                                        break;
168                                case REFINED_SOUNDEX:
169                                        RefinedSoundex refinedSoundex = new RefinedSoundex();
170                                        encoder = new ApacheEncoder(theName, refinedSoundex);
171                                        break;
172                                case SOUNDEX:
173                                        Soundex soundex = new Soundex();
174                                        // soundex has deprecated setting the max size
175                                        encoder = new ApacheEncoder(theName, soundex);
176                                        break;
177                                case NUMERIC:
178                                        encoder = new NumericEncoder();
179                                        break;
180                                default:
181                                        // we don't ever expect to be here
182                                        // this log message is purely for devs who update this
183                                        // enum, but not this method
184                                        ourLog.error("Unhandled PhoneticParamEnum value " + enumVal.name());
185                                        break;
186                        }
187                }
188                return encoder;
189        }
190}