001/*-
002 * #%L
003 * HAPI FHIR - Core Library
004 * %%
005 * Copyright (C) 2014 - 2023 Smile CDR, Inc.
006 * %%
007 * Licensed under the Apache License, Version 2.0 (the "License");
008 * you may not use this file except in compliance with the License.
009 * You may obtain a copy of the License at
010 *
011 *      http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 * #L%
019 */
020package ca.uhn.fhir.context.phonetic;
021
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.StringJoiner;
import java.util.regex.Pattern;
029
030public class ApacheEncoder implements IPhoneticEncoder {
031        private static final Logger ourLog = LoggerFactory.getLogger(ApacheEncoder.class);
032
033        private final String myName;
034        private final StringEncoder myStringEncoder;
035
036        public ApacheEncoder(String theName, StringEncoder theStringEncoder) {
037                myName = theName;
038                myStringEncoder = theStringEncoder;
039        }
040
041        @Override
042        public String name() {
043                return myName;
044        }
045
046        @Override
047        public String encode(String theString) {
048                try {
049                        // If the string contains a space, encode alpha parts separately so, for example, numbers are preserved in address lines.
050                        if (theString.contains(" ")) {
051                                return encodeStringWithSpaces(theString);
052                        }
053                        return myStringEncoder.encode(theString);
054                } catch (EncoderException e) {
055                        ourLog.error("Failed to encode string " + theString, e);
056                        return theString;
057                }
058        }
059
060        private String encodeStringWithSpaces(String theString) throws EncoderException {
061                StringJoiner joiner = new StringJoiner(" ");
062
063                // This sub-stack holds the alpha parts
064                StringJoiner alphaJoiner = new StringJoiner(" ");
065
066                for (String part : theString.split("[\\s\\W]+")) {
067                        if (StringUtils.isAlpha(part)) {
068                                alphaJoiner.add(part);
069                        } else {
070                                // Once we hit a non-alpha part, encode all the alpha parts together as a single string
071                                // This is to allow encoders like METAPHONE to match Hans Peter to Hanspeter
072                                alphaJoiner = encodeAlphaParts(joiner, alphaJoiner);
073                                joiner.add(part);
074                        }
075                }
076                encodeAlphaParts(joiner, alphaJoiner);
077
078                return joiner.toString();
079        }
080
081        private StringJoiner encodeAlphaParts(StringJoiner theJoiner, StringJoiner theAlphaJoiner) throws EncoderException {
082                // Encode the alpha parts as a single string and then flush the alpha encoder
083                if (theAlphaJoiner.length() > 0) {
084                        theJoiner.add(myStringEncoder.encode(theAlphaJoiner.toString()));
085                        theAlphaJoiner = new StringJoiner(" ");
086                }
087                return theAlphaJoiner;
088        }
089}