package org.elasticsearch.xpack.core.ml.inference.preprocessing;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.lucene.analysis.miscellaneous.LengthFilterFactory;
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.xcontent.ConstructingObjectParser;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor;
import org.elasticsearch.xpack.core.ml.process.writer.RecordWriter;
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;

/* loaded from: input_file:lib/x-pack-core-7.17.13.jar:org/elasticsearch/xpack/core/ml/inference/preprocessing/NGram.class */
/**
 * Pre-processor that expands one input field into character n-gram features.
 *
 * <p>The string value of {@code field} is windowed by {@code start} (which may be negative, meaning
 * "counted from the end of the string") and {@code length}. For every configured n-gram size and every
 * position inside that window, a feature is written back into the document map under the key
 * {@code featurePrefix + separator + nGramSize + position}.
 *
 * <p>Instances are immutable: the n-gram sizes array is copied on the way in and on the way out.
 */
public class NGram implements LenientlyParsedPreProcessor, StrictlyParsedPreProcessor {
    /** Window start used when {@code start} is not supplied. */
    private static final int DEFAULT_START = 0;
    /** Window length used when {@code length} is not supplied. */
    private static final int DEFAULT_LENGTH = 50;
    /** Largest accepted window length. */
    private static final int MAX_LENGTH = 100;
    /** Smallest accepted n-gram size. */
    private static final int MIN_GRAM = 1;
    /** Largest accepted n-gram size. */
    private static final int MAX_GRAM = 5;

    public static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(NGram.class);
    public static final ParseField NAME = new ParseField(org.elasticsearch.client.ml.inference.preprocessing.NGram.NAME);
    public static final ParseField FIELD = new ParseField("field");
    public static final ParseField FEATURE_PREFIX = new ParseField("feature_prefix");
    public static final ParseField NGRAMS = new ParseField("n_grams");
    public static final ParseField START = new ParseField("start");
    // NOTE(review): the name comes from a Lucene constant (decompiler constant folding); presumably it
    // resolves to "length" -- confirm before replacing it with a literal.
    public static final ParseField LENGTH = new ParseField(LengthFilterFactory.NAME);
    public static final ParseField CUSTOM = new ParseField("custom");

    // The parsers must be created after the ParseFields above because createParser reads them.
    private static final ConstructingObjectParser<NGram, PreProcessor.PreProcessorParseContext> STRICT_PARSER = createParser(false);
    private static final ConstructingObjectParser<NGram, PreProcessor.PreProcessorParseContext> LENIENT_PARSER = createParser(true);

    private final String field;
    private final String featurePrefix;
    private final int[] nGrams;
    private final int start;
    private final int length;
    private final boolean custom;

    /**
     * Builds the feature prefix used when none is configured: {@code "ngram_<start>_<length>"}, with the
     * default start/length substituted for missing values.
     */
    private static String defaultPrefix(Integer start, Integer length) {
        return "ngram_" + (start == null ? DEFAULT_START : start) + "_" + (length == null ? DEFAULT_LENGTH : length);
    }

    /**
     * Creates the object parser.
     *
     * @param lenient passed to the {@link ConstructingObjectParser} constructor as its boolean flag
     * @return a parser with {@code field} and {@code n_grams} required and all other fields optional
     */
    @SuppressWarnings("unchecked") // a[1] is produced by declareIntArray below, hence a List<Integer>
    private static ConstructingObjectParser<NGram, PreProcessor.PreProcessorParseContext> createParser(boolean lenient) {
        ConstructingObjectParser<NGram, PreProcessor.PreProcessorParseContext> parser = new ConstructingObjectParser<>(
            NAME.getPreferredName(),
            lenient,
            (a, context) -> new NGram(
                (String) a[0],
                (List<Integer>) a[1],
                (Integer) a[2],
                (Integer) a[3],
                // An absent "custom" flag falls back to the parse context's default.
                a[4] == null ? context.isCustomByDefault() : (Boolean) a[4],
                (String) a[5]
            )
        );
        // Declaration order defines the indices of the constructor-argument array used above.
        parser.declareString(ConstructingObjectParser.constructorArg(), FIELD);
        parser.declareIntArray(ConstructingObjectParser.constructorArg(), NGRAMS);
        parser.declareInt(ConstructingObjectParser.optionalConstructorArg(), START);
        parser.declareInt(ConstructingObjectParser.optionalConstructorArg(), LENGTH);
        parser.declareBoolean(ConstructingObjectParser.optionalConstructorArg(), CUSTOM);
        parser.declareString(ConstructingObjectParser.optionalConstructorArg(), FEATURE_PREFIX);
        return parser;
    }

    /**
     * Parses an {@code NGram} with the strict parser.
     *
     * @param parser  source of the XContent
     * @param context parse context; {@code null} is replaced by {@code PreProcessorParseContext.DEFAULT}
     */
    public static NGram fromXContentStrict(XContentParser parser, PreProcessor.PreProcessorParseContext context) {
        return STRICT_PARSER.apply(parser, context == null ? PreProcessor.PreProcessorParseContext.DEFAULT : context);
    }

    /**
     * Parses an {@code NGram} with the lenient parser.
     *
     * @param parser  source of the XContent
     * @param context parse context; {@code null} is replaced by {@code PreProcessorParseContext.DEFAULT}
     */
    public static NGram fromXContentLenient(XContentParser parser, PreProcessor.PreProcessorParseContext context) {
        return LENIENT_PARSER.apply(parser, context == null ? PreProcessor.PreProcessorParseContext.DEFAULT : context);
    }

    /**
     * Parser-facing constructor: de-duplicates the n-gram sizes and fills in defaults for the optional
     * values before delegating to the validating public constructor.
     */
    NGram(String field, List<Integer> nGrams, Integer start, Integer length, Boolean custom, String featurePrefix) {
        this(
            field,
            featurePrefix == null ? defaultPrefix(start, length) : featurePrefix,
            Sets.newHashSet(nGrams).stream().mapToInt(Integer::intValue).toArray(),
            start == null ? DEFAULT_START : start,
            length == null ? DEFAULT_LENGTH : length,
            custom != null && custom
        );
    }

    /**
     * Creates a validated n-gram pre-processor.
     *
     * @param field         name of the document field to read; must not be {@code null}
     * @param featurePrefix prefix of every generated feature name; must not be {@code null}
     * @param nGrams        n-gram sizes; non-empty, each within [{@value #MIN_GRAM}, {@value #MAX_GRAM}]
     *                      and not larger than {@code length}; the array is copied
     * @param start         window start; a negative value is resolved against the end of the string
     * @param length        window length; within [1, {@value #MAX_LENGTH}]
     * @param custom        value reported by {@link #isCustom()}
     * @throws RuntimeException as produced by {@code ExceptionsHelper} when an argument is {@code null}
     *                          or violates one of the constraints above
     */
    public NGram(String field, String featurePrefix, int[] nGrams, int start, int length, boolean custom) {
        this.field = ExceptionsHelper.requireNonNull(field, FIELD);
        this.featurePrefix = ExceptionsHelper.requireNonNull(featurePrefix, FEATURE_PREFIX);
        // Defensive copy so later changes to the caller's array cannot bypass the validation below.
        this.nGrams = ExceptionsHelper.requireNonNull(nGrams, NGRAMS).clone();
        if (this.nGrams.length == 0) {
            throw ExceptionsHelper.badRequestException("[{}] must not be empty", NGRAMS.getPreferredName());
        }
        if (Arrays.stream(this.nGrams).anyMatch(n -> n < MIN_GRAM || n > MAX_GRAM)) {
            throw ExceptionsHelper.badRequestException(
                "[{}] is invalid [{}]; minimum supported value is [{}]; maximum supported value is [{}]",
                NGRAMS.getPreferredName(),
                Arrays.stream(this.nGrams).mapToObj(String::valueOf).collect(Collectors.joining(", ")),
                MIN_GRAM,
                MAX_GRAM
            );
        }
        this.start = start;
        // Note: the check rejects only length + start > 0, i.e. length + start == 0 is accepted even
        // though the message says "less than 0". The message is kept unchanged for compatibility.
        if (start < 0 && length + start > 0) {
            throw ExceptionsHelper.badRequestException("if [start] is negative, [length] + [start] must be less than 0");
        }
        this.length = length;
        if (length <= 0) {
            throw ExceptionsHelper.badRequestException("[{}] must be a positive integer", LENGTH.getPreferredName());
        }
        if (length > MAX_LENGTH) {
            throw ExceptionsHelper.badRequestException("[{}] must be not be greater than [{}]", LENGTH.getPreferredName(), MAX_LENGTH);
        }
        if (Arrays.stream(this.nGrams).anyMatch(n -> n > length)) {
            throw ExceptionsHelper.badRequestException(
                "[{}] and [{}] are invalid; all ngrams must be shorter than or equal to length [{}]",
                NGRAMS.getPreferredName(),
                LENGTH.getPreferredName(),
                length
            );
        }
        this.custom = custom;
    }

    /**
     * Reads an instance from the wire. Field order mirrors {@link #writeTo(StreamOutput)}; no validation
     * is performed on the values read.
     */
    public NGram(StreamInput in) throws IOException {
        this.field = in.readString();
        this.featurePrefix = in.readString();
        this.nGrams = in.readVIntArray();
        // start is written as a plain int (not a vint) because it may be negative.
        this.start = in.readInt();
        this.length = in.readVInt();
        this.custom = in.readBoolean();
    }

    /** Writes this instance to the wire in the order expected by {@link #NGram(StreamInput)}. */
    @Override // org.elasticsearch.common.io.stream.Writeable
    public void writeTo(StreamOutput out) throws IOException {
        out.writeString(field);
        out.writeString(featurePrefix);
        out.writeVIntArray(nGrams);
        out.writeInt(start);
        out.writeVInt(length);
        out.writeBoolean(custom);
    }

    @Override
    public String toString() {
        return Strings.toString(this);
    }

    /** @return a single-element list holding the configured input field */
    @Override // org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor
    public List<String> inputFields() {
        return Collections.singletonList(field);
    }

    /** @return every feature name this processor can emit for the configured sizes and length */
    @Override // org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor
    public List<String> outputFields() {
        return allPossibleNGramOutputFeatureNames();
    }

    /**
     * Adds the n-gram features for {@code field} to {@code fields}, in place.
     *
     * <p>Nothing is added when the field is absent/{@code null}, or when {@code start} lies beyond the
     * string (past its end for a positive start, before its beginning for a negative one).
     *
     * @param fields the document; read for {@code field} and updated with the generated features
     */
    @Override // org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor
    public void process(Map<String, Object> fields) {
        Object value = fields.get(field);
        if (value == null) {
            return;
        }
        String text = value.toString();
        int textLength = text.length();
        if (start > textLength || textLength + start < 0) {
            return;
        }
        // A negative start is an offset from the end of the string.
        int windowStart = start < 0 ? textLength + start : start;
        // The window is clipped to the end of the string.
        int windowEnd = Math.min(windowStart + length, textLength);
        for (int nGram : nGrams) {
            for (int pos = 0; pos < windowEnd; pos++) {
                int from = windowStart + pos;
                int to = from + nGram;
                if (to > windowEnd) {
                    // No room left in the window for a full n-gram of this size.
                    break;
                }
                fields.put(nGramFeature(nGram, pos), text.substring(from, to));
            }
        }
    }

    /** @return a map from every possible output feature name back to the input field */
    @Override // org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor
    public Map<String, String> reverseLookup() {
        return outputFields().stream().collect(Collectors.toMap(Function.identity(), ignored -> field));
    }

    /** @return {@code "text"} for every output field */
    @Override // org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor
    public String getOutputFieldType(String outputField) {
        return "text";
    }

    /** @return the shallow instance size plus the sizes of the field name, prefix and n-gram array */
    @Override // org.apache.lucene.util.Accountable
    public long ramBytesUsed() {
        return SHALLOW_SIZE
            + RamUsageEstimator.sizeOf(field)
            + RamUsageEstimator.sizeOf(featurePrefix)
            + RamUsageEstimator.sizeOf(nGrams);
    }

    @Override // org.elasticsearch.common.io.stream.NamedWriteable
    public String getWriteableName() {
        return NAME.getPreferredName();
    }

    @Override // org.elasticsearch.xpack.core.ml.utils.NamedXContentObject
    public String getName() {
        return NAME.getPreferredName();
    }

    /** Serializes all six properties as one XContent object. */
    @Override // org.elasticsearch.xcontent.ToXContent
    public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
        builder.startObject();
        builder.field(FIELD.getPreferredName(), field);
        builder.field(FEATURE_PREFIX.getPreferredName(), featurePrefix);
        builder.field(NGRAMS.getPreferredName(), nGrams);
        builder.field(START.getPreferredName(), start);
        builder.field(LENGTH.getPreferredName(), length);
        builder.field(CUSTOM.getPreferredName(), custom);
        builder.endObject();
        return builder;
    }

    public String getField() {
        return field;
    }

    public String getFeaturePrefix() {
        return featurePrefix;
    }

    /** @return a copy of the configured n-gram sizes; changes to it do not affect this instance */
    public int[] getnGrams() {
        return nGrams.clone();
    }

    public int getStart() {
        return start;
    }

    public int getLength() {
        return length;
    }

    @Override // org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor
    public boolean isCustom() {
        return custom;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        NGram other = (NGram) o;
        return start == other.start
            && length == other.length
            && custom == other.custom
            && Objects.equals(field, other.field)
            && Objects.equals(featurePrefix, other.featurePrefix)
            && Arrays.equals(nGrams, other.nGrams);
    }

    @Override
    public int hashCode() {
        return 31 * Objects.hash(field, featurePrefix, start, length, custom) + Arrays.hashCode(nGrams);
    }

    /**
     * Builds the feature name for one n-gram size at one window position.
     * NOTE(review): the separator is taken from {@code RecordWriter.CONTROL_FIELD_NAME} (decompiler
     * constant folding); presumably it is "." -- confirm before replacing it with a literal.
     */
    private String nGramFeature(int nGram, int pos) {
        return featurePrefix + RecordWriter.CONTROL_FIELD_NAME + nGram + pos;
    }

    /**
     * Enumerates every feature name that {@link #process(Map)} could produce for a string at least
     * {@code length} characters long: for each n-gram size {@code n}, positions {@code 0} up to
     * (but excluding) {@code length - (n - 1)}.
     */
    private List<String> allPossibleNGramOutputFeatureNames() {
        int total = 0;
        for (int nGram : nGrams) {
            total += length - (nGram - 1);
        }
        if (total <= 0) {
            return Collections.emptyList();
        }
        List<String> names = new ArrayList<>(total);
        for (int nGram : nGrams) {
            int positions = length - (nGram - 1);
            for (int pos = 0; pos < positions; pos++) {
                names.add(nGramFeature(nGram, pos));
            }
        }
        return names;
    }
}
