package org.apache.rya.indexing.accumulo.freetext;

import com.google.common.base.Charsets;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.nio.charset.CharacterCodingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.MultiTableBatchWriter;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableExistsException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.admin.TableOperations;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.file.keyfunctor.ColumnFamilyFunctor;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.Validate;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.log4j.Logger;
import org.apache.rya.accumulo.experimental.AbstractAccumuloIndexer;
import org.apache.rya.api.RdfCloudTripleStoreConfiguration;
import org.apache.rya.api.domain.RyaStatement;
import org.apache.rya.api.resolver.RyaToRdfConversions;
import org.apache.rya.indexing.FreeTextIndexer;
import org.apache.rya.indexing.Md5Hash;
import org.apache.rya.indexing.StatementConstraints;
import org.apache.rya.indexing.StatementSerializer;
import org.apache.rya.indexing.accumulo.ConfigUtils;
import org.apache.rya.indexing.accumulo.freetext.iterators.BooleanTreeIterator;
import org.apache.rya.indexing.accumulo.freetext.query.ASTExpression;
import org.apache.rya.indexing.accumulo.freetext.query.ASTNodeUtils;
import org.apache.rya.indexing.accumulo.freetext.query.ASTSimpleNode;
import org.apache.rya.indexing.accumulo.freetext.query.ASTTerm;
import org.apache.rya.indexing.accumulo.freetext.query.ParseException;
import org.apache.rya.indexing.accumulo.freetext.query.QueryParser;
import org.apache.rya.indexing.accumulo.freetext.query.SimpleNode;
import org.apache.rya.indexing.accumulo.freetext.query.TokenMgrError;
import org.eclipse.rdf4j.common.iteration.CloseableIteration;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.query.QueryEvaluationException;

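/**
 * Free-text indexer that keeps its data in two Accumulo tables: a "doc" table holding each serialized statement
 * together with one column per extracted term, and a "term" table listing every indexed term (in both a forward
 * and a reversed column-family form) so that wildcard queries can be expanded into concrete terms.
 */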
public class AccumuloFreeTextIndexer extends AbstractAccumuloIndexer implements FreeTextIndexer {
    // Suffix appended to the configured table prefix to form the term table name.
    private static final String TABLE_SUFFIX_TERM = "freetext_term";
    // Suffix appended to the configured table prefix to form the doc table name.
    private static final String TABLE_SUFFFIX_DOC = "freetext";
    // NOTE(review): no read of this flag is visible in this class; presumably it gates term-table cleanup on
    // delete and was inlined by the compiler — confirm against the original source.
    private static final boolean IS_TERM_TABLE_TOKEN_DELETION_ENABLED = true;
    // Splits literal values into index terms; loaded from the configuration in initInternal().
    private Tokenizer tokenizer;
    // Writer for the doc table, obtained from mtbw.
    private BatchWriter docTableBw;
    // Writer for the term table, obtained from mtbw.
    private BatchWriter termTableBw;
    // Shared multi-table writer supplied through setMultiTableBatchWriter().
    private MultiTableBatchWriter mtbw;
    // Maximum number of terms a query may contain; queryText() rejects larger queries.
    private int queryTermLimit;
    // Number of row partitions statements are hashed into in the doc table.
    private int docTableNumPartitions;
    // Predicates eligible for indexing; an empty set means every predicate is indexed.
    private Set<IRI> validPredicates;
    private Configuration conf;
    // Set once initInternal() has completed successfully so that later setConf() calls do not re-initialize.
    private boolean isInit = false;
    private static final Logger logger = Logger.getLogger(AccumuloFreeTextIndexer.class);
    // Shared empty key/value parts used for marker-only entries.
    private static final byte[] EMPTY_BYTES = new byte[0];
    private static final Text EMPTY_TEXT = new Text(EMPTY_BYTES);
    private static final Value EMPTY_VALUE = new Value(EMPTY_BYTES);

    /**
     * Prepares both index tables and loads the indexing settings from the configuration.
     *
     * <p>For each table, split points (and, for the doc table, bloom filter properties) are only applied when
     * {@code ConfigUtils.createTableIfNotExists} returns {@code true} and the configuration does not use a mock
     * instance. Batch writers are bound here only if a multi-table writer has already been supplied.
     *
     * @throws AccumuloException if an Accumulo operation fails
     * @throws AccumuloSecurityException if the configured user lacks the required permissions
     * @throws TableNotFoundException if a table disappears while it is being configured
     * @throws TableExistsException if table creation collides with an existing table
     */
    private void initInternal() throws AccumuloException, AccumuloSecurityException, TableNotFoundException, TableExistsException {
        final String docTable = getFreeTextDocTablename(this.conf);
        final String termTable = getFreeTextTermTablename(this.conf);
        this.docTableNumPartitions = ConfigUtils.getFreeTextDocNumPartitions(this.conf);
        final int termPartitions = ConfigUtils.getFreeTextTermNumPartitions(this.conf);
        final TableOperations tableOps = ConfigUtils.getConnector(this.conf).tableOperations();

        final boolean termTableReady = ConfigUtils.createTableIfNotExists(this.conf, termTable);
        if (termTableReady && !ConfigUtils.useMockInstance(this.conf) && termPartitions > 0) {
            final SortedSet<Text> termSplits = new TreeSet<>();
            // One split at the start of the reversed-term section, then evenly spaced letters in both sections.
            termSplits.add(new Text(ColumnPrefixes.getRevTermListColFam("")));
            final int lettersPerSection = (termPartitions - 1) / 2;
            if (lettersPerSection > 0) {
                final int stride = 26 / lettersPerSection;
                for (int k = 0; k < lettersPerSection; k++) {
                    final String letter = String.valueOf((char) ('a' + (stride * k)));
                    termSplits.add(new Text(ColumnPrefixes.getTermListColFam(letter)));
                    termSplits.add(new Text(ColumnPrefixes.getRevTermListColFam(letter)));
                }
            }
            tableOps.addSplits(termTable, termSplits);
        }

        final boolean docTableReady = ConfigUtils.createTableIfNotExists(this.conf, docTable);
        if (docTableReady && !ConfigUtils.useMockInstance(this.conf)) {
            final SortedSet<Text> docSplits = new TreeSet<>();
            for (int partition = 0; partition < this.docTableNumPartitions; partition++) {
                docSplits.add(genPartition(partition, this.docTableNumPartitions));
            }
            tableOps.addSplits(docTable, docSplits);
            // Bloom filters keyed on the column family.
            tableOps.setProperty(docTable, "table.bloom.key.functor", ColumnFamilyFunctor.class.getCanonicalName());
            tableOps.setProperty(docTable, "table.bloom.enabled", Boolean.TRUE.toString());
        }

        if (this.mtbw != null) {
            this.docTableBw = this.mtbw.getBatchWriter(docTable);
            this.termTableBw = this.mtbw.getBatchWriter(termTable);
        }

        this.tokenizer = ConfigUtils.getFreeTextTokenizer(this.conf);
        this.validPredicates = ConfigUtils.getFreeTextPredicates(this.conf);
        this.queryTermLimit = ConfigUtils.getFreeTextTermLimit(this.conf);
    }

    /**
     * Stores the configuration and, the first time it is called, initializes the index tables from it.
     * Subsequent calls only replace the stored configuration.
     *
     * @param configuration the configuration to use
     * @throws RuntimeException wrapping any Accumulo failure raised during initialization
     */
    public void setConf(Configuration configuration) {
        this.conf = configuration;
        if (!this.isInit) {
            try {
                initInternal();
                this.isInit = true;
            } catch (AccumuloException | AccumuloSecurityException | TableNotFoundException | TableExistsException initFailure) {
                logger.warn("Unable to initialize index.  Throwing Runtime Exception. ", initFailure);
                throw new RuntimeException(initFailure);
            }
        }
    }

    /**
     * Returns the configuration most recently passed to {@link #setConf(Configuration)}.
     *
     * @return the stored configuration, or {@code null} if none has been set yet
     */
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Indexes one statement if its predicate is indexable and its object is a literal.
     *
     * <p>The lower-cased literal is tokenized; when at least one term results, a single doc-table mutation is
     * written (serialized statement, subject/predicate/object/context markers and one marker per term, all
     * under the statement's MD5-based id) and one forward plus one reversed marker row per term is written to
     * the term table.
     *
     * @param statement the statement to index
     * @throws IOException if the batch writers reject the mutations
     * @throws NullPointerException if no multi-table batch writer has been set
     */
    private void storeStatement(Statement statement) throws IOException {
        Objects.requireNonNull(this.mtbw, "Freetext indexer attempting to store, but setMultiTableBatchWriter() was not set.");

        final boolean predicateIndexed = this.validPredicates.isEmpty() || this.validPredicates.contains(statement.getPredicate());
        if (!predicateIndexed || !(statement.getObject() instanceof Literal)) {
            return;
        }

        final SortedSet<String> terms = this.tokenizer.tokenize(statement.getObject().stringValue().toLowerCase());
        if (terms.isEmpty()) {
            return;
        }

        final String serialized = StatementSerializer.writeStatement(statement);
        final String docHash = Md5Hash.md5Base64(serialized);
        // The row is the partition derived from the serialized statement's hash code.
        final Mutation docMutation = new Mutation(genPartition(serialized.hashCode(), this.docTableNumPartitions));
        final List<Mutation> termMutations = new ArrayList<>();
        final Text docId = new Text(docHash);

        docMutation.put(ColumnPrefixes.DOCS_CF_PREFIX, docId, new Value(serialized.getBytes(Charsets.UTF_8)));
        docMutation.put(ColumnPrefixes.getSubjColFam(statement), docId, EMPTY_VALUE);
        docMutation.put(ColumnPrefixes.getPredColFam(statement), docId, EMPTY_VALUE);
        docMutation.put(ColumnPrefixes.getObjColFam(statement), docId, EMPTY_VALUE);
        docMutation.put(ColumnPrefixes.getContextColFam(statement), docId, EMPTY_VALUE);

        for (final String term : terms) {
            docMutation.put(ColumnPrefixes.getTermColFam(term), docId, EMPTY_VALUE);
            termMutations.add(createEmptyPutMutation(ColumnPrefixes.getTermListColFam(term)));
            termMutations.add(createEmptyPutMutation(ColumnPrefixes.getRevTermListColFam(term)));
        }

        try {
            this.docTableBw.addMutation(docMutation);
            this.termTableBw.addMutations(termMutations);
        } catch (MutationsRejectedException rejected) {
            logger.error("error adding mutation", rejected);
            throw new IOException(rejected);
        }
    }

    /**
     * Converts the Rya statement to its RDF form and indexes it.
     *
     * @param ryaStatement the statement to index
     * @throws IOException if the batch writers reject the mutations
     */
    public void storeStatement(RyaStatement ryaStatement) throws IOException {
        final Statement rdfStatement = RyaToRdfConversions.convertStatement(ryaStatement);
        storeStatement(rdfStatement);
    }

    /**
     * Builds a mutation that writes a single marker entry (empty column family, qualifier and value) to a row.
     *
     * @param text the row to write
     * @return the mutation carrying the marker put
     */
    private static Mutation createEmptyPutMutation(Text text) {
        final Mutation marker = new Mutation(text);
        marker.put(EMPTY_TEXT, EMPTY_TEXT, EMPTY_VALUE);
        return marker;
    }

    /**
     * Builds a mutation that deletes the marker entry (empty column family and qualifier) of a row.
     *
     * @param text the row whose marker is deleted
     * @return the mutation carrying the marker delete
     */
    private static Mutation createEmptyPutDeleteMutation(Text text) {
        final Mutation markerDelete = new Mutation(text);
        markerDelete.putDelete(EMPTY_TEXT, EMPTY_TEXT);
        return markerDelete;
    }

    /**
     * Maps a hash value onto a partition row key.
     *
     * <p>The key is {@code |i % i2|} rendered as a zero-padded decimal whose width equals the number of
     * characters in {@code i2}. Formatting is pinned to {@link java.util.Locale#ROOT}: with the JVM default
     * locale, {@code %d} may emit locale-specific digits, which would make row keys depend on the machine
     * the indexer runs on.
     *
     * @param i the value to partition (typically a hash code; may be negative)
     * @param i2 the number of partitions
     * @return the partition row key
     * @throws ArithmeticException if {@code i2} is zero
     */
    private static Text genPartition(int i, int i2) {
        final int width = Integer.toString(i2).length();
        final int bucket = Math.abs(i % i2);
        return new Text(String.format(java.util.Locale.ROOT, "%0" + width + "d", Integer.valueOf(bucket)));
    }

    /**
     * Returns the predicates this indexer accepts, as loaded from the configuration during initialization.
     * The internal set itself is returned, not a copy.
     *
     * @return the indexable predicates; an empty set is treated by this class as "index every predicate"
     */
    public Set<IRI> getIndexablePredicates() {
        return this.validPredicates;
    }

    /**
     * Flushes all pending mutations of the shared multi-table batch writer.
     *
     * <p>Does nothing when no writer has been set, mirroring the null guard in {@code close()}; previously this
     * case failed with a {@code NullPointerException}.
     *
     * @throws IOException if the writer reports rejected mutations
     */
    public void flush() throws IOException {
        if (this.mtbw == null) {
            // Nothing can be buffered without a writer.
            return;
        }
        try {
            this.mtbw.flush();
        } catch (MutationsRejectedException e) {
            logger.error("error flushing the batch writer", e);
            throw new IOException(e);
        }
    }

    /**
     * Closes the shared multi-table batch writer, if one has been set.
     *
     * @throws IOException if the writer reports rejected mutations while closing
     */
    public void close() throws IOException {
        if (this.mtbw == null) {
            return;
        }
        try {
            this.mtbw.close();
        } catch (MutationsRejectedException rejected) {
            logger.error("error closing the batch writer", rejected);
            throw new IOException(rejected);
        }
    }

    /**
     * Expands a wildcard term into the concrete terms currently present in the term table.
     *
     * <p>When {@code z} is {@code true} the leading {@code *} is stripped, the reversed-term rows are scanned
     * and every hit is reversed before being returned; otherwise the trailing {@code *} is stripped and the
     * forward-term rows are scanned. The scanner is always closed before returning (it was previously leaked).
     *
     * @param str the wildcard term as written in the query
     * @param z {@code true} to look the term up in the reversed-term rows
     * @return the matching terms; never empty — if nothing matches, a single placeholder term is returned so
     *         that the caller can still build a non-empty expression
     * @throws IOException if the term table cannot be scanned
     */
    private Set<String> unrollWildcard(String str, boolean z) throws IOException {
        final Scanner termScanner = getScanner(getFreeTextTermTablename(this.conf));
        try {
            final Set<String> expanded = new HashSet<>();
            final Text rowPrefix = z
                    ? ColumnPrefixes.getRevTermListColFam(StringUtils.removeStart(str, "*").toLowerCase())
                    : ColumnPrefixes.getTermListColFam(StringUtils.removeEnd(str, "*").toLowerCase());
            termScanner.setRange(Range.prefix(rowPrefix));

            for (final Map.Entry<Key, Value> entry : termScanner) {
                final String term = ColumnPrefixes.removePrefix(entry.getKey().getRow()).toString();
                expanded.add(z ? StringUtils.reverse(term) : term);
            }

            if (expanded.isEmpty()) {
                // Placeholder made of control characters; presumably never matches an indexed term.
                expanded.add("\u0001\u0001\u0001");
            }
            return expanded;
        } finally {
            termScanner.close();
        }
    }

    /**
     * Walks the query tree and replaces every wildcard or prefix term with an OR expression over the concrete
     * terms found in the term table.
     *
     * <p>Expression and simple nodes are traversed recursively. Plain terms are left untouched. The replacement
     * expression inherits the NOT flag of the term it replaces and takes over that term's child slot in the
     * parent.
     *
     * @param simpleNode the node to process
     * @throws IOException if the term table cannot be scanned
     * @throws IllegalArgumentException if the node is of an unsupported type
     */
    private void unrollWildcards(SimpleNode simpleNode) throws IOException {
        if (simpleNode instanceof ASTExpression || simpleNode instanceof ASTSimpleNode) {
            for (final SimpleNode child : ASTNodeUtils.getNodeIterator(simpleNode)) {
                unrollWildcards(child);
            }
            return;
        }
        if (!(simpleNode instanceof ASTTerm)) {
            throw new IllegalArgumentException("Node is of unknown type: " + simpleNode.getClass().getName());
        }

        final ASTTerm term = (ASTTerm) simpleNode;
        final boolean isWildTerm = term.getType().equals(ASTTerm.WILDTERM);
        final boolean isPrefixTerm = term.getType().equals(ASTTerm.PREFIXTERM);
        if (!isWildTerm && !isPrefixTerm) {
            return;
        }

        // Prefix terms are resolved against the reversed-term rows.
        final Set<String> concreteTerms = unrollWildcard(term.getTerm(), isPrefixTerm);

        final ASTExpression alternatives = new ASTExpression(1);
        alternatives.setType(ASTExpression.OR);
        alternatives.setNotFlag(term.isNotFlag());
        for (final String concrete : concreteTerms) {
            final ASTTerm leaf = new ASTTerm(3);
            leaf.setNotFlag(false);
            leaf.setTerm(concrete);
            leaf.setType(ASTTerm.TERM);
            ASTNodeUtils.pushChild(alternatives, leaf);
        }

        final SimpleNode parent = (SimpleNode) term.jjtGetParent();
        final int slot = ASTNodeUtils.getChildIndex(parent, term);
        Validate.isTrue(slot >= 0, "child not found in parent");
        parent.jjtAddChild(alternatives, slot);
    }

    /**
     * Creates a scanner over the given table using the current configuration.
     *
     * <p>The caller owns the returned scanner and is responsible for closing it.
     *
     * @param str the name of the table to scan
     * @return a new scanner for the table
     * @throws IOException wrapping any Accumulo failure raised while creating the scanner
     */
    private Scanner getScanner(String str) throws IOException {
        try {
            return ConfigUtils.createScanner(str, this.conf);
        } catch (AccumuloException | AccumuloSecurityException | TableNotFoundException e) {
            // Log the cause as well, like every other handler in this class does.
            logger.error("Error connecting to " + str, e);
            throw new IOException(e);
        }
    }

    /**
     * Runs a free-text query, optionally narrowed by subject, context and predicate constraints.
     *
     * <p>Wildcards in the query are first expanded against the term table, the constraints are appended as
     * additional AND clauses, and the combined query is evaluated by a {@code BooleanTreeIterator} over the
     * whole doc table. The scanner is handed to the returned iteration, which closes it; if anything fails
     * before that hand-off (parse error, term limit exceeded, scan setup failure) the scanner is closed here
     * instead of being leaked.
     *
     * @param str the free-text query
     * @param statementConstraints additional subject/context/predicate restrictions
     * @return an iteration over the matching statements; the caller must close it
     * @throws IOException if the query cannot be parsed, exceeds the configured term limit, or a table cannot
     *         be scanned
     */
    @Override // org.apache.rya.indexing.FreeTextIndexer
    public CloseableIteration<Statement, QueryEvaluationException> queryText(String str, StatementConstraints statementConstraints) throws IOException {
        final Scanner scanner = getScanner(getFreeTextDocTablename(this.conf));
        boolean handedOff = false;
        try {
            final SimpleNode parsed = parseQuery(str);
            unrollWildcards(parsed);

            final StringBuilder sb = new StringBuilder("(" + ASTNodeUtils.serializeExpression(parsed) + ")");
            if (statementConstraints.hasSubject()) {
                sb.append(" AND ");
                sb.append(ColumnPrefixes.getSubjColFam(statementConstraints.getSubject().toString()).toString());
            }
            if (statementConstraints.hasContext()) {
                sb.append(" AND ");
                sb.append(ColumnPrefixes.getContextColFam(statementConstraints.getContext().toString()).toString());
            }
            if (statementConstraints.hasPredicates()) {
                sb.append(" AND (");
                final List<String> predicateTerms = new ArrayList<>();
                for (final IRI predicate : statementConstraints.getPredicates()) {
                    predicateTerms.add(ColumnPrefixes.getPredColFam(predicate.stringValue()).toString());
                }
                sb.append(StringUtils.join(predicateTerms, " OR "));
                sb.append(")");
            }

            final String fullQuery = sb.toString();
            // Re-parse the combined query so the limit also counts expanded wildcards and constraint terms.
            final int termCount = ASTNodeUtils.termCount(parseQuery(fullQuery));
            if (termCount > this.queryTermLimit) {
                throw new IOException("Query contains too many terms.  Term limit: " + this.queryTermLimit + ".  Term Count: " + termCount);
            }

            scanner.clearScanIterators();
            scanner.clearColumns();
            final IteratorSetting iteratorSetting = new IteratorSetting(20, "booleanTree", BooleanTreeIterator.class);
            BooleanTreeIterator.setQuery(iteratorSetting, fullQuery);
            scanner.addScanIterator(iteratorSetting);
            scanner.setRange(new Range());

            final CloseableIteration<Statement, QueryEvaluationException> results = getIteratorWrapper(scanner);
            handedOff = true;
            return results;
        } finally {
            if (!handedOff) {
                scanner.close();
            }
        }
    }

    /**
     * Wraps a scanner in an iteration that deserializes each entry's value into a statement.
     *
     * <p>The element accessor is named {@code next()} again; the decompiler-renamed {@code m56next()} left the
     * anonymous class without an implementation of {@code next()}.
     *
     * @param scanner the configured scanner to read from; closed when the returned iteration is closed
     * @return an iteration over the statements stored in the scanned entries
     */
    private static CloseableIteration<Statement, QueryEvaluationException> getIteratorWrapper(final Scanner scanner) {
        final Iterator<Map.Entry<Key, Value>> entries = scanner.iterator();
        return new CloseableIteration<Statement, QueryEvaluationException>() {
            @Override
            public boolean hasNext() {
                return entries.hasNext();
            }

            @Override
            public Statement next() throws QueryEvaluationException {
                final Value value = entries.next().getValue();
                try {
                    return StatementSerializer.readStatement(Text.decode(value.get(), 0, value.getSize()));
                } catch (CharacterCodingException e) {
                    logger.error("Error decoding value", e);
                    throw new QueryEvaluationException(e);
                } catch (IOException e) {
                    logger.error("Error deserializing statement", e);
                    throw new QueryEvaluationException(e);
                }
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException("Remove not implemented");
            }

            @Override
            public void close() throws QueryEvaluationException {
                if (scanner != null) {
                    scanner.close();
                }
            }
        };
    }

    /**
     * Parses a query string into its syntax tree.
     *
     * @param str the query text
     * @return the root node of the parsed query
     * @throws IOException wrapping the parser's {@code ParseException} or {@code TokenMgrError}
     */
    private static SimpleNode parseQuery(String str) throws IOException {
        final SimpleNode root;
        try {
            root = QueryParser.parse(str);
        } catch (ParseException syntaxError) {
            logger.error("Parser Exception on Client Side. Query: " + str, syntaxError);
            throw new IOException(syntaxError);
        } catch (TokenMgrError lexError) {
            logger.error("Token Manager Exception on Client Side. Query: " + str, lexError);
            throw new IOException(lexError);
        }
        return root;
    }

    /**
     * Returns the name of the doc table derived from the current configuration.
     *
     * @return the doc table name
     * @throws NullPointerException if no configuration has been set
     */
    public String getTableName() {
        return getFreeTextDocTablename(this.conf);
    }

    /**
     * Returns the names of both index tables (doc table first, then term table) for a configuration.
     *
     * @param configuration the configuration providing the table prefix
     * @return an unmodifiable list of the two table names
     * @throws NullPointerException if {@code configuration} is {@code null}
     */
    public static List<String> getTableNames(Configuration configuration) {
        Objects.requireNonNull(configuration);
        final String prefix = ConfigUtils.getTablePrefix(configuration);
        final List<String> names = makeTableNames(prefix);
        return Collections.unmodifiableList(names);
    }

    /**
     * Returns the doc table name for a configuration.
     *
     * @param configuration the configuration providing the table prefix
     * @return the table prefix followed by the doc table suffix
     * @throws NullPointerException if {@code configuration} is {@code null}
     */
    public static String getFreeTextDocTablename(Configuration configuration) {
        Objects.requireNonNull(configuration);
        final String prefix = ConfigUtils.getTablePrefix(configuration);
        return makeFreeTextDocTablename(prefix);
    }

    /**
     * Stores the shared multi-table batch writer. The per-table writers are not bound here; that happens in
     * {@code setConf()} (only if the writer is already present at that time) or in {@code init()}.
     *
     * @param multiTableBatchWriter the writer to use for both index tables
     * @throws IOException declared by the signature; this implementation does not throw it
     */
    public void setMultiTableBatchWriter(MultiTableBatchWriter multiTableBatchWriter) throws IOException {
        this.mtbw = multiTableBatchWriter;
    }

    /**
     * Returns the term table name for a configuration.
     *
     * @param configuration the configuration providing the table prefix
     * @return the table prefix followed by the term table suffix
     * @throws NullPointerException if {@code configuration} is {@code null}
     */
    public static String getFreeTextTermTablename(Configuration configuration) {
        Objects.requireNonNull(configuration);
        final String prefix = ConfigUtils.getTablePrefix(configuration);
        return makeFreeTextTermTablename(prefix);
    }

    /**
     * Builds the names of both index tables for a table prefix.
     *
     * @param str the table prefix
     * @return a new mutable list containing the doc table name followed by the term table name
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static List<String> makeTableNames(String str) {
        Objects.requireNonNull(str);
        final String docTable = makeFreeTextDocTablename(str);
        final String termTable = makeFreeTextTermTablename(str);
        return Lists.newArrayList(docTable, termTable);
    }

    /**
     * Builds the doc table name for a table prefix.
     *
     * @param str the table prefix
     * @return the prefix followed by the doc table suffix
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String makeFreeTextDocTablename(String str) {
        return Objects.requireNonNull(str) + TABLE_SUFFFIX_DOC;
    }

    /**
     * Builds the term table name for a table prefix.
     *
     * @param str the table prefix
     * @return the prefix followed by the term table suffix
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String makeFreeTextTermTablename(String str) {
        return Objects.requireNonNull(str) + TABLE_SUFFIX_TERM;
    }

    /**
     * Removes one statement from the index if its predicate is indexable and its object is a literal.
     *
     * <p>Mirrors the store path: the lower-cased literal is tokenized, every doc-table column written for the
     * statement is deleted, and for each term the forward and reversed term-table markers are deleted unless
     * {@code doesTermExistInOtherDocs} reports that the term is still in use.
     *
     * @param statement the statement to remove
     * @throws IOException if the batch writers reject the mutations
     * @throws NullPointerException if no multi-table batch writer has been set
     */
    private void deleteStatement(Statement statement) throws IOException {
        Objects.requireNonNull(this.mtbw, "Freetext indexer attempting to delete, but setMultiTableBatchWriter() was not set.");

        final boolean predicateIndexed = this.validPredicates.isEmpty() || this.validPredicates.contains(statement.getPredicate());
        if (!predicateIndexed || !(statement.getObject() instanceof Literal)) {
            return;
        }

        final SortedSet<String> terms = this.tokenizer.tokenize(statement.getObject().stringValue().toLowerCase());
        if (terms.isEmpty()) {
            return;
        }

        final String serialized = StatementSerializer.writeStatement(statement);
        final String docHash = Md5Hash.md5Base64(serialized);
        final Text partitionRow = genPartition(serialized.hashCode(), this.docTableNumPartitions);
        // The numeric partition id is the same for every term, so parse it once.
        final int partitionId = Integer.parseInt(partitionRow.toString());
        final Mutation docMutation = new Mutation(partitionRow);
        final List<Mutation> termMutations = new ArrayList<>();
        final Text docId = new Text(docHash);

        docMutation.putDelete(ColumnPrefixes.DOCS_CF_PREFIX, docId);
        docMutation.putDelete(ColumnPrefixes.getSubjColFam(statement), docId);
        docMutation.putDelete(ColumnPrefixes.getPredColFam(statement), docId);
        docMutation.putDelete(ColumnPrefixes.getObjColFam(statement), docId);
        docMutation.putDelete(ColumnPrefixes.getContextColFam(statement), docId);

        for (final String term : terms) {
            final boolean stillInUse = doesTermExistInOtherDocs(term, partitionId, docId);
            if (!stillInUse) {
                termMutations.add(createEmptyPutDeleteMutation(ColumnPrefixes.getTermListColFam(term)));
                termMutations.add(createEmptyPutDeleteMutation(ColumnPrefixes.getRevTermListColFam(term)));
            }
            docMutation.putDelete(ColumnPrefixes.getTermColFam(term), docId);
        }

        try {
            this.docTableBw.addMutation(docMutation);
            this.termTableBw.addMutations(termMutations);
        } catch (MutationsRejectedException rejected) {
            logger.error("error adding mutation", rejected);
            throw new IOException(rejected);
        }
    }

    /**
     * Converts the Rya statement to its RDF form and removes it from the index.
     *
     * @param ryaStatement the statement to remove
     * @throws IOException if the batch writers reject the mutations
     */
    public void deleteStatement(RyaStatement ryaStatement) throws IOException {
        final Statement rdfStatement = RyaToRdfConversions.convertStatement(ryaStatement);
        deleteStatement(rdfStatement);
    }

    /**
     * Checks whether a term is still referenced by any document other than the one being deleted.
     *
     * <p>Two defects of the previous version are fixed here:
     * <ul>
     *   <li>the scanner was never closed, including on the early {@code return true};</li>
     *   <li>only the partition id was compared, so a different document stored in the <em>same</em> partition
     *       was not counted and the term could be removed from the term table while still in use. An entry is
     *       now treated as the current document only when both its partition and its column qualifier (the
     *       document id) match.</li>
     * </ul>
     *
     * @param str the term to look for
     * @param i the partition id of the document being deleted
     * @param text the id (column qualifier) of the document being deleted
     * @return {@code true} if another document contains the term; {@code false} if none does or if the doc
     *         table could not be scanned
     */
    private boolean doesTermExistInOtherDocs(String str, int i, Text text) {
        try {
            final Scanner scanner = getScanner(getFreeTextDocTablename(this.conf));
            try {
                scanner.fetchColumnFamily(ColumnPrefixes.getTermColFam(StringUtils.removeEnd(str, "*").toLowerCase()));
                for (final Map.Entry<Key, Value> entry : scanner) {
                    final Key key = entry.getKey();
                    final boolean sameDocument = Integer.parseInt(key.getRow().toString()) == i
                            && text.equals(key.getColumnQualifier());
                    if (sameDocument) {
                        // This is the entry about to be deleted; it does not keep the term alive.
                        continue;
                    }
                    final Text columnFamily = key.getColumnFamily();
                    if (columnFamily.toString().startsWith(ColumnPrefixes.TERM_CF_PREFIX.toString()) && ColumnPrefixes.removePrefix(columnFamily).toString().equals(str)) {
                        return true;
                    }
                }
                return false;
            } finally {
                scanner.close();
            }
        } catch (IOException e) {
            // NOTE(review): answering "not in use" on failure lets the caller delete the term entries;
            // kept as in the original, but confirm this is the intended fallback.
            logger.error("Error searching for the existance of the term in other documents", e);
            return false;
        }
    }

    /**
     * Binds the doc-table and term-table batch writers from the shared multi-table writer.
     *
     * <p>Both {@code setMultiTableBatchWriter()} and {@code setConf()} must have been called first. The
     * precondition messages previously said "temporal index", a leftover from another indexer; they now name
     * the free text index.
     *
     * @throws NullPointerException if the writer or the configuration has not been set
     * @throws RuntimeException wrapping any Accumulo failure raised while obtaining the writers
     */
    public void init() {
        Objects.requireNonNull(this.mtbw, "Freetext indexer failed to initialize free text index, setMultiTableBatchWriter() was not set.");
        Objects.requireNonNull(this.conf, "Freetext indexer failed to initialize free text index, setConf() was not set.");
        try {
            this.docTableBw = this.mtbw.getBatchWriter(getFreeTextDocTablename(this.conf));
            this.termTableBw = this.mtbw.getBatchWriter(getFreeTextTermTablename(this.conf));
        } catch (AccumuloException | AccumuloSecurityException | TableNotFoundException e) {
            logger.error("Unable to initialize index.  Throwing Runtime Exception. ", e);
            throw new RuntimeException(e);
        }
    }

    /**
     * No-op: this implementation does nothing with the supplied connector.
     *
     * @param connector ignored
     */
    public void setConnector(Connector connector) {
    }

    /** No-op: this implementation performs no work on destroy. */
    public void destroy() {
    }

    /**
     * No-op: this implementation does not remove any index data.
     *
     * @param rdfCloudTripleStoreConfiguration ignored
     */
    public void purge(RdfCloudTripleStoreConfiguration rdfCloudTripleStoreConfiguration) {
    }

    /** No-op: this implementation does not drop any tables. */
    public void dropAndDestroy() {
    }
}
