package org.languagetool.tokenizers.de;

import com.google.common.base.Suppliers;
import de.danielnaber.jwordsplitter.EmbeddedGermanDictionary;
import de.danielnaber.jwordsplitter.GermanWordSplitter;
import de.danielnaber.jwordsplitter.InputTooLongException;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.function.Supplier;
import org.languagetool.tokenizers.Tokenizer;

/* loaded from: input_file:org/languagetool/tokenizers/de/GermanCompoundTokenizer.class */
public class GermanCompoundTokenizer implements Tokenizer {
    /** Memoized supplier of the shared tokenizer built with {@code true} (strict mode on). */
    private static final Supplier<GermanCompoundTokenizer> strictInstance =
            Suppliers.memoize(() -> createUnchecked(true));

    /** Memoized supplier of the shared tokenizer built with {@code false} (strict mode off). */
    private static final Supplier<GermanCompoundTokenizer> nonStrictInstance =
            Suppliers.memoize(() -> createUnchecked(false));

    /** Splitter that performs the actual compound decomposition for {@link #tokenize(String)}. */
    private final ExtendedGermanWordSplitter wordSplitter;

    /**
     * Builds a tokenizer for use inside a supplier, where checked exceptions cannot escape.
     *
     * @param strict value handed to {@link #GermanCompoundTokenizer(boolean)}
     * @return the newly constructed tokenizer
     * @throws RuntimeException wrapping the {@link IOException} if construction fails
     */
    private static GermanCompoundTokenizer createUnchecked(boolean strict) {
        try {
            return new GermanCompoundTokenizer(strict);
        } catch (IOException ioFailure) {
            throw new RuntimeException(ioFailure);
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/languagetool/tokenizers/de/GermanCompoundTokenizer$ExtendedGermanWordSplitter.class */
    /**
     * {@link GermanWordSplitter} whose word list is the embedded jWordSplitter dictionary
     * plus the additional lower-case entries in {@link #ADDITIONAL_WORDS}.
     */
    public static class ExtendedGermanWordSplitter extends GermanWordSplitter {
        /** Entries added on top of {@link EmbeddedGermanDictionary#getWords()}, in insertion order. */
        private static final String[] ADDITIONAL_WORDS = {
            "influencer", "katheterisierung", "rücklass", "abdichtung", "laptop", "verschattung",
            "paradeiser", "einreichung", "bestatter", "divergenz", "schrumpf", "degustation",
            "schaft", "abstreifer", "aufputz", "glühwürmchen", "aufwertung", "einhausung",
            "lackier", "zarge", "pluralisierung", "schanzen", "abscheide", "rangier",
            "temporal", "kartonage", "kartonagen", "rebellion", "binokular", "umverlegung",
            "umhausung", "überholung", "chloroplasten", "nachrangigkeit", "spital", "turnus",
            "teilnehmenden", "pensionisten", "graduierten", "beladung", "controller", "resilienz",
            "mitführ", "trauma", "abtau", "normung", "mikroskopie", "bitumen",
            "erfolglosigkeit", "pneumatik", "anlasser", "allozierung", "alphabetisierung", "aktuator",
            "akademisierung", "allergiker", "queer", "filament", "querung", "curling",
            "opioid", "booster", "schmuse", "thrombozyten", "dysfunktion", "storchen",
            "nasch", "esperanto", "passivierung", "radikalisierung", "erleuchtung", "verwalter",
            "verbiss", "ausleih", "rutsch", "kufen", "entferner", "debitoren",
            "terrakotta", "graffiti", "auffahr", "anmutung", "kritzel", "salami",
            "eukalyptus", "kreativ", "hochvolt", "trading", "extraktion", "verstetigung",
            "diagonal", "margen", "synonym", "aufbringung", "robustheit", "nachuntersuchung",
            "erstkommunion", "hauptstadt", "neustart", "polarisierung", "vollstreckbarkeit", "vollziehung",
            "kasko", "blitzableiter", "abschattungen", "kuscheltier", "gastro", "hortensien",
        };

        /**
         * Creates a splitter backed by {@link #extendedList()}.
         *
         * @param z forwarded unchanged as the first argument of the
         *          {@code GermanWordSplitter(boolean, Set)} constructor
         *          (NOTE(review): presumably the "hide interfix characters" flag; confirm
         *          against the jWordSplitter API)
         * @throws IOException if the superclass constructor fails
         */
        ExtendedGermanWordSplitter(boolean z) throws IOException {
            super(z, extendedList());
        }

        /**
         * Builds a fresh word set: a copy of the embedded dictionary with every entry of
         * {@link #ADDITIONAL_WORDS} added, then trimmed.
         *
         * @return a new set; each call creates an independent copy
         */
        static Set<String> extendedList() {
            // Parameterized (not raw) so that additions and the Set<String> return are type-checked.
            ObjectOpenHashSet<String> words = new ObjectOpenHashSet<>(EmbeddedGermanDictionary.getWords());
            Collections.addAll(words, ADDITIONAL_WORDS);
            words.trim();
            return words;
        }
    }

    /**
     * Creates a tokenizer by delegating to {@link #GermanCompoundTokenizer(boolean)} with
     * {@code true}, i.e. with strict mode switched on.
     *
     * @throws IOException if the delegated constructor fails
     */
    public GermanCompoundTokenizer() throws IOException {
        this(true);
    }

    /**
     * Creates a tokenizer with its own {@link ExtendedGermanWordSplitter}, a minimum word
     * length of 3 and a fixed table of split exceptions.
     *
     * <p>Exceptions come in two kinds: words that are registered with themselves as their only
     * part (so they are returned undivided) and compounds registered with an explicit list of
     * parts. The original code registered "Göttinnen" twice with the same single part; it is
     * listed once here.
     *
     * @param z value passed to {@code setStrictMode} on the underlying splitter
     * @throws IOException if the underlying splitter cannot be created
     */
    public GermanCompoundTokenizer(boolean z) throws IOException {
        this.wordSplitter = new ExtendedGermanWordSplitter(false);
        this.wordSplitter.setStrictMode(z);
        this.wordSplitter.setMinimumWordLength(3);

        keepWhole("Absolventen", "Acetat");
        splitAs("Alkoholabstinenz", "Alkohol", "abstinenz");
        keepWhole(
            "Androgen", "Auberginen", "Auckland", "Boston", "Brandenburg", "Broadcast",
            "Buchsbaum", "Chiemsee", "Coffein", "Drohnen", "Eiben", "Eingroschen",
            "Einkomponenten", "Elster", "Engineering", "Factoring");
        splitAs("Flexodruck", "Flexo", "druck");
        keepWhole(
            "Graviton", "Göttinnen", "Hallesche", "Hinspiel", "Homogen", "Kolleggen",
            "Karstadt", "Kartier", "Kaukasus", "Knoblauch", "Kollagen", "Kommerz",
            "Mentoring", "Monarchen", "Oligarchen", "Optimal", "Saunieren", "Schiessen",
            "Spielgeleier", "Halleschen");
        splitAs("Reinigungstab", "Reinigungs", "tab");
        splitAs("Reinigungstabs", "Reinigungs", "tabs");
        splitAs("Tauschwerte", "Tausch", "werte");
        splitAs("Tauschwertes", "Tausch", "wertes");
        splitAs("Kinderspielen", "Kinder", "spielen");
        splitAs("Buchhaltungstrick", "Buchhaltungs", "trick");
        splitAs("Buchhaltungstricks", "Buchhaltungs", "tricks");
        splitAs("Haushaltstrick", "Haushalts", "trick");
        splitAs("Haushaltstricks", "Haushalts", "tricks");
        splitAs("Verkaufstrick", "Verkaufs", "trick");
        splitAs("Verkaufstricks", "Verkaufs", "tricks");
        splitAs("Ablenkungstrick", "Ablenkungs", "trick");
        splitAs("Ablenkungstricks", "Ablenkungs", "tricks");
        splitAs("Manipulationstrick", "Manipulations", "trick");
        splitAs("Manipulationstricks", "Manipulations", "tricks");
        splitAs("Erziehungstrick", "Erziehungs", "trick");
        splitAs("Erziehungstricks", "Erziehungs", "tricks");
        splitAs("Messetage", "Messe", "tage");
        splitAs("Messetagen", "Messe", "tagen");
        keepWhole("karamelligen");
        splitAs("Häkelnadel", "Häkel", "nadel");
        splitAs("Häkelnadeln", "Häkel", "nadeln");
        keepWhole(
            "Freiberg", "Abtestat", "Abtestaten", "Freibergs", "Kreuzberg", "Kreuzbergs",
            "Digitalisierung", "Abtrocknung", "Erlösung", "Feuerung", "Aktivierung",
            "Protokollierung", "Budgetierung", "Faltung", "Anhäufung", "Aufkohlung",
            "Festigung", "Allerheiligen");
        splitAs("Druckerpressen", "Drucker", "pressen");
        keepWhole("Habitat", "Augarten", "Auszeit");
        splitAs("Bewegtbild", "Bewegt", "bild");
        keepWhole(
            "Bigband", "Bisexuelle", "Bisexuellen", "Bunsenbrenner", "Carbon", "Carsharing",
            "Castor", "Catering", "Cholesterin", "Damast", "Dispositiv", "Emittent",
            "Emittenten", "Express", "Fairness", "Fiberglas", "Globus",
            "Illustration", "Muttertag", "Muttertags", "Patriarchen", "Phosgen", "Vatertag",
            "Vatertags", "Vaterland", "Vaterlands", "Wehrmacht", "Wehrmachts");
    }

    /** Registers each word as an exception whose only part is the word itself. */
    private void keepWhole(String... words) {
        for (String word : words) {
            this.wordSplitter.addException(word, Arrays.asList(word));
        }
    }

    /** Registers {@code compound} as an exception that is divided into exactly {@code parts}. */
    private void splitAs(String compound, String... parts) {
        this.wordSplitter.addException(compound, Arrays.asList(parts));
    }

    /**
     * Splits a word into its parts using the underlying word splitter.
     *
     * @param str the word to split
     * @return the parts reported by the splitter, or a single-element list holding
     *         {@code str} unchanged when the splitter rejects the input as too long
     */
    public List<String> tokenize(String str) {
        List<String> parts;
        try {
            parts = this.wordSplitter.splitWord(str);
        } catch (InputTooLongException tooLong) {
            // Over-long input is not an error for callers: hand it back undivided.
            parts = Collections.singletonList(str);
        }
        return parts;
    }

    /**
     * Returns the shared tokenizer obtained from the memoized {@code strictInstance} supplier,
     * which constructs it with {@code true} (strict mode on).
     *
     * @return the tokenizer provided by the supplier
     * @throws RuntimeException if constructing the tokenizer fails with an {@code IOException}
     */
    public static GermanCompoundTokenizer getStrictInstance() {
        return strictInstance.get();
    }

    /**
     * Returns the shared tokenizer obtained from the memoized {@code nonStrictInstance} supplier,
     * which constructs it with {@code false} (strict mode off).
     *
     * @return the tokenizer provided by the supplier
     * @throws RuntimeException if constructing the tokenizer fails with an {@code IOException}
     */
    public static GermanCompoundTokenizer getNonStrictInstance() {
        return nonStrictInstance.get();
    }

    /**
     * Command-line entry point that prints the split of each given word.
     *
     * <p>With no arguments, prints a usage line and exits with status 1. If the first argument
     * names an existing file, every line of that file is tokenized and any further arguments
     * are ignored; otherwise every argument is tokenized as a word.
     *
     * @param strArr words to split, or a single path to a file containing one word per line
     * @throws IOException if the tokenizer cannot be created or the file cannot be read
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr.length == 0) {
            System.out.println("Usage: " + GermanCompoundTokenizer.class.getSimpleName() + " <wordsToSplit... or file>");
            System.exit(1);
        }
        GermanCompoundTokenizer tokenizer = new GermanCompoundTokenizer();
        String firstArg = strArr[0];
        if (!new File(firstArg).exists()) {
            // Not a file: treat every argument as a word.
            for (String word : strArr) {
                System.out.println(tokenizer.tokenize(word));
            }
            return;
        }
        System.out.println("Working on lines from " + firstArg + ":");
        for (String line : Files.readAllLines(Paths.get(firstArg))) {
            System.out.println(tokenizer.tokenize(line));
        }
    }
}
