package de.metanome.algorithms.tireless.algorithm;

import de.metanome.algorithms.tireless.preprocessing.AlgorithmConfiguration;
import de.metanome.algorithms.tireless.preprocessing.alphabet.Alphabet;
import de.metanome.algorithms.tireless.regularexpression.RegularExpressionComparator;
import de.metanome.algorithms.tireless.regularexpression.containerclasses.ExpressionType;
import de.metanome.algorithms.tireless.regularexpression.containerclasses.RegularExpression;
import de.metanome.algorithms.tireless.regularexpression.containerclasses.RegularExpressionCharacterClass;
import de.metanome.algorithms.tireless.regularexpression.containerclasses.RegularExpressionConjunction;
import de.metanome.algorithms.tireless.regularexpression.containerclasses.RegularExpressionDisjunctionOfTokens;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;

/* loaded from: input_file:de/metanome/algorithms/tireless/algorithm/RecursiveSubgroupDisjunctionAlgorithm.class */
/**
 * Derives a {@link RegularExpressionConjunction} from a map of raw strings.
 *
 * <p>The strings are first wrapped in one {@link RegularExpressionDisjunctionOfTokens}. Each
 * recursion step splits the tokens of a disjunction into conjunctions, merges those pairwise
 * with {@link NeedlemanWunschAlignmentPunishElongation}, and then either collapses the whole
 * disjunction into a single character class or recurses into the token disjunctions that remain
 * in the merged result. The recursion level is bounded by {@link Alphabet#getDepth()}.
 */
public class RecursiveSubgroupDisjunctionAlgorithm {
    /** Raw strings to generalize, with an integer per string (presumably its occurrence count). */
    private final Map<String, Integer> rawContent;
    private final Alphabet alphabet;
    /** Character lookup taken from {@link #alphabet}; handed to the conjunction constructor. */
    private final Map<Character, Alphabet> charMap;
    private final AlgorithmConfiguration configuration;
    /** Threshold passed to {@link OutlierDetection}; set by {@link #computeExpression()}. */
    private int minimalOccurrenceThreshold = 0;

    /**
     * @param map raw strings mapped to an integer (presumably the number of occurrences)
     * @param alphabet alphabet that supplies the character map and the maximum recursion level
     * @param algorithmConfiguration thresholds steering merging, outlier removal and length checks
     */
    public RecursiveSubgroupDisjunctionAlgorithm(Map<String, Integer> map, Alphabet alphabet, AlgorithmConfiguration algorithmConfiguration) {
        this.rawContent = map;
        this.alphabet = alphabet;
        this.charMap = alphabet.getCharMap();
        this.configuration = algorithmConfiguration;
    }

    /**
     * Runs the algorithm on the raw content given at construction time.
     *
     * <p>As a side effect this registers the appearance count on the alphabet and recomputes
     * {@link #minimalOccurrenceThreshold} from {@code OUTLIER_THRESHOLD}.
     *
     * @return the computed expression, or {@code null} when the initial disjunction has length 0
     */
    public RegularExpressionConjunction computeExpression() {
        RegularExpressionDisjunctionOfTokens rootDisjunction = new RegularExpressionDisjunctionOfTokens(this.rawContent);
        rootDisjunction.addAppearanceCountAlphabet(this.alphabet, rootDisjunction.getAppearanceCount());
        this.minimalOccurrenceThreshold = (int) Math.floor(this.configuration.OUTLIER_THRESHOLD * rootDisjunction.getAppearanceCount());
        if (rootDisjunction.getLength() == 0) {
            return null;
        }
        return computeExpression(1, rootDisjunction);
    }

    /**
     * Computes the expression for one disjunction at recursion level {@code i}.
     *
     * @param i current recursion level; the public entry point starts at 1
     * @param regularExpressionDisjunctionOfTokens disjunction to generalize
     * @return a conjunction holding the untouched disjunction (when it is shorter than
     *     {@code DISJUNCTION_MERGING_THRESHOLD}), a single character class, or the refined
     *     alignment result
     */
    protected RegularExpressionConjunction computeExpression(int i, final RegularExpressionDisjunctionOfTokens regularExpressionDisjunctionOfTokens) {
        if (regularExpressionDisjunctionOfTokens.getLength() < this.configuration.DISJUNCTION_MERGING_THRESHOLD) {
            // Small disjunctions are kept verbatim. A plain instance is used instead of an
            // anonymous subclass so the result does not hold a reference to this algorithm.
            RegularExpressionConjunction unchanged = new RegularExpressionConjunction();
            unchanged.addChild(regularExpressionDisjunctionOfTokens);
            return unchanged;
        }
        RegularExpressionConjunction aligned = alignExpressions(i, regularExpressionDisjunctionOfTokens);
        if (i < this.alphabet.getDepth()) {
            new OutlierDetection(regularExpressionDisjunctionOfTokens, aligned, this.minimalOccurrenceThreshold).detectAndRemoveOutliers();
        }
        if (lengthDeviationExceedsThreshold(regularExpressionDisjunctionOfTokens, aligned)) {
            return makeCharacterClass(regularExpressionDisjunctionOfTokens);
        }
        return refineChildren(i, aligned);
    }

    /**
     * Rebuilds a conjunction, replacing every token disjunction child by the children of its
     * recursively computed expression. Other children, and all children once {@code i} has
     * reached the alphabet depth, are copied over unchanged.
     *
     * @param i current recursion level
     * @param regularExpressionConjunction conjunction whose children are refined
     * @return a new conjunction with the refined children in their original order
     */
    protected RegularExpressionConjunction refineChildren(int i, RegularExpressionConjunction regularExpressionConjunction) {
        RegularExpressionConjunction refined = new RegularExpressionConjunction();
        boolean mayRecurse = i < this.alphabet.getDepth();
        for (RegularExpression child : regularExpressionConjunction.getChildren()) {
            if (mayRecurse && child.getExpressionType() == ExpressionType.DISJUNCTION_OF_TOKENS) {
                refined.addChildren(computeExpression(i + 1, (RegularExpressionDisjunctionOfTokens) child).getChildren());
            } else {
                refined.addChild(child);
            }
        }
        return refined;
    }

    /**
     * Collapses a disjunction into a conjunction containing one character class that takes over
     * the disjunction's representation and its representation min/max counts.
     *
     * @param regularExpressionDisjunctionOfTokens disjunction to collapse
     * @return a conjunction whose only child is the new character class
     */
    protected RegularExpressionConjunction makeCharacterClass(final RegularExpressionDisjunctionOfTokens regularExpressionDisjunctionOfTokens) {
        // NOTE(review): the anonymous subclass is kept here because this file does not show
        // whether RegularExpressionCharacterClass can be instantiated directly or whether its
        // setters are accessible from this package.
        final RegularExpressionCharacterClass characterClass = new RegularExpressionCharacterClass() {
            {
                setCharacters(regularExpressionDisjunctionOfTokens.getRepresentation());
                setMinCount(regularExpressionDisjunctionOfTokens.getRepresentationMinCount());
                setMaxCount(regularExpressionDisjunctionOfTokens.getRepresentationMaxCount());
            }
        };
        RegularExpressionConjunction wrapper = new RegularExpressionConjunction();
        wrapper.addChild(characterClass);
        return wrapper;
    }

    /**
     * Checks whether the aligned conjunction's length bounds deviate too far from those of the
     * disjunction it was built from.
     *
     * @param regularExpressionDisjunctionOfTokens the original disjunction
     * @param regularExpressionConjunction the alignment result for that disjunction
     * @return {@code true} if the conjunction's minimum count multiplied by
     *     {@code MAXIMUM_LENGTH_DEVIATION_FACTOR} is below the disjunction's minimum count, or
     *     the conjunction's maximum count divided by that factor is above the disjunction's
     *     maximum count
     */
    protected boolean lengthDeviationExceedsThreshold(RegularExpressionDisjunctionOfTokens regularExpressionDisjunctionOfTokens, RegularExpressionConjunction regularExpressionConjunction) {
        // Plain comparisons: any comparison involving NaN is false, so a NaN operand never
        // reports a deviation.
        double factor = this.configuration.MAXIMUM_LENGTH_DEVIATION_FACTOR;
        return ((double) regularExpressionConjunction.getRepresentationMinCount()) * factor
                        < ((double) regularExpressionDisjunctionOfTokens.getRepresentationMinCount())
                || ((double) regularExpressionConjunction.getRepresentationMaxCount()) / factor
                        > ((double) regularExpressionDisjunctionOfTokens.getRepresentationMaxCount());
    }

    /**
     * Splits the disjunction's tokens into conjunctions, sorts them with
     * {@link RegularExpressionComparator}, merges them into one conjunction and multiplies the
     * minimum count of every resulting child by the disjunction's own minimum count.
     *
     * @param i current recursion level
     * @param regularExpressionDisjunctionOfTokens disjunction whose tokens are aligned
     * @return the merged conjunction
     */
    protected RegularExpressionConjunction alignExpressions(int i, RegularExpressionDisjunctionOfTokens regularExpressionDisjunctionOfTokens) {
        List<RegularExpressionConjunction> tokenConjunctions = splitTokensToConjunction(i, regularExpressionDisjunctionOfTokens);
        tokenConjunctions.sort(new RegularExpressionComparator());
        RegularExpressionConjunction merged = runAlignment(tokenConjunctions);
        for (RegularExpression child : merged.getChildren()) {
            child.setMinCount(child.getMinCount() * regularExpressionDisjunctionOfTokens.getMinCount());
        }
        return merged;
    }

    /**
     * Creates one conjunction per token of the disjunction.
     *
     * <p>Once {@code i} has reached the alphabet depth the conjunction is built from the token's
     * characters alone; below that depth the character map, the level and the value stored for
     * the token are passed to the constructor as well.
     *
     * @param i current recursion level
     * @param regularExpressionDisjunctionOfTokens disjunction providing the tokens
     * @return a new mutable list with one conjunction per token, in key-set iteration order
     */
    protected List<RegularExpressionConjunction> splitTokensToConjunction(int i, RegularExpressionDisjunctionOfTokens regularExpressionDisjunctionOfTokens) {
        List<RegularExpressionConjunction> conjunctions = new ArrayList<>();
        boolean atMaximumDepth = i >= this.alphabet.getDepth();
        for (String token : regularExpressionDisjunctionOfTokens.getChildren().keySet()) {
            conjunctions.add(atMaximumDepth
                    ? new RegularExpressionConjunction(token.toCharArray())
                    : new RegularExpressionConjunction(token.toCharArray(), this.charMap, i, regularExpressionDisjunctionOfTokens.getChildren().get(token).intValue()));
        }
        return conjunctions;
    }

    /**
     * Folds the list from left to right: the running result is merged with each following
     * element through {@link NeedlemanWunschAlignmentPunishElongation#mergeExpressions()}.
     *
     * @param list conjunctions to merge; must contain at least one element
     * @return the first element if the list has size 1, otherwise the result of the last merge
     * @throws IndexOutOfBoundsException if {@code list} is empty
     */
    protected RegularExpressionConjunction runAlignment(List<RegularExpressionConjunction> list) {
        RegularExpressionConjunction merged = list.get(0);
        for (RegularExpressionConjunction next : list.subList(1, list.size())) {
            merged = new NeedlemanWunschAlignmentPunishElongation(merged, next).mergeExpressions();
        }
        return merged;
    }
}
