package edu.ucla.sspace.clustering;

import edu.ucla.sspace.clustering.criterion.CriterionFunction;
import edu.ucla.sspace.matrix.ClutoSparseMatrixBuilder;
import edu.ucla.sspace.matrix.Matrices;
import edu.ucla.sspace.matrix.Matrix;
import edu.ucla.sspace.matrix.RowMagnitudeTransform;
import edu.ucla.sspace.matrix.SparseMatrix;
import edu.ucla.sspace.matrix.TfIdfDocStripedTransform;
import edu.ucla.sspace.util.ReflectionUtil;
import edu.ucla.sspace.vector.CompactSparseVector;
import edu.ucla.sspace.vector.SparseDoubleVector;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Properties;
import java.util.Random;
import java.util.Set;
import java.util.logging.Logger;

/* loaded from: classes.dex */
/**
 * A {@link Clustering} implementation that chooses the number of clusters by
 * comparing the criterion score obtained on the real data with the scores
 * obtained on randomly generated reference data sets.
 *
 * <p>For each candidate number of clusters {@code k}, the data and every
 * reference set are clustered with {@link DirectClustering}.  The "gap" for
 * {@code k} is the mean of the log reference scores minus the log score of
 * the real data.  The search walks {@code k} upwards and stops as soon as the
 * previously accepted gap is at least the current gap minus its deviation
 * term; the assignments of the last accepted {@code k} are returned.
 *
 * <p>Configuration is read from the {@link Properties} passed to
 * {@code cluster}:
 * <ul>
 *   <li>{@link #NUM_CLUSTERS_START}: first number of clusters to try
 *       (default {@code 1})</li>
 *   <li>{@link #NUM_REFERENCE_DATA_SETS}: number of reference data sets
 *       (default {@code 5})</li>
 *   <li>{@link #METHOD_PROPERTY}: class name of the
 *       {@link CriterionFunction} to instantiate (default
 *       {@code edu.ucla.sspace.clustering.criterion.H2Function})</li>
 * </ul>
 */
public class GapStatistic implements Clustering {
    private static final String DEFAULT_METHOD = "edu.ucla.sspace.clustering.criterion.H2Function";
    private static final String DEFAULT_NUM_CLUSTERS_END = "10";
    private static final String DEFAULT_NUM_CLUSTERS_START = "1";
    private static final String DEFAULT_NUM_REFERENCE_DATA_SETS = "5";
    public static final String METHOD_PROPERTY = "edu.ucla.sspace.clustering.GapStatistic.method";
    public static final String NUM_CLUSTERS_START = "edu.ucla.sspace.clustering.GapStatistic.numClusterStart";
    public static final String NUM_REFERENCE_DATA_SETS = "edu.ucla.sspace.clustering.GapStatistic.numReferenceDataSets";
    public static final String PROPERTY_PREFIX = "edu.ucla.sspace.clustering.GapStatistic";
    private static final Logger LOGGER = Logger.getLogger(GapStatistic.class.getName());
    private static final Random random = new Random();

    /**
     * Generates random sparse matrices that mimic the summary statistics of
     * an observed matrix: the same number of rows and columns, a similar
     * number of non-zero values per row, and values drawn uniformly from the
     * per-column value range.
     */
    public class ReferenceDataGenerator {
        private final double averageNumValuesPerRow;
        private final double[] maxValues;
        private final double[] minValues;
        /** Indices of the columns holding at least one non-zero value. */
        private final int[] nonZeroColumns;
        private final int rows;
        private final double stdevNumValuesPerRow;

        /**
         * Collects the statistics of {@code matrix} needed to generate
         * reference data.
         *
         * <p>The per-column bounds start at 0 (the default array value), so
         * the recorded range of a column always contains 0 in addition to
         * every value that was inspected.
         *
         * @param matrix the observed data; rows of a {@link SparseMatrix} are
         *        scanned through their non-zero indices only
         */
        public ReferenceDataGenerator(Matrix matrix) {
            int columns = matrix.columns();
            this.rows = matrix.rows();
            this.minValues = new double[columns];
            this.maxValues = new double[columns];
            boolean[] columnHasValue = new boolean[columns];
            int[] valuesPerRow = new int[this.rows];

            if (matrix instanceof SparseMatrix) {
                SparseMatrix sparse = (SparseMatrix) matrix;
                for (int r = 0; r < this.rows; r++) {
                    SparseDoubleVector row = sparse.getRowVector(r);
                    int[] nonZeros = row.getNonZeroIndices();
                    valuesPerRow[r] = nonZeros.length;
                    for (int c : nonZeros) {
                        columnHasValue[c] = true;
                        widenBounds(c, row.get(c));
                    }
                }
            } else {
                for (int r = 0; r < this.rows; r++) {
                    for (int c = 0; c < columns; c++) {
                        double value = matrix.get(r, c);
                        widenBounds(c, value);
                        if (value != 0.0d) {
                            valuesPerRow[r]++;
                            columnHasValue[c] = true;
                        }
                    }
                }
            }

            // Mean and (population) standard deviation of the number of
            // non-zero values per row.
            long totalValues = 0L;
            for (int count : valuesPerRow) {
                totalValues += count;
            }
            this.averageNumValuesPerRow = totalValues / (double) this.rows;

            double squaredError = 0.0d;
            for (int count : valuesPerRow) {
                double diff = this.averageNumValuesPerRow - count;
                squaredError += diff * diff;
            }
            this.stdevNumValuesPerRow = Math.sqrt(squaredError / this.rows);

            // Keep the non-zero columns in an array so that a random one can
            // be drawn in constant time instead of by rejection sampling.
            int used = 0;
            for (boolean hasValue : columnHasValue) {
                if (hasValue) {
                    used++;
                }
            }
            this.nonZeroColumns = new int[used];
            int next = 0;
            for (int c = 0; c < columns; c++) {
                if (columnHasValue[c]) {
                    this.nonZeroColumns[next++] = c;
                }
            }
        }

        /** Extends the recorded value range of {@code column} to include {@code value}. */
        private void widenBounds(int column, double value) {
            if (value < this.minValues[column]) {
                this.minValues[column] = value;
            }
            if (value > this.maxValues[column]) {
                this.maxValues[column] = value;
            }
        }

        /**
         * Returns a uniformly chosen column that had at least one non-zero
         * value in the observed matrix.
         *
         * @throws IllegalStateException if the observed matrix had no
         *         non-zero values, in which case no column can be chosen
         */
        private int getNonZeroColumn() {
            if (this.nonZeroColumns.length == 0) {
                throw new IllegalStateException(
                        "Cannot generate reference data: the observed matrix has no non-zero values");
            }
            return this.nonZeroColumns[GapStatistic.random.nextInt(this.nonZeroColumns.length)];
        }

        /**
         * Generates one random reference matrix with the same number of rows
         * and columns as the observed matrix.
         *
         * <p>For every row, the number of values to set is drawn from a
         * Gaussian with the observed mean and standard deviation and is
         * clamped to at least one.  Each value is placed in a random non-zero
         * column and drawn uniformly from that column's recorded range.  The
         * same column may be drawn more than once for a row, in which case
         * the later value overwrites the earlier one.
         *
         * @return the generated reference data as a sparse matrix
         * @throws IllegalStateException if there are rows to generate but the
         *         observed matrix had no non-zero values
         */
        public Matrix generateTestData() {
            GapStatistic.this.verbose("Generating a new reference set");
            ArrayList<SparseDoubleVector> vectors = new ArrayList<SparseDoubleVector>(this.rows);
            for (int r = 0; r < this.rows; r++) {
                CompactSparseVector row = new CompactSparseVector(this.minValues.length);
                int drawn = (int) ((GapStatistic.random.nextGaussian() * this.stdevNumValuesPerRow)
                        + this.averageNumValuesPerRow);
                // A zero or negative draw would otherwise leave the row empty.
                int numValues = Math.max(1, drawn);
                for (int v = 0; v < numValues; v++) {
                    int column = getNonZeroColumn();
                    double fraction = GapStatistic.random.nextDouble();
                    double min = this.minValues[column];
                    double max = this.maxValues[column];
                    row.set(column, (fraction * (max - min)) + min);
                }
                vectors.add(row);
            }
            return Matrices.asSparseMatrix(vectors);
        }
    }

    /** Outcome of evaluating a single candidate number of clusters. */
    private static final class IterationResult {
        final double gap;
        final double deviation;
        final Assignments assignments;

        IterationResult(double gap, double deviation, Assignments assignments) {
            this.gap = gap;
            this.deviation = deviation;
            this.assignments = assignments;
        }
    }

    /**
     * Computes the gap and its deviation term for one number of clusters.
     *
     * @param iteration zero-based index of this step in the search; only used
     *        in the progress line written to standard output
     * @param numClusters the number of clusters to evaluate
     * @param criterionName class name of the {@link CriterionFunction}
     * @param data the transformed observed data
     * @param referenceSets the transformed reference data sets; must be
     *        non-empty
     * @return the gap, deviation term and assignments of the observed data
     */
    private IterationResult clusterIteration(int iteration, int numClusters, String criterionName,
                                             Matrix data, Matrix[] referenceSets) {
        double numSets = referenceSets.length;
        // The same criterion instance is reused for every clustering run
        // below; its score is read right after each run.
        CriterionFunction criterion = (CriterionFunction) ReflectionUtil.getObjectInstance(criterionName);

        verbose("Clustering reference data for %d clusters\n", numClusters);
        double[] logScores = new double[referenceSets.length];
        double logScoreSum = 0.0d;
        for (int s = 0; s < referenceSets.length; s++) {
            verbose("Clustering reference data %d \n", s);
            DirectClustering.cluster(referenceSets[s], numClusters, 1, criterion);
            logScores[s] = Math.log(criterion.score());
            logScoreSum += logScores[s];
        }
        double referenceScore = logScoreSum / numSets;

        double squaredError = 0.0d;
        for (double logScore : logScores) {
            double diff = logScore - referenceScore;
            squaredError += diff * diff;
        }
        // Standard deviation of the log reference scores, scaled by
        // sqrt(1 + 1/B) where B is the number of reference sets.
        double deviation = Math.sqrt(squaredError / numSets) * Math.sqrt((1.0d / numSets) + 1.0d);

        verbose("Clustering original data for %d clusters\n", numClusters);
        Assignments assignments = DirectClustering.cluster(data, numClusters, 1, criterion);
        double observedScore = Math.log(criterion.score());
        verbose("Completed iteration with referenceScore: %f, gap:%f\n", referenceScore, observedScore);

        double gap = referenceScore - observedScore;
        // NOTE(review): "k" here is the iteration index, not the number of
        // clusters; kept as-is so the emitted output does not change.
        System.out.printf("k: %d gap: %f std: %f\n", iteration, gap, deviation);
        return new IterationResult(gap, deviation, assignments);
    }

    /**
     * Clusters {@code matrix}, trying numbers of clusters from the configured
     * start value up to, but not including, {@code numClusters}.
     *
     * <p>Both the observed data and each generated reference set are passed
     * through {@link TfIdfDocStripedTransform} and then
     * {@link RowMagnitudeTransform} before clustering.
     *
     * @param matrix the data to cluster
     * @param numClusters exclusive upper bound on the number of clusters
     * @param properties configuration; see the class documentation
     * @return the assignments for the last accepted number of clusters, or
     *         {@code null} when no number of clusters was accepted (for
     *         instance when {@code numClusters} equals the start value)
     * @throws IllegalArgumentException if {@code numClusters} is smaller than
     *         the configured start value, or fewer than one reference data
     *         set is requested
     * @throws NumberFormatException if a numeric property cannot be parsed
     */
    @Override // edu.ucla.sspace.clustering.Clustering
    public Assignments cluster(Matrix matrix, int numClusters, Properties properties) {
        int startSize = Integer.parseInt(
                properties.getProperty(NUM_CLUSTERS_START, DEFAULT_NUM_CLUSTERS_START));
        int numReferenceSets = Integer.parseInt(
                properties.getProperty(NUM_REFERENCE_DATA_SETS, DEFAULT_NUM_REFERENCE_DATA_SETS));
        // Fail fast, before the expensive transforms and data generation.
        if (numClusters < startSize) {
            throw new IllegalArgumentException(
                    "numClusters (" + numClusters + ") must not be smaller than the starting number of clusters ("
                    + startSize + ")");
        }
        if (numReferenceSets < 1) {
            throw new IllegalArgumentException(
                    "At least one reference data set is required, but got " + numReferenceSets);
        }
        String criterionName = properties.getProperty(METHOD_PROPERTY, DEFAULT_METHOD);

        verbose("Transforming the original data set");
        TfIdfDocStripedTransform tfIdf = new TfIdfDocStripedTransform();
        RowMagnitudeTransform rowMagnitude = new RowMagnitudeTransform();
        Matrix data = rowMagnitude.transform(tfIdf.transform(matrix));

        verbose("Generating the reference data set");
        ReferenceDataGenerator generator = new ReferenceDataGenerator(data);
        Matrix[] referenceSets = new Matrix[numReferenceSets];
        for (int s = 0; s < numReferenceSets; s++) {
            referenceSets[s] = rowMagnitude.transform(tfIdf.transform(generator.generateTestData()));
        }

        // Results are consumed one iteration at a time, so nothing is
        // pre-allocated per candidate k.  This keeps an unbounded search
        // (numClusters == Integer.MAX_VALUE) from requesting huge arrays.
        // NOTE(review): the search itself is not capped at the number of rows;
        // it relies on the gap criterion to stop, as before.
        Assignments best = null;
        double bestGap = Double.NEGATIVE_INFINITY;
        for (int iteration = 0, k = startSize; k < numClusters; iteration++, k++) {
            IterationResult result = clusterIteration(iteration, k, criterionName, data, referenceSets);
            if (bestGap >= result.gap - result.deviation) {
                break;
            }
            bestGap = result.gap;
            best = result.assignments;
        }
        return best;
    }

    /**
     * Clusters {@code matrix} without an explicit upper bound on the number
     * of clusters; the search ends when the gap criterion stops it.
     *
     * @param matrix the data to cluster
     * @param properties configuration; see the class documentation
     * @return the assignments for the last accepted number of clusters
     */
    @Override // edu.ucla.sspace.clustering.Clustering
    public Assignments cluster(Matrix matrix, Properties properties) {
        return cluster(matrix, Integer.MAX_VALUE, properties);
    }

    /** Logs {@code str} at {@code FINE} level. */
    protected void verbose(String str) {
        LOGGER.fine(str);
    }

    /**
     * Formats {@code str} with {@code objArr} using
     * {@link String#format(String, Object...)} and logs the result at
     * {@code FINE} level.
     */
    protected void verbose(String str, Object... objArr) {
        LOGGER.fine(String.format(str, objArr));
    }
}
