package edu.ucla.sspace.tools;

import com.moms.lib_modules.cpi.Setting_SharePreferences;
import edu.ucla.sspace.common.ArgOptions;
import edu.ucla.sspace.text.IteratorFactory;
import edu.ucla.sspace.tri.OrderedTemporalRandomIndexing;
import edu.ucla.sspace.util.LoggerUtil;
import edu.ucla.sspace.util.TrieMap;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;

/* loaded from: classes.dex */
public class BigramExtractor {
    private static final Logger LOGGER = Logger.getLogger(BigramExtractor.class.getName());
    private final Map<Long, Number> bigramCounts;
    private int numBigramsInCorpus;
    private final Map<String, TokenStats> tokenCounts;
    private int tokenIndexCounter;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: edu.ucla.sspace.tools.BigramExtractor$1, reason: invalid class name */
    /* loaded from: classes.dex */
    public static /* synthetic */ class AnonymousClass1 {
        static final /* synthetic */ int[] $SwitchMap$edu$ucla$sspace$tools$BigramExtractor$SignificanceTest = new int[SignificanceTest.values().length];

        static {
            try {
                $SwitchMap$edu$ucla$sspace$tools$BigramExtractor$SignificanceTest[SignificanceTest.PMI.ordinal()] = 1;
            } catch (NoSuchFieldError unused) {
            }
            try {
                $SwitchMap$edu$ucla$sspace$tools$BigramExtractor$SignificanceTest[SignificanceTest.CHI_SQUARED.ordinal()] = 2;
            } catch (NoSuchFieldError unused2) {
            }
            try {
                $SwitchMap$edu$ucla$sspace$tools$BigramExtractor$SignificanceTest[SignificanceTest.LOG_LIKELIHOOD.ordinal()] = 3;
            } catch (NoSuchFieldError unused3) {
            }
        }
    }

    /* loaded from: classes.dex */
    public enum SignificanceTest {
        CHI_SQUARED,
        FISHERS_EXACT,
        BARNARDS,
        PMI,
        LOG_LIKELIHOOD
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: classes.dex */
    public static class TokenStats {
        public int index;
        public int count = 0;
        public int leftCount = 0;
        public int rightCount = 0;

        public TokenStats(int i) {
            this.index = i;
        }
    }

    public BigramExtractor() {
        this(1000);
    }

    public BigramExtractor(int i) {
        this.tokenCounts = new TrieMap();
        this.bigramCounts = new HashMap(i);
        this.tokenIndexCounter = 0;
        this.numBigramsInCorpus = 0;
    }

    private double chiSq(int[] iArr) {
        int i = iArr[0] + iArr[2];
        int i2 = iArr[1] + iArr[3];
        int i3 = iArr[0] + iArr[1];
        int i4 = iArr[2] + iArr[3];
        double d = i3 + i4;
        double d2 = i3;
        Double.isNaN(d2);
        Double.isNaN(d);
        double d3 = d2 / d;
        double d4 = i;
        Double.isNaN(d4);
        double d5 = d3 * d4;
        double d6 = i2;
        Double.isNaN(d6);
        double d7 = d3 * d6;
        double d8 = i4;
        Double.isNaN(d8);
        Double.isNaN(d);
        double d9 = d8 / d;
        Double.isNaN(d4);
        double d10 = d4 * d9;
        Double.isNaN(d6);
        double d11 = d9 * d6;
        double d12 = iArr[0];
        Double.isNaN(d12);
        double d13 = iArr[0];
        Double.isNaN(d13);
        double d14 = ((d12 - d5) * (d13 - d5)) / d5;
        double d15 = iArr[1];
        Double.isNaN(d15);
        double d16 = iArr[1];
        Double.isNaN(d16);
        double d17 = d14 + (((d15 - d7) * (d16 - d7)) / d7);
        double d18 = iArr[2];
        Double.isNaN(d18);
        double d19 = iArr[2];
        Double.isNaN(d19);
        double d20 = d17 + (((d18 - d10) * (d19 - d10)) / d10);
        double d21 = iArr[3];
        Double.isNaN(d21);
        double d22 = iArr[3];
        Double.isNaN(d22);
        return d20 + (((d21 - d11) * (d22 - d11)) / d11);
    }

    private boolean excludeToken(String str) {
        return str.equals("");
    }

    private int[] getContingencyTable(TokenStats tokenStats, TokenStats tokenStats2, int i) {
        int i2 = tokenStats.leftCount;
        int i3 = tokenStats2.rightCount - i;
        int i4 = i2 - i;
        return new int[]{i, i3, i4, this.numBigramsInCorpus - ((i3 + i4) + i)};
    }

    private double getScore(int[] iArr, SignificanceTest significanceTest) {
        int i = AnonymousClass1.$SwitchMap$edu$ucla$sspace$tools$BigramExtractor$SignificanceTest[significanceTest.ordinal()];
        if (i == 1) {
            return pmi(iArr);
        }
        if (i == 2) {
            return chiSq(iArr);
        }
        if (i == 3) {
            return logLikelihood(iArr);
        }
        throw new Error(significanceTest + " not implemented yet");
    }

    private TokenStats getStatsFor(String str) {
        TokenStats tokenStats = this.tokenCounts.get(str);
        if (tokenStats != null) {
            return tokenStats;
        }
        int i = this.tokenIndexCounter;
        this.tokenIndexCounter = i + 1;
        TokenStats tokenStats2 = new TokenStats(i);
        this.tokenCounts.put(str, tokenStats2);
        return tokenStats2;
    }

    private double logLikelihood(int[] iArr) {
        int i = iArr[0] + iArr[2];
        int i2 = iArr[1] + iArr[3];
        int i3 = iArr[0] + iArr[1];
        int i4 = iArr[2] + iArr[3];
        double d = i3 + i4;
        double d2 = i3;
        Double.isNaN(d2);
        Double.isNaN(d);
        double d3 = d2 / d;
        double d4 = i;
        Double.isNaN(d4);
        double d5 = d3 * d4;
        double d6 = i2;
        Double.isNaN(d6);
        double d7 = d3 * d6;
        double d8 = i4;
        Double.isNaN(d8);
        Double.isNaN(d);
        double d9 = d8 / d;
        Double.isNaN(d4);
        double d10 = d4 * d9;
        Double.isNaN(d6);
        double d11 = d9 * d6;
        double d12 = iArr[0];
        double d13 = iArr[0];
        Double.isNaN(d13);
        double log = Math.log(d13 - d5);
        Double.isNaN(d12);
        double d14 = d12 * log;
        double d15 = iArr[1];
        double d16 = iArr[1];
        Double.isNaN(d16);
        double log2 = Math.log(d16 - d7);
        Double.isNaN(d15);
        double d17 = d14 + (d15 * log2);
        double d18 = iArr[2];
        double d19 = iArr[2];
        Double.isNaN(d19);
        double log3 = Math.log(d19 - d10);
        Double.isNaN(d18);
        double d20 = d17 + (d18 * log3);
        double d21 = iArr[3];
        double d22 = iArr[3];
        Double.isNaN(d22);
        double log4 = Math.log(d22 - d11);
        Double.isNaN(d21);
        return (d20 + (d21 * log4)) * 2.0d;
    }

    public static void main(String[] strArr) {
        ArgOptions argOptions = new ArgOptions();
        argOptions.addOption('F', "tokenFilter", "filters to apply to the input token stream", true, "FILTER_SPEC", "Tokenizing Options");
        argOptions.addOption('M', "minFreq", "minimum frequency of the reported bigrams", true, "INT", "Bigram Options");
        argOptions.addOption('v', "verbose", "Print verbose output about counting status", false, null, "Program Options");
        argOptions.parseOptions(strArr);
        if (argOptions.numPositionalArgs() < 3) {
            System.out.println("usage: java BigramExtractor [options] <OutputFile> <SignificanceTest> <InputFile> [<InputFile>...]\n significance test options: " + SignificanceTest.values() + "\n" + argOptions.prettyPrint());
            return;
        }
        if (argOptions.hasOption("verbose")) {
            LoggerUtil.setLevel(Level.FINE);
        }
        Properties properties = System.getProperties();
        if (argOptions.hasOption("tokenFilter")) {
            properties.setProperty(IteratorFactory.TOKEN_FILTER_PROPERTY, argOptions.getStringOption("tokenFilter"));
        }
        IteratorFactory.setProperties(properties);
        try {
            BigramExtractor bigramExtractor = new BigramExtractor(1000000);
            SignificanceTest valueOf = SignificanceTest.valueOf(argOptions.getPositionalArg(1).toUpperCase());
            PrintWriter printWriter = new PrintWriter(argOptions.getPositionalArg(0));
            int numPositionalArgs = argOptions.numPositionalArgs();
            for (int i = 2; i < numPositionalArgs; i++) {
                String positionalArg = argOptions.getPositionalArg(i);
                BufferedReader bufferedReader = new BufferedReader(new FileReader(positionalArg));
                int i2 = 0;
                while (true) {
                    String readLine = bufferedReader.readLine();
                    if (readLine != null) {
                        bigramExtractor.process(readLine);
                        i2++;
                        if (i2 % OrderedTemporalRandomIndexing.DEFAULT_VECTOR_LENGTH == 0) {
                            LOGGER.fine(positionalArg + ": processed document " + i2);
                        }
                    }
                }
                bufferedReader.close();
            }
            bigramExtractor.printBigrams(printWriter, valueOf, argOptions.hasOption("minFreq") ? argOptions.getIntOption("minFreq") : 0);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    private double pmi(int[] iArr) {
        double d = iArr[0];
        int i = this.numBigramsInCorpus;
        double d2 = i;
        Double.isNaN(d);
        Double.isNaN(d2);
        double d3 = d / d2;
        double d4 = iArr[0] + iArr[2];
        double d5 = i;
        Double.isNaN(d4);
        Double.isNaN(d5);
        double d6 = d4 / d5;
        double d7 = iArr[0] + iArr[1];
        double d8 = i;
        Double.isNaN(d7);
        Double.isNaN(d8);
        return d3 / (d6 * (d7 / d8));
    }

    private void processBigram(String str, String str2) {
        TokenStats statsFor = getStatsFor(str);
        TokenStats statsFor2 = getStatsFor(str2);
        statsFor.count++;
        statsFor2.count++;
        statsFor.leftCount++;
        statsFor2.rightCount++;
        this.numBigramsInCorpus++;
        long j = statsFor2.index | (statsFor.index << 32);
        Number number = this.bigramCounts.get(Long.valueOf(j));
        int intValue = number != null ? 1 + number.intValue() : 1;
        this.bigramCounts.put(Long.valueOf(j), intValue < 127 ? Byte.valueOf((byte) intValue) : intValue < 32767 ? Short.valueOf((short) intValue) : Integer.valueOf(intValue));
    }

    public void printBigrams(PrintWriter printWriter, SignificanceTest significanceTest, int i) {
        String[] strArr = new String[this.tokenCounts.size()];
        for (Map.Entry<String, TokenStats> entry : this.tokenCounts.entrySet()) {
            strArr[entry.getValue().index] = entry.getKey().toString();
        }
        LOGGER.info("Number of bigrams: " + this.bigramCounts.size());
        for (Map.Entry<Long, Number> entry2 : this.bigramCounts.entrySet()) {
            long longValue = entry2.getKey().longValue();
            int i2 = (int) (longValue >>> 32);
            int i3 = (int) (longValue & 4294967295L);
            int intValue = entry2.getValue().intValue();
            TokenStats tokenStats = this.tokenCounts.get(strArr[i2]);
            TokenStats tokenStats2 = this.tokenCounts.get(strArr[i3]);
            if (tokenStats.count >= i && tokenStats2.count >= i) {
                printWriter.println(getScore(getContingencyTable(tokenStats, tokenStats2, intValue), significanceTest) + Setting_SharePreferences.YOIL_SPLIT + strArr[i2] + Setting_SharePreferences.YOIL_SPLIT + strArr[i3]);
            }
        }
    }

    public void process(BufferedReader bufferedReader) {
        process(IteratorFactory.tokenizeOrdered(bufferedReader));
    }

    public void process(String str) {
        process(IteratorFactory.tokenizeOrdered(str));
    }

    public void process(Iterator<String> it) {
        String next = it.hasNext() ? it.next() : null;
        while (it.hasNext()) {
            String next2 = it.next();
            if (!excludeToken(next) && !excludeToken(next2)) {
                processBigram(next, next2);
            }
            next = next2;
        }
    }
}
