package edu.ucla.sspace.mains;

import edu.ucla.sspace.common.ArgOptions;
import edu.ucla.sspace.common.SemanticSpace;
import edu.ucla.sspace.common.SemanticSpaceIO;
import edu.ucla.sspace.text.CorpusReader;
import edu.ucla.sspace.text.Document;
import edu.ucla.sspace.text.FileListDocumentIterator;
import edu.ucla.sspace.text.IteratorFactory;
import edu.ucla.sspace.text.OneLinePerDocumentIterator;
import edu.ucla.sspace.util.CombinedIterator;
import edu.ucla.sspace.util.LimitedIterator;
import edu.ucla.sspace.util.LoggerUtil;
import edu.ucla.sspace.util.ReflectionUtil;
import edu.ucla.sspace.util.WorkQueue;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Locale;
import java.util.NoSuchElementException;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;

/* loaded from: classes.dex */
public abstract class GenericMain {
    /** Extension used for the output file when the output path is a directory. */
    public static final String EXT = ".sspace";
    /** Logger that the {@code verbose} helpers write to at {@link Level#FINE}. */
    private static final Logger LOGGER = Logger.getLogger(GenericMain.class.getName());
    /** Command-line options, built once in the constructor by {@link #setupOptions()}. */
    protected final ArgOptions argOptions;
    /**
     * When true, a "threads" option is registered and the default thread count
     * is the number of available processors; otherwise the default is 1.
     */
    protected final boolean isMultiThreaded;
    /** Set by {@link #run(String[])} when the {@code -v}/{@code verbose} option is present. */
    protected boolean verbose;

    /**
     * Creates a main with {@code isMultiThreaded} set to {@code true}.
     */
    public GenericMain() {
        this(true);
    }

    /**
     * Creates a main and builds its command-line options.
     *
     * @param z value stored in {@code isMultiThreaded}
     */
    public GenericMain(boolean z) {
        // Must be assigned first: setupOptions() reads isMultiThreaded to decide
        // whether to register the "threads" option.
        this.isMultiThreaded = z;
        // NOTE(review): setupOptions() calls the overridable addExtraOptions()
        // hook, so subclass code runs before the subclass constructor body.
        this.argOptions = setupOptions();
        this.verbose = false;
    }

    /**
     * Reads the named file and returns the set of its distinct lines.
     *
     * <p>Each line is added exactly as returned by
     * {@link BufferedReader#readLine()}; no trimming or filtering is applied.
     *
     * @param str path of the file to read
     * @return a set containing every distinct line of the file
     * @throws IOException if the file cannot be opened or read
     */
    protected static Set<String> loadValidTermSet(String str) throws IOException {
        Set<String> terms = new HashSet<String>();
        BufferedReader reader = new BufferedReader(new FileReader(str));
        try {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                terms.add(line);
            }
        } finally {
            // Close even when readLine() fails so the file handle is not leaked.
            reader.close();
        }
        return terms;
    }

    /**
     * Instantiates the corpus reader named by the first element and adds one
     * document iterator for each remaining element, which is treated as a file.
     *
     * @param collection destination for the created document iterators
     * @param strArr the reader class name at index 0, followed by zero or more
     *        file paths (the {@code CLASSNAME,FILE[,FILE...]} option format)
     * @throws IOException declared for the reader; the visible code does not
     *         show which call raises it
     */
    protected void addCorpusReaderIterators(Collection<Iterator<Document>> collection, String[] strArr) throws IOException {
        CorpusReader corpusReader = (CorpusReader) ReflectionUtil.getObjectInstance(strArr[0]);
        for (int i = 1; i < strArr.length; i++) {
            // Index 0 is the reader class name; the files start at index 1.
            collection.add(corpusReader.read(new File(strArr[i])));
        }
    }

    /**
     * Adds a {@link OneLinePerDocumentIterator} for every given file name.
     *
     * @param collection destination for the created document iterators
     * @param strArr names of the files to wrap, one iterator per name
     * @throws IOException if an iterator cannot be created for a file
     */
    protected void addDocIterators(Collection<Iterator<Document>> collection, String[] strArr) throws IOException {
        for (int idx = 0; idx < strArr.length; idx++) {
            String docFile = strArr[idx];
            collection.add(new OneLinePerDocumentIterator(docFile));
        }
    }

    /**
     * Hook for registering additional command-line options. It is called by
     * {@link #setupOptions()} after the standard options have been added; this
     * default implementation adds nothing.
     *
     * @param argOptions the option set being built
     */
    protected void addExtraOptions(ArgOptions argOptions) {
    }

    /**
     * Adds a {@link FileListDocumentIterator} for every given file name.
     *
     * @param collection destination for the created document iterators
     * @param strArr names of the files to wrap, one iterator per name
     * @throws IOException if an iterator cannot be created for a file
     */
    protected void addFileIterators(Collection<Iterator<Document>> collection, String[] strArr) throws IOException {
        final int count = strArr.length;
        int next = 0;
        while (next < count) {
            collection.add(new FileListDocumentIterator(strArr[next]));
            next++;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Returns extra text for the usage message. {@link #usage()} prints it on
     * its own line after the option listing when it is non-empty; this default
     * implementation returns the empty string.
     *
     * @return algorithm-specific usage text, or {@code ""} for none
     */
    public String getAlgorithmSpecifics() {
        return "";
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds a single iterator over every document source given on the command
     * line via the {@code R}, {@code f} and {@code d} options, in that order.
     * When the {@code docLimit} option is present the combined iterator is
     * wrapped in a {@link LimitedIterator} with that limit.
     *
     * @return an iterator over all configured document sources
     * @throws IOException if a document source cannot be opened
     * @throws Error if none of the source options was specified
     */
    public Iterator<Document> getDocumentIterator() throws IOException {
        Collection<Iterator<Document>> sources = new LinkedList<Iterator<Document>>();

        if (this.argOptions.hasOption('R')) {
            String[] readerSpec = this.argOptions.getStringOption('R').split(",");
            addCorpusReaderIterators(sources, readerSpec);
        }
        if (this.argOptions.hasOption('f')) {
            String[] fileLists = this.argOptions.getStringOption('f').split(",");
            addFileIterators(sources, fileLists);
        }
        if (this.argOptions.hasOption('d')) {
            String[] docFiles = this.argOptions.getStringOption('d').split(",");
            addDocIterators(sources, docFiles);
        }

        if (sources.isEmpty()) {
            throw new Error("Must specify document sources");
        }

        CombinedIterator combined = new CombinedIterator((Collection) sources);
        if (!this.argOptions.hasOption("docLimit")) {
            return combined;
        }
        return new LimitedIterator(combined, this.argOptions.getIntOption("docLimit"));
    }

    /**
     * Returns the semantic space that {@link #run(String[])} feeds documents
     * to, processes, and finally saves.
     *
     * @return the semantic space to build
     */
    protected abstract SemanticSpace getSpace();

    /**
     * Returns the format used by {@link #saveSSpace(SemanticSpace, File)} when
     * no "outputFormat" option is given; this default is
     * {@link SemanticSpaceIO.SSpaceFormat#TEXT}.
     *
     * @return the default output format
     */
    protected SemanticSpaceIO.SSpaceFormat getSpaceFormat() {
        return SemanticSpaceIO.SSpaceFormat.TEXT;
    }

    /**
     * Hook called by {@link #run(String[])} after the command line has been
     * parsed and before {@link #setupProperties()}; this default
     * implementation does nothing.
     */
    protected void handleExtraOptions() {
    }

    /**
     * Processes every document from the iterator using {@code i} worker tasks
     * submitted to the shared {@link WorkQueue}, and blocks until all of them
     * have finished.
     *
     * <p>A failure while processing a single document is printed and the worker
     * moves on to the next document.
     *
     * @param semanticSpace the space that receives each document
     * @param it the documents to process; shared by all workers
     *        (NOTE(review): assumes the iterator tolerates concurrent calls —
     *        confirm against the iterator implementations)
     * @param i the number of worker tasks to submit
     * @throws IOException declared for compatibility with existing callers and
     *         overriders
     * @throws InterruptedException if interrupted while waiting for the workers
     */
    protected void parseDocumentsMultiThreaded(final SemanticSpace semanticSpace, final Iterator<Document> it, int i) throws IOException, InterruptedException {
        final AtomicInteger parsedCount = new AtomicInteger(0);
        WorkQueue workQueue = WorkQueue.getWorkQueue(i);
        Object taskGroup = workQueue.registerTaskGroup(i);
        long startMillis = System.currentTimeMillis();
        verbose("Beginning processing using %d threads", Integer.valueOf(i));
        for (int worker = 0; worker < i; worker++) {
            workQueue.add(taskGroup, new Runnable() {
                @Override
                public void run() {
                    while (it.hasNext()) {
                        long docStartMillis = System.currentTimeMillis();
                        Document document;
                        try {
                            document = it.next();
                        } catch (NoSuchElementException drained) {
                            // hasNext()/next() is not atomic across workers:
                            // another worker took the last document in between.
                            break;
                        }
                        int docNumber = parsedCount.incrementAndGet();
                        try {
                            semanticSpace.processDocument(document.reader());
                        } catch (Throwable th) {
                            // Best effort: report the failure and keep going
                            // with the remaining documents.
                            th.printStackTrace();
                        }
                        long elapsedMillis = System.currentTimeMillis() - docStartMillis;
                        GenericMain.this.verbose("parsed document #%d in %.3f seconds", Integer.valueOf(docNumber), Double.valueOf(elapsedMillis / 1000.0d));
                    }
                }
            });
        }
        workQueue.await(taskGroup);
        long totalMillis = System.currentTimeMillis() - startMillis;
        verbose("Processed all %d documents in %.3f total seconds", Integer.valueOf(parsedCount.get()), Double.valueOf(totalMillis / 1000.0d));
    }

    /**
     * Processes every document from the iterator on the calling thread, logging
     * the time taken per document and in total through {@code verbose}.
     *
     * @param semanticSpace the space that receives each document
     * @param it the documents to process
     * @throws IOException if processing a document fails with an I/O error
     */
    protected void parseDocumentsSingleThreaded(SemanticSpace semanticSpace, Iterator<Document> it) throws IOException {
        final long runStart = System.currentTimeMillis();
        int processed = 0;
        for (; it.hasNext(); ) {
            final long docStart = System.currentTimeMillis();
            // The counter is advanced before processing, so a failing document
            // has already been counted when the exception propagates.
            processed += 1;
            Document current = it.next();
            semanticSpace.processDocument(current.reader());
            long docMillis = System.currentTimeMillis() - docStart;
            verbose("processed document #%d in %.3f seconds", Integer.valueOf(processed), Double.valueOf(docMillis / 1000.0d));
        }
        long runMillis = System.currentTimeMillis() - runStart;
        verbose("Processed all %d documents in %.3f total seconds", Integer.valueOf(processed), Double.valueOf(runMillis / 1000.0d));
    }

    /**
     * Hook called by {@link #run(String[])} as its last step, after the space
     * has been saved; this default implementation does nothing.
     */
    protected void postProcessing() {
    }

    /**
     * Feeds all documents to the space via
     * {@link #parseDocumentsMultiThreaded(SemanticSpace, Iterator, int)} and
     * then calls {@code processSpace} on it, logging how long that call took.
     *
     * @param semanticSpace the space to populate and process
     * @param it the documents to process
     * @param i the number of worker tasks used for document parsing
     * @param properties passed to {@code SemanticSpace.processSpace}
     * @throws Exception if document parsing or space processing fails
     */
    protected void processDocumentsAndSpace(SemanticSpace semanticSpace, Iterator<Document> it, int i, Properties properties) throws Exception {
        parseDocumentsMultiThreaded(semanticSpace, it, i);

        final long spaceStart = System.currentTimeMillis();
        semanticSpace.processSpace(properties);
        final long spaceMillis = System.currentTimeMillis() - spaceStart;

        verbose("processed space in %.3f seconds", Double.valueOf(spaceMillis / 1000.0d));
    }

    /**
     * Runs the whole pipeline: parses the command line, configures the token
     * iterator properties, processes all documents, processes the space, writes
     * it to the output path and finally calls {@link #postProcessing()}.
     *
     * <p>With no arguments at all, the usage message is printed and the JVM
     * exits with status 1.
     *
     * @param strArr the command-line arguments; the first positional argument
     *        is the output file or directory
     * @throws IllegalArgumentException if no output path is given
     * @throws Exception if any stage of the pipeline fails
     */
    public void run(String[] strArr) throws Exception {
        if (strArr.length == 0) {
            usage();
            System.exit(1);
        }
        this.argOptions.parseOptions(strArr);
        if (this.argOptions.numPositionalArgs() == 0) {
            throw new IllegalArgumentException("must specify output path");
        }
        this.verbose = this.argOptions.hasOption('v') || this.argOptions.hasOption("verbose");
        if (this.verbose) {
            LoggerUtil.setLevel(Level.FINE);
        }

        int numThreads = this.isMultiThreaded ? Runtime.getRuntime().availableProcessors() : 1;
        if (this.argOptions.hasOption("threads")) {
            numThreads = this.argOptions.getIntOption("threads");
        }
        // Return value intentionally unused; presumably this sizes the shared
        // work queue before any task is submitted — TODO confirm in WorkQueue.
        WorkQueue.getWorkQueue(numThreads);

        // Overwriting is the default when the option is absent.
        boolean overwrite = !this.argOptions.hasOption("overwrite") || this.argOptions.getBooleanOption("overwrite");
        handleExtraOptions();

        Properties properties = setupProperties();
        if (this.argOptions.hasOption("tokenFilter")) {
            properties.setProperty(IteratorFactory.TOKEN_FILTER_PROPERTY, this.argOptions.getStringOption("tokenFilter"));
        }
        if (this.argOptions.hasOption("stemmingAlgorithm")) {
            properties.setProperty(IteratorFactory.STEMMER_PROPERTY, this.argOptions.getStringOption("stemmingAlgorithm"));
        }
        if (this.argOptions.hasOption("compoundWords")) {
            properties.setProperty(IteratorFactory.COMPOUND_TOKENS_FILE_PROPERTY, this.argOptions.getStringOption("compoundWords"));
        }
        if (this.argOptions.hasOption("wordLimit")) {
            properties.setProperty(IteratorFactory.TOKEN_COUNT_LIMIT_PROPERTY, this.argOptions.getStringOption("wordLimit"));
        }
        IteratorFactory.setProperties(properties);

        SemanticSpace space = getSpace();
        processDocumentsAndSpace(space, getDocumentIterator(), numThreads, properties);

        File output = resolveOutputFile(new File(this.argOptions.getPositionalArg(0)), space, overwrite);
        System.out.println("output File: " + output);

        long saveStart = System.currentTimeMillis();
        saveSSpace(space, output);
        long saveMillis = System.currentTimeMillis() - saveStart;
        verbose("printed space in %.3f seconds", Double.valueOf(saveMillis / 1000.0d));
        postProcessing();
    }

    /**
     * Decides which file the space is written to, given the requested path.
     *
     * <p>A directory receives a file named after the space (or a fresh
     * temporary file in it when overwriting is disabled). An existing regular
     * file is reused when overwriting is allowed; otherwise a fresh temporary
     * file with the same base name and extension is created next to it.
     */
    private File resolveOutputFile(File requested, SemanticSpace space, boolean overwrite) throws IOException {
        if (requested.isDirectory()) {
            return overwrite
                    ? new File(requested, space.getSpaceName() + EXT)
                    : File.createTempFile(space.getSpaceName(), EXT, requested);
        }
        if (overwrite || !requested.exists()) {
            return requested;
        }

        String name = requested.getName();
        int dot = name.lastIndexOf(".");
        // A name without a '.' has no extension and is its own base name;
        // substring(0, -1) would throw StringIndexOutOfBoundsException here.
        String extension = dot < 0 ? "" : name.substring(dot);
        String baseName = dot < 0 ? name : name.substring(0, dot);
        if (baseName.length() < 3) {
            // File.createTempFile requires a prefix of at least three
            // characters; the appended double's text is always that long.
            baseName = baseName + Math.abs(Math.random() * 32767.0d * 10.0d);
        }
        File parentDir = requested.getParentFile();
        if (parentDir == null) {
            parentDir = new File(".");
        }
        verbose("base dir: " + parentDir);
        return File.createTempFile(baseName, extension, parentDir);
    }

    /**
     * Writes the space to the given file. The format is taken from the
     * "outputFormat" option (matched case-insensitively against the
     * {@link SemanticSpaceIO.SSpaceFormat} constant names) when present, and
     * from {@link #getSpaceFormat()} otherwise.
     *
     * @param semanticSpace the space to write
     * @param file the destination file
     * @throws IOException if writing fails
     * @throws IllegalArgumentException if the "outputFormat" value does not
     *         name a known format
     */
    protected void saveSSpace(SemanticSpace semanticSpace, File file) throws IOException {
        SemanticSpaceIO.SSpaceFormat format;
        if (this.argOptions.hasOption("outputFormat")) {
            // Locale.ROOT keeps the upper-casing independent of the default
            // locale (e.g. Turkish maps 'i' to a dotted capital, which would
            // make "binary" fail to match BINARY).
            String formatName = this.argOptions.getStringOption("outputFormat").toUpperCase(Locale.ROOT);
            format = SemanticSpaceIO.SSpaceFormat.valueOf(formatName);
        } else {
            format = getSpaceFormat();
        }
        SemanticSpaceIO.save(semanticSpace, file, format);
    }

    /**
     * Builds the command-line options shared by all mains: document sources,
     * program options and tokenizing options. The "threads" option is only
     * registered when {@code isMultiThreaded} is true. Subclass options are
     * added last through {@link #addExtraOptions(ArgOptions)}.
     *
     * @return the fully populated option set
     */
    protected ArgOptions setupOptions() {
        final String sourceGroup = "Required (at least one of)";
        final String programGroup = "Program Options";
        final String tokenGroup = "Tokenizing Options";

        ArgOptions opts = new ArgOptions();

        // Document sources.
        opts.addOption('f', "fileList", "a list of document files", true, "FILE[,FILE...]", sourceGroup);
        opts.addOption('d', "docFile", "a file where each line is a document", true, "FILE[,FILE...]", sourceGroup);
        opts.addOption('R', "corpusReader", "Specifies a CorpusReader which will automatically parse the document files that are not in the formats expected by -f and -d.", true, "CLASSNAME,FILE[,FILE...]", sourceGroup);

        // General program behaviour.
        opts.addOption('o', "outputFormat", "the .sspace format to use", true, "FORMAT", programGroup);
        if (this.isMultiThreaded) {
            opts.addOption('t', "threads", "the number of threads to use", true, "INT", programGroup);
        }
        opts.addOption('w', "overwrite", "specifies whether to overwrite the existing output", true, "BOOL", programGroup);
        opts.addOption('v', "verbose", "prints verbose output", false, null, programGroup);

        // Tokenization.
        opts.addOption('Z', "stemmingAlgorithm", "specifices the stemming algorithm to use on tokens while iterating.  (default: none)", true, "CLASSNAME", tokenGroup);
        opts.addOption('F', "tokenFilter", "filters to apply to the input token stream", true, "FILTER_SPEC", tokenGroup);
        opts.addOption('C', "compoundWords", "a file where each line is a recognized compound word", true, "FILE", tokenGroup);
        opts.addOption('z', "wordLimit", "Set the maximum number of words an document can return", true, "INT", tokenGroup);

        addExtraOptions(opts);
        return opts;
    }

    /**
     * Returns the properties that {@link #run(String[])} updates with the
     * tokenizing options and passes on to the iterator factory and the space.
     * This default returns the live JVM system properties, so those updates
     * are written into the system properties themselves.
     *
     * @return the properties object to configure
     */
    protected Properties setupProperties() {
        return System.getProperties();
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Prints the usage message to standard output: the invocation line, the
     * formatted option list, any algorithm-specific text from
     * {@link #getAlgorithmSpecifics()}, and the shared option descriptions.
     */
    public void usage() {
        String specifics = getAlgorithmSpecifics();
        PrintStream out = System.out;

        // Algorithm-specific text gets its own line, but only when present.
        String specificsSection = "";
        if (specifics.length() != 0) {
            specificsSection = "\n" + specifics;
        }

        StringBuilder message = new StringBuilder()
                .append("usage: java ")
                .append(getClass().getName())
                .append(" [options] <output-dir>\n")
                .append(this.argOptions.prettyPrint())
                .append(specificsSection)
                .append("\n")
                .append(OptionDescriptions.COMPOUND_WORDS_DESCRIPTION)
                .append("\n\n")
                .append(OptionDescriptions.TOKEN_FILTER_DESCRIPTION)
                .append("\n\n")
                .append(OptionDescriptions.TOKEN_STEMMING_DESCRIPTION)
                .append("\n\n")
                .append(OptionDescriptions.FILE_FORMAT_DESCRIPTION)
                .append("\n\n")
                .append(OptionDescriptions.HELP_DESCRIPTION);
        out.println(message.toString());
    }

    /**
     * Logs the message at {@link Level#FINE}, attributed to the runtime class
     * of this object, when that level is enabled on the logger.
     *
     * @param str the message to log
     */
    protected void verbose(String str) {
        if (!LOGGER.isLoggable(Level.FINE)) {
            return;
        }
        String sourceClass = getClass().getName();
        LOGGER.logp(Level.FINE, sourceClass, "verbose", str);
    }

    /**
     * Formats the message with {@link String#format(String, Object...)} and
     * logs it at {@link Level#FINE}, attributed to the runtime class of this
     * object. The message is formatted only when that level is enabled.
     *
     * @param str the format string
     * @param objArr the format arguments
     */
    protected void verbose(String str, Object... objArr) {
        if (!LOGGER.isLoggable(Level.FINE)) {
            return;
        }
        String formatted = String.format(str, objArr);
        LOGGER.logp(Level.FINE, getClass().getName(), "verbose", formatted);
    }
}
