package edu.ucla.sspace.text;

import com.moms.lib_modules.cpi.Setting_SharePreferences;
import edu.ucla.sspace.text.DirectoryCorpusReader;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOError;
import java.io.IOException;
import java.util.GregorianCalendar;
import java.util.Iterator;

/* loaded from: classes2.dex */
/**
 * A {@link DirectoryCorpusReader} for corpus files that hold several documents
 * each, where the end of every document is marked by a line containing
 * {@code ---END.OF.DOCUMENT---}.
 *
 * <p>Leading {@code '>'} and space characters are stripped from every line
 * before it is added to the document text.  When timestamps are enabled, each
 * document's text is prefixed with a millisecond timestamp derived from the
 * name of the file it was read from.
 */
public class UsenetCorpusReader extends DirectoryCorpusReader<Document> {
    /** Marker text that identifies the line terminating a document. */
    private static final String END_OF_DOCUMENT = "---END.OF.DOCUMENT---";

    /** Whether each document's text is prefixed with its file's timestamp. */
    private final boolean useTimestamps;

    /**
     * Iterates over the documents of each corpus file in turn.
     */
    public class InnerIterator extends DirectoryCorpusReader.BaseFileIterator {
        /**
         * Timestamp, in milliseconds, parsed from the current file's name.
         * Only computed when timestamps are enabled.
         */
        private long curDocTimestamp;

        /**
         * Reader over the current file, or {@code null} when no file is open
         * (before the first file is set up, or after the current file has been
         * read to its end).
         */
        private BufferedReader usenetReader;

        /**
         * @param it the corpus files to read, passed through to the base
         *           iterator
         */
        public InnerIterator(Iterator<File> it) {
            super(it);
        }

        /**
         * Reads the next document from the current file.
         *
         * <p>Text that follows the last end-of-document marker in a file is
         * discarded, because a document is only emitted when its marker line
         * is seen.
         *
         * @return the next document, or {@code null} when no file is open or
         *         the current file holds no further complete document
         * @throws IOError if reading or closing the current file fails
         */
        @Override // edu.ucla.sspace.text.DirectoryCorpusReader.BaseFileIterator
        protected Document advanceInDoc() {
            // No open file means there is nothing left to read here.
            if (this.usenetReader == null) {
                return null;
            }

            StringBuilder sb = new StringBuilder();
            // NOTE(review): YOIL_SPLIT comes from an unrelated package and looks
            // like a decompiler substitution for a separator literal
            // (presumably a single space) -- confirm before replacing it.
            if (UsenetCorpusReader.this.useTimestamps) {
                sb.append(this.curDocTimestamp);
                sb.append(Setting_SharePreferences.YOIL_SPLIT);
            }

            try {
                String line;
                while ((line = this.usenetReader.readLine()) != null) {
                    if (line.contains(UsenetCorpusReader.END_OF_DOCUMENT)) {
                        return new StringDocument(cleanDoc(sb.toString()));
                    }
                    // Drop the leading '>' / ' ' run; this is safe for empty
                    // lines and for lines made up only of those characters.
                    sb.append(line.substring(quotePrefixLength(line)));
                    sb.append(Setting_SharePreferences.YOIL_SPLIT);
                }
                // End of file: release the handle instead of leaking it.
                closeCurrentReader();
                return null;
            } catch (IOException e) {
                throw new IOError(e);
            }
        }

        /**
         * Opens {@code file} as the current file, closing any reader left over
         * from a previous file.  When timestamps are enabled, the timestamp is
         * parsed from the second-to-last dot-separated segment of the file
         * name, whose first eight characters are read as {@code yyyymmdd}.
         *
         * @param file the corpus file to start reading
         * @throws IOError if the file cannot be opened, or a previous reader
         *         cannot be closed
         * @throws IndexOutOfBoundsException if timestamps are enabled and the
         *         file name has fewer than two dot-separated segments or a
         *         date segment shorter than eight characters
         * @throws NumberFormatException if timestamps are enabled and the date
         *         segment is not numeric
         */
        @Override // edu.ucla.sspace.text.DirectoryCorpusReader.BaseFileIterator
        protected void setupCurrentDoc(File file) {
            try {
                closeCurrentReader();

                // Parse the name before opening the file so that a malformed
                // name cannot leave an open reader behind.
                if (UsenetCorpusReader.this.useTimestamps) {
                    String[] nameParts = file.getName().split("\\.");
                    String date = nameParts[nameParts.length - 2];
                    int year = Integer.parseInt(date.substring(0, 4));
                    // GregorianCalendar months are 0-based (0 == January),
                    // while the file name carries a 1-based month.
                    int month = Integer.parseInt(date.substring(4, 6)) - 1;
                    int day = Integer.parseInt(date.substring(6, 8));
                    this.curDocTimestamp =
                            new GregorianCalendar(year, month, day).getTimeInMillis();
                }

                this.usenetReader = new BufferedReader(new FileReader(file));
            } catch (IOException e) {
                throw new IOError(e);
            }
        }

        /**
         * Closes the current reader, if any, and clears the reference even if
         * closing fails.
         */
        private void closeCurrentReader() throws IOException {
            BufferedReader reader = this.usenetReader;
            this.usenetReader = null;
            if (reader != null) {
                reader.close();
            }
        }
    }

    /**
     * Creates a reader that does not prefix documents with timestamps.
     */
    public UsenetCorpusReader() {
        this(false);
    }

    /**
     * Creates a reader.
     *
     * @param useTimestamps {@code true} to prefix each document's text with
     *        the timestamp parsed from the name of its file
     */
    public UsenetCorpusReader(boolean useTimestamps) {
        this.useTimestamps = useTimestamps;
    }

    /**
     * Wraps the given files in an iterator over the documents they contain.
     *
     * @param it the corpus files to read
     * @return an iterator over the documents in those files
     */
    @Override // edu.ucla.sspace.text.DirectoryCorpusReader
    protected Iterator<Document> corpusIterator(Iterator<File> it) {
        return new InnerIterator(it);
    }

    /**
     * Returns the number of leading {@code '>'} and space characters in
     * {@code line}; this is {@code line.length()} when the line consists only
     * of those characters, and 0 for an empty line.
     */
    private static int quotePrefixLength(String line) {
        int length = line.length();
        int i = 0;
        while (i < length) {
            char c = line.charAt(i);
            if (c != '>' && c != ' ') {
                break;
            }
            i++;
        }
        return i;
    }
}
