package edu.ucla.sspace.text.corpora;

import edu.ucla.sspace.text.DirectoryCorpusReader;
import edu.ucla.sspace.text.Document;
import edu.ucla.sspace.text.DocumentPreprocessor;
import edu.ucla.sspace.text.StringDocument;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOError;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.Iterator;

/* loaded from: classes2.dex */
/**
 * A {@link DirectoryCorpusReader} that extracts documents from files in which
 * each document body is wrapped in {@code <content>...</content>} tags,
 * optionally accompanied by an {@code <updated>...</updated>} timestamp line.
 *
 * <p>Files are read line by line; tag detection is a plain substring match on
 * each line rather than real XML parsing.
 */
public class BloglinesCorpusReader extends DirectoryCorpusReader<Document> {

    /**
     * Iterates over the documents found in a sequence of files, holding one
     * open reader for the file currently being scanned.
     */
    public class BloglinesIterator extends DirectoryCorpusReader.BaseFileIterator {

        /** Reader over the current file, or {@code null} when no file is open. */
        private BufferedReader bloglinesReader;

        /**
         * Creates an iterator over the documents contained in {@code it}'s files.
         *
         * @param it the files to scan
         */
        public BloglinesIterator(Iterator<File> it) {
            super(it);
        }

        /**
         * Reads forward in the current file until one complete document has
         * been assembled.
         *
         * <p>A document is produced when:
         * <ul>
         *   <li>a line holds both the opening {@code <content>} tag and a later
         *       {@code <}; the text between them is the document;</li>
         *   <li>a {@code </content>} line closes a block opened on an earlier
         *       line; the buffered text is the document;</li>
         *   <li>an {@code <updated>} line appears while a block is buffered;
         *       the document is the timestamp in milliseconds, a space, and
         *       the buffered text.</li>
         * </ul>
         *
         * @return the next document, or {@code null} when the current file is
         *         exhausted or no file is open
         * @throws IOError if reading or closing the file fails
         * @throws IllegalArgumentException if a non-empty {@code <updated>}
         *         value is rejected by {@link Timestamp#valueOf(String)}
         */
        @Override
        protected Document advanceInDoc() {
            // No file has been set up (or the previous one was fully consumed).
            if (this.bloglinesReader == null) {
                return null;
            }

            // Text of a content block that spans several lines; null while we
            // are not inside such a block.
            StringBuilder content = null;
            try {
                while (true) {
                    String line = this.bloglinesReader.readLine();
                    if (line == null) {
                        // End of file: release the handle right away instead
                        // of waiting for the next setupCurrentDoc call.
                        closeReader();
                        return null;
                    }

                    if (line.contains("<content>")) {
                        int start = line.indexOf(">") + 1;
                        int end = line.lastIndexOf("<");
                        if (end > start) {
                            // Block opened and closed on the same line.
                            return new StringDocument(cleanDoc(line.substring(start, end)));
                        }
                        // NOTE(review): an empty "<content></content>" line has
                        // end == start and therefore lands here, so the buffer
                        // starts with the closing tag text. Confirm intended
                        // handling before changing.
                        // Starting a new block discards any unfinished one.
                        content = new StringBuilder(line.substring(start));
                    } else if (line.contains("</content>")) {
                        if (content == null) {
                            // Closing tag with no open block: nothing to emit.
                            continue;
                        }
                        content.append(line.substring(0, line.lastIndexOf("<")));
                        return new StringDocument(cleanDoc(content.toString()));
                    } else if (line.contains("<updated>") && content != null) {
                        int start = line.indexOf(">") + 1;
                        int end = line.lastIndexOf("<");
                        // An unclosed <updated> tag gives end < start; treat it
                        // like an empty value rather than failing in substring.
                        String stamp = end >= start ? line.substring(start, end) : "";
                        long millis = stamp.equals("") ? 0L : Timestamp.valueOf(stamp).getTime();
                        return new StringDocument(
                                String.format("%d %s", Long.valueOf(millis), cleanDoc(content.toString())));
                    } else if (content != null) {
                        // Interior line of a multi-line block; appended as-is,
                        // without a separator between lines.
                        content.append(line);
                    }
                }
            } catch (IOException e) {
                throw new IOError(e);
            }
        }

        /**
         * Opens {@code file} as the current document source, closing the
         * reader of the previously scanned file first.
         *
         * @param file the file to read next
         * @throws IOError if the previous reader cannot be closed or the new
         *         file cannot be opened
         */
        @Override
        protected void setupCurrentDoc(File file) {
            try {
                closeReader();
                this.bloglinesReader = new BufferedReader(new FileReader(file));
            } catch (IOException e) {
                throw new IOError(e);
            }
        }

        /** Closes the current reader, if any, and clears the field. */
        private void closeReader() throws IOException {
            BufferedReader stale = this.bloglinesReader;
            if (stale != null) {
                // Clear the field first so a failing close() cannot leave a
                // half-closed reader behind for later calls.
                this.bloglinesReader = null;
                stale.close();
            }
        }
    }

    /** Creates a reader using the superclass's no-argument constructor. */
    public BloglinesCorpusReader() {
    }

    /**
     * Creates a reader, passing {@code documentPreprocessor} to the superclass.
     *
     * @param documentPreprocessor the preprocessor handed to
     *        {@link DirectoryCorpusReader}
     */
    public BloglinesCorpusReader(DocumentPreprocessor documentPreprocessor) {
        super(documentPreprocessor);
    }

    /**
     * Returns a {@link BloglinesIterator} over the documents in {@code it}'s
     * files.
     *
     * @param it the files to scan
     * @return an iterator yielding the extracted documents
     */
    @Override
    protected Iterator<Document> corpusIterator(Iterator<File> it) {
        return new BloglinesIterator(it);
    }
}
