package edu.ucla.sspace.text;

import edu.ucla.sspace.dependency.DependencyExtractor;
import edu.ucla.sspace.dependency.WaCKyDependencyExtractor;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOError;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;

/* loaded from: classes.dex */
/**
 * An iterator over the documents stored in a single text file, presumably in
 * the PukWaC corpus format (judging by the class name).
 *
 * <p>Each block of lines delimited by {@code <s>} and {@code </s>} is returned
 * as one {@link LabeledParsedDocument}.  The document's label is the most
 * recently seen source: the first double-quoted value found on a line that
 * contains {@code <text}.
 *
 * <p>The underlying reader is closed automatically once the end of the file
 * has been reached, or when reading fails.  An iterator that is abandoned
 * before it is exhausted keeps its file open.
 */
public class PukWaCDocumentIterator implements Iterator<LabeledParsedDocument> {

    /** The extractor handed to every document created by this iterator. */
    private static final DependencyExtractor extractor = new WaCKyDependencyExtractor();

    /** The source label taken from the most recent usable {@code <text} line. */
    private String currentSource;

    /** The reader over the file that contains all documents. */
    private final BufferedReader documentsReader;

    /** The document the next call to {@link #next()} returns, or {@code null} if none. */
    private LabeledParsedDocument nextDoc;

    /**
     * Opens the given file and reads the first document from it.
     *
     * @param str the path of the file containing the documents
     * @throws IOError if the file cannot be opened or read
     */
    public PukWaCDocumentIterator(String str) {
        try {
            this.documentsReader = new BufferedReader(new FileReader(str));
        } catch (IOException e) {
            throw new IOError(e);
        }

        boolean initialized = false;
        try {
            advance();
            initialized = true;
        } catch (IOException e) {
            throw new IOError(e);
        } finally {
            // Whatever went wrong while reading the first document, do not
            // leak the file handle of a half-constructed iterator.
            if (!initialized) {
                closeQuietly();
            }
        }
    }

    /**
     * Reads lines until one complete sentence block has been consumed and
     * stores the resulting document in {@link #nextDoc}.  When the end of the
     * file is reached before another sentence starts, {@link #nextDoc} is left
     * {@code null} and the reader is closed.
     *
     * @throws IOException if reading from the file fails
     */
    private void advance() throws IOException {
        this.nextDoc = null;
        StringBuilder sb = new StringBuilder();

        boolean sentenceRead = false;
        while (!sentenceRead) {
            String line = this.documentsReader.readLine();
            if (line == null) {
                // Nothing left to read: release the file handle right away.
                closeQuietly();
                return;
            }
            if (line.contains("<text")) {
                // A header without a quoted value keeps the previous source.
                String source = extractQuotedValue(line);
                if (source != null) {
                    this.currentSource = source;
                }
            } else if (line.equals("<s>")) {
                sentenceRead = readSentence(sb);
            }
            // Any other line outside of a sentence is skipped.
        }

        this.nextDoc = new LabeledParsedStringDocument(
                this.currentSource, extractor, sb.toString());
    }

    /**
     * Reads the body of a sentence whose opening {@code <s>} line has already
     * been consumed, appending each content line plus a newline to {@code sb}.
     *
     * <p>Lines containing {@code </text>} are dropped.  A line containing
     * {@code <text} updates the current source and reading continues; if no
     * quoted value can be extracted from it, the text gathered so far is
     * discarded and the sentence is abandoned.
     *
     * @param sb the buffer receiving the sentence's lines
     * @return {@code true} if the sentence ended at {@code </s>} or at the end
     *         of the file, {@code false} if it was abandoned
     * @throws IOException if reading from the file fails
     */
    private boolean readSentence(StringBuilder sb) throws IOException {
        while (true) {
            String line = this.documentsReader.readLine();
            if (line == null || line.equals("</s>")) {
                return true;
            }
            if (line.contains("<text")) {
                String source = extractQuotedValue(line);
                if (source == null) {
                    sb.setLength(0);
                    return false;
                }
                // NOTE(review): the lines gathered so far are kept and end up
                // labeled with the new source -- confirm this is intended.
                this.currentSource = source;
            } else if (!line.contains("</text>")) {
                sb.append(line);
                sb.append("\n");
            }
        }
    }

    /**
     * Returns the text between the first two double quotes of {@code line}.
     *
     * @param line the line to inspect
     * @return the quoted text, or {@code null} if the line holds fewer than
     *         two double quotes
     */
    private static String extractQuotedValue(String line) {
        int open = line.indexOf('"');
        if (open < 0) {
            return null;
        }
        int close = line.indexOf('"', open + 1);
        if (close < 0) {
            return null;
        }
        return line.substring(open + 1, close);
    }

    /** Closes the reader, ignoring any failure to do so. */
    private void closeQuietly() {
        try {
            this.documentsReader.close();
        } catch (IOException ignored) {
            // The reader is only read from, so a failed close cannot lose
            // data and must not mask the outcome of the surrounding call.
        }
    }

    /**
     * Returns {@code true} if another document is available.
     */
    @Override
    public boolean hasNext() {
        return this.nextDoc != null;
    }

    /**
     * Returns the next document and reads ahead to the one after it.
     *
     * <p>If creating a following document fails with a non-I/O exception, that
     * document is skipped and the read-ahead resumes with the remaining input.
     *
     * @return the next document
     * @throws NoSuchElementException if no further documents are available
     * @throws IOError if reading from the file fails
     */
    @Override
    public LabeledParsedDocument next() {
        LabeledParsedDocument current = this.nextDoc;
        if (current == null) {
            throw new NoSuchElementException("No further documents");
        }
        while (true) {
            try {
                advance();
                return current;
            } catch (IOException e) {
                // The reader is unusable after an I/O failure; release it.
                closeQuietly();
                throw new IOError(e);
            } catch (Exception ignored) {
                // Best effort: the block just read could not be turned into a
                // document.  Its lines are already consumed, so retrying moves
                // on to the next block (or to the end of the file).
            }
        }
    }

    /**
     * Always throws, since documents cannot be removed from the file.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("removing documents is not supported");
    }
}
