package org.apache.lucene.benchmark.byTask.feeds;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.CodingErrorAction;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.mahout.cf.taste.impl.model.jdbc.AbstractJDBCDataModel;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

/* loaded from: input_file:org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.class */
/**
 * A {@link ContentSource} that reads documents from an XML file made of
 * {@code <page>} elements containing {@code <title>}, {@code <timestamp>},
 * {@code <text>} and {@code <id>} children (presumably a Wikipedia/MediaWiki
 * export, judging by the class name and the {@code #redirect} / {@code Image:}
 * handling).
 *
 * <p>Parsing runs on a background daemon thread (see {@link Parser}); each
 * completed page is handed to the caller of {@link #getNextDocData(DocData)}
 * one tuple at a time through a wait/notify hand-off on the parser's monitor.
 *
 * <p>Configuration read by {@link #setConfig(Config)}:
 * <ul>
 *   <li>{@code docs.file} - path of the file to read (no default);</li>
 *   <li>{@code keep.image.only.docs} - when {@code false}, pages whose title
 *       starts with {@code "Image:"} are skipped (default {@code true}).</li>
 * </ul>
 */
public class EnwikiContentSource extends ContentSource {
    // Indices of the fields inside the String[] tuple passed from the parser
    // thread to the consumer. They double as element-type codes.
    private static final int TITLE = 0;
    private static final int DATE = 1;
    private static final int BODY = 2;
    private static final int ID = 3;
    /** Size of a tuple; element-type codes that are not tuple slots come after it. */
    private static final int LENGTH = 4;
    /** Element-type code for {@code <page>}; not a tuple index. */
    private static final int PAGE = 5;

    /** Maps an XML element's qualified name to one of the element-type codes above. */
    private static final Map<String, Integer> ELEMENTS = new HashMap<String, Integer>();

    static {
        ELEMENTS.put("page", PAGE);
        ELEMENTS.put("text", BODY);
        // Plain literal on purpose: the element name belongs to the XML format
        // and must not be borrowed from an unrelated library constant.
        ELEMENTS.put("timestamp", DATE);
        ELEMENTS.put("title", TITLE);
        ELEMENTS.put("id", ID);
    }

    /** Month abbreviations indexed by (month number - 1), used by {@link Parser#time(String)}. */
    private static final String[] months = {"JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"};

    private File file;
    private InputStream is;
    private boolean keepImages = true;
    private Parser parser = new Parser();

    /**
     * SAX handler that parses the input on its own thread and exposes the
     * parsed pages through {@link #next()}.
     *
     * <p>At most one tuple is buffered: the parser thread blocks in
     * {@link #endElement} until the consumer has taken the previous tuple,
     * and the consumer blocks in {@link #next()} until a tuple is available.
     * Both sides synchronize on this {@code Parser} instance.
     */
    private class Parser extends DefaultHandler implements Runnable {
        /** The parsing thread; {@code null} until first use and after end of data. */
        private Thread t;
        /** Set once {@link #run()} has exited, normally or exceptionally. */
        private boolean threadDone;
        /** Set by {@link #stop()}; makes both sides stop waiting. */
        private boolean stopped;
        /** The single hand-off slot between parser thread and consumer. */
        private String[] tuple;
        /** Set by the parser thread when the input is exhausted (or it was stopped). */
        private NoMoreDataException nmde;
        /** Accumulates character data of the element currently being read. */
        private StringBuilder contents;
        private String title;
        private String body;
        private String time;
        private String id;

        private Parser() {
            this.stopped = false;
            this.contents = new StringBuilder();
        }

        /**
         * Returns the next parsed page, starting the parser thread on first use.
         *
         * @return a tuple indexed by {@code TITLE}, {@code DATE}, {@code BODY}, {@code ID}
         * @throws NoMoreDataException if the parser reported end of data, was
         *         stopped, or its thread exited without producing a tuple
         * @throws ThreadInterruptedException if the calling thread is interrupted while waiting
         */
        String[] next() throws NoMoreDataException {
            if (t == null) {
                threadDone = false;
                t = new Thread(this);
                t.setDaemon(true);
                t.start();
            }
            synchronized (this) {
                while (tuple == null && nmde == null && !threadDone && !stopped) {
                    try {
                        wait();
                    } catch (InterruptedException ie) {
                        throw new ThreadInterruptedException(ie);
                    }
                }
                if (tuple != null) {
                    String[] result = tuple;
                    tuple = null;
                    // Wake the parser thread, which may be waiting for the slot to empty.
                    notify();
                    return result;
                }
                if (nmde != null) {
                    // Clear the thread reference so a later call starts a new thread.
                    t = null;
                    throw nmde;
                }
                // Either stop() was called or the thread ended without signalling
                // end of data (i.e. it failed); report "no more data" to the caller.
                throw new NoMoreDataException();
            }
        }

        /**
         * Reformats a timestamp by fixed character positions: year at [0,4),
         * month number at [5,7), day at [8,10) and time-of-day at [11,19).
         * For example {@code "2005-01-02T03:04:05Z"} becomes
         * {@code "02-JAN-2005 03:04:05.000"}.
         *
         * @param str the timestamp text; must be at least 19 characters long
         * @return the timestamp as {@code dd-MMM-yyyy HH:mm:ss.000}
         */
        String time(String str) {
            String day = str.substring(8, 10);
            String month = months[Integer.parseInt(str.substring(5, 7)) - 1];
            String year = str.substring(0, 4);
            String timeOfDay = str.substring(11, 19);
            return day + "-" + month + "-" + year + " " + timeOfDay + ".000";
        }

        /** Appends the reported character data to the current element's buffer. */
        @Override
        public void characters(char[] cArr, int i, int i2) {
            contents.append(cArr, i, i2);
        }

        /**
         * Captures the text of a finished field element, and on {@code </page>}
         * publishes the collected fields as a tuple for {@link #next()}.
         */
        @Override
        public void endElement(String str, String str2, String str3) throws SAXException {
            switch (getElementType(str3)) {
                case TITLE:
                    title = contents.toString();
                    break;
                case DATE:
                    time = time(contents.toString());
                    break;
                case BODY:
                    body = contents.toString();
                    // Redirect pages carry no content of their own: drop the body
                    // so that the enclosing page is skipped.
                    String head = body.substring(0, Math.min(10, contents.length()));
                    if (head.toLowerCase(Locale.ROOT).startsWith("#redirect")) {
                        body = null;
                    }
                    break;
                case ID:
                    // Only the first <id> inside a page is kept; later ones are ignored.
                    if (id == null) {
                        id = contents.toString();
                    }
                    break;
                case PAGE:
                    if (body != null && (keepImages || !title.startsWith("Image:"))) {
                        publish(createTuple());
                    }
                    break;
                default:
                    break;
            }
        }

        /** Builds the tuple for the current page, flattening tabs (and newlines in the body) to spaces. */
        private String[] createTuple() {
            String[] result = new String[LENGTH];
            result[TITLE] = title.replace('\t', ' ');
            result[DATE] = time.replace('\t', ' ');
            result[BODY] = body.replaceAll("[\t\n]", " ");
            result[ID] = id;
            return result;
        }

        /** Waits until the hand-off slot is free (or the parser is stopped), then fills it. */
        private void publish(String[] newTuple) {
            synchronized (this) {
                while (tuple != null && !stopped) {
                    try {
                        wait();
                    } catch (InterruptedException ie) {
                        throw new ThreadInterruptedException(ie);
                    }
                }
                tuple = newTuple;
                notify();
            }
        }

        /**
         * Parser thread body: parses the current input stream, and when
         * {@code forever} is set re-opens the input and parses again until
         * stopped. Always marks the thread as done and wakes the consumer on exit.
         */
        @Override
        public void run() {
            try {
                XMLReader reader = XMLReaderFactory.createXMLReader();
                reader.setContentHandler(this);
                reader.setErrorHandler(this);
                while (!stopped) {
                    final InputStream localIs = is;
                    if (localIs != null) { // null means the stream was closed before we got here
                        try {
                            // Decode strictly as UTF-8: malformed or unmappable input is reported, not replaced.
                            reader.parse(new InputSource(new BufferedReader(new InputStreamReader(
                                    localIs,
                                    IOUtils.CHARSET_UTF_8.newDecoder()
                                            .onMalformedInput(CodingErrorAction.REPORT)
                                            .onUnmappableCharacter(CodingErrorAction.REPORT)))));
                        } catch (IOException ioe) {
                            synchronized (EnwikiContentSource.this) {
                                if (localIs == is) {
                                    // The stream is still the current one, so the failure is genuine.
                                    throw ioe;
                                }
                                // Otherwise the stream was closed or replaced under us; fall through.
                            }
                        }
                    }
                    synchronized (this) {
                        if (stopped || !forever) {
                            nmde = new NoMoreDataException();
                            notify();
                            return;
                        }
                        if (localIs == is) {
                            // Nobody re-opened the input in the meantime, so do it now.
                            is = openInputStream();
                        }
                    }
                }
            } catch (SAXException sae) {
                throw new RuntimeException(sae);
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            } finally {
                // Runs on every exit path so that a consumer blocked in next() is released.
                synchronized (this) {
                    threadDone = true;
                    notify();
                }
            }
        }

        /**
         * Resets the text buffer at the start of a field element, and clears
         * all collected fields at the start of a {@code <page>}.
         */
        @Override
        public void startElement(String str, String str2, String str3, Attributes attributes) {
            switch (getElementType(str3)) {
                case TITLE:
                case DATE:
                case BODY:
                case ID:
                    contents.setLength(0);
                    break;
                case PAGE:
                    title = null;
                    body = null;
                    time = null;
                    id = null;
                    break;
                default:
                    break;
            }
        }

        /**
         * Marks the parser as stopped and discards any pending tuple, waking a
         * parser thread that may be waiting for the slot to empty.
         */
        public void stop() {
            synchronized (this) {
                stopped = true;
                if (tuple != null) {
                    tuple = null;
                    notify();
                }
            }
        }
    }

    /**
     * Returns the element-type code for an XML element name.
     *
     * @param str the element's qualified name
     * @return one of {@code TITLE}, {@code DATE}, {@code BODY}, {@code ID},
     *         {@code PAGE}, or {@code -1} if the element is not of interest
     */
    public static final int getElementType(String str) {
        Integer type = ELEMENTS.get(str);
        return type == null ? -1 : type.intValue();
    }

    /** Stops the parser and closes the current input stream, if one is open. */
    @Override
    public void close() throws IOException {
        synchronized (this) {
            parser.stop();
            if (is != null) {
                is.close();
                is = null;
            }
        }
    }

    /**
     * Fills {@code docData} with the next parsed page.
     *
     * @param docData the instance to clear and populate
     * @return the same {@code docData} instance
     * @throws NoMoreDataException if the parser has no further pages to deliver
     */
    @Override
    public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
        String[] next = parser.next();
        docData.clear();
        docData.setName(next[ID]);
        docData.setBody(next[BODY]);
        docData.setDate(next[DATE]);
        docData.setTitle(next[TITLE]);
        return docData;
    }

    /** Resets the superclass state and opens a fresh input stream on the configured file. */
    @Override
    public void resetInputs() throws IOException {
        super.resetInputs();
        is = openInputStream();
    }

    /** Opens the configured file via {@link StreamUtils#inputStream(File)}. */
    protected InputStream openInputStream() throws IOException {
        return StreamUtils.inputStream(file);
    }

    /**
     * Reads {@code keep.image.only.docs} (default {@code true}) and
     * {@code docs.file}; when the latter is set, the file is resolved to an
     * absolute path.
     */
    @Override
    public void setConfig(Config config) {
        super.setConfig(config);
        keepImages = config.get("keep.image.only.docs", true);
        String fileName = config.get("docs.file", (String) null);
        if (fileName != null) {
            file = new File(fileName).getAbsoluteFile();
        }
    }
}
