package de.julielab.jcore.reader.db;

import de.julielab.jcore.types.ext.DBProcessingMetaData;
import de.julielab.xmlData.dataBase.DBCIterator;
import de.julielab.xmlData.dataBase.DataBaseConnector;
import de.julielab.xmlData.dataBase.util.TableSchemaMismatchException;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ResourceMetaData(name = "JCoRe Database Reader", description = "A collection reader that fetches documents from a PostgreSQL database. It is an abstract class and must be extended to actually populate CAS instances. It works with the corpus storage system (CoStoSys), thus subset tables may be specified to read from in the 'Table' parameter. Then, the reader will mark batches of IDs read from the subset as being 'in process', allowing multiple DBReaders in different pipelines to be synchronized and not read documents multiple times. Additional tables can be specified that will be joined to the main document database. This is used to load annotations that have been stored in separate tables. The jcore-xmi-db-writer is able to write such annotation tables and the jcore-xmi-db-reader implements the assembly of such distributed annotation data. All mentioned components are part of the Jena Document Information System, JeDIS, for document annotation and management.", vendor = "JULIE Lab Jena, Germany", copyright = "JULIE Lab Jena, Germany")
/* loaded from: input_file:de/julielab/jcore/reader/db/DBReader.class */
public abstract class DBReader extends DBSubsetReader {
    /** Logger shared by the reader and its inner {@link RetrievingThread}. */
    private static final Logger log = LoggerFactory.getLogger(DBReader.class);

    /**
     * Value of the optional "Timestamp" configuration parameter. When it is not {@code null}, the
     * subset-mode retrieval queries the data table through {@code queryWithTime} with this
     * expression instead of using the plain or joined column retrieval.
     */
    @ConfigurationParameter(name = "Timestamp", mandatory = false, description = "PostgreSQL timestamp expression that is evaluated against the data table. The data table schema, which must be the active data table schema in the CoStoSys configuration as always, must specify a single timestamp field for this parameter to work. Only data rows with a timestamp value larger than the given timestamp expression will be processed. Note that when reading from a subset table, there may be subset rows indicated to be in process which are finally not read from the data table. This is an implementational shortcoming and might be addressed if respective feature requests are given through the JULIE Lab GitHub page or JCoRe issues.")
    protected String dataTimestamp;
    // Batch retriever used in subset mode only; created lazily by the first call to getNextFromSubset().
    private RetrievingThread retriever;
    // Iterator over the documents currently being handed out: the complete data table query result
    // in data table mode, or the current batch delivered by the retriever in subset mode.
    private DBCIterator<byte[][]> xmlBytes;

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:de/julielab/jcore/reader/db/DBReader$RetrievingThread.class */
    /**
     * Reserves one batch of document IDs from the subset table and fetches the corresponding
     * documents.
     *
     * <p>When {@code fetchIdsProactively} is set, the work is started on a background thread as
     * soon as the instance is constructed and {@link #getDocuments()} waits for it to finish.
     * Otherwise nothing happens until {@link #getDocuments()} is called, which then performs the
     * retrieval synchronously on the calling thread.
     */
    public class RetrievingThread extends Thread {
        /** Primary keys reserved by the most recent retrieval. */
        private List<Object[]> ids;
        /** Documents belonging to {@link #ids}; assigned by the most recent retrieval. */
        private DBCIterator<byte[][]> documents;
        /**
         * Problem that aborted the most recent retrieval, or {@code null}. It is recorded so that
         * {@link #getDocuments()} can report a background failure with its real cause instead of
         * silently handing out a {@code null} iterator.
         */
        private Throwable failure;

        /**
         * Creates the retriever and, in proactive mode, immediately starts fetching the next batch
         * in the background.
         */
        public RetrievingThread() {
            if (DBReader.this.fetchIdsProactively) {
                log.debug("Fetching new documents in a background thread.");
                start();
            }
        }

        /**
         * Reserves the next batch of IDs and fetches the respective documents. Any unchecked
         * failure is remembered for {@link #getDocuments()} and then rethrown unchanged.
         *
         * @throws IllegalArgumentException if the active table schema does not match the subset table
         */
        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            this.failure = null;
            try {
                fetchBatch();
            } catch (RuntimeException | Error e) {
                this.failure = e;
                throw e;
            }
        }

        /** Performs the actual ID reservation and document query for one batch. */
        private void fetchBatch() {
            try {
                // Never reserve more IDs than are still missing to reach the total document count.
                this.ids = DBReader.this.dbc.retrieveAndMark(DBReader.this.tableName, DBReader.this.getReaderComponentName(), DBReader.this.hostName, DBReader.this.pid, Math.min(DBReader.this.batchSize, DBReader.this.totalDocumentCount - DBReader.this.numberFetchedDocIDs), DBReader.this.selectionOrder);
                if (log.isTraceEnabled()) {
                    log.trace("Reserved the following document IDs for processing: " + describeIds());
                }
                DBReader.this.numberFetchedDocIDs += this.ids.size();
                log.debug("Retrieved {} document IDs to fetch from the database.", this.ids.size());
                if (this.ids.isEmpty()) {
                    log.debug("No unfetched documents left.");
                    this.documents = emptyDocuments();
                    return;
                }
                log.debug("Fetching {} documents from the database.", this.ids.size());
                if (DBReader.this.dataTimestamp != null) {
                    log.trace("Fetching data from data table {} that is newer than timestamp {}", DBReader.this.dataTable, DBReader.this.dataTimestamp);
                    this.documents = DBReader.this.dbc.queryWithTime(this.ids, DBReader.this.dataTable, DBReader.this.dataTimestamp);
                } else if (DBReader.this.joinTables) {
                    log.trace("Fetching data by joining tables {}. The used table schemas are {}.", DBReader.this.tables, DBReader.this.schemas);
                    this.documents = DBReader.this.dbc.retrieveColumnsByTableSchema(this.ids, DBReader.this.tables, DBReader.this.schemas);
                } else {
                    log.trace("Fetching data from the data table {} without additional tables.", DBReader.this.dataTable);
                    this.documents = DBReader.this.dbc.retrieveColumnsByTableSchema(this.ids, DBReader.this.dataTable);
                }
            } catch (TableSchemaMismatchException e) {
                log.error("Table schema mismatch: The active table schema {} specified in the CoStoSys configuration file {} does not match the columns in the subset table {}: {}", DBReader.this.dbc.getActiveTableSchema(), DBReader.this.costosysConfig, DBReader.this.tableName, e.getMessage());
                throw new IllegalArgumentException(e);
            }
        }

        /** Renders each reserved primary key as its elements joined by '-', for trace logging. */
        private List<String> describeIds() {
            List<String> renderedKeys = new ArrayList<>(this.ids.size());
            for (Object[] key : this.ids) {
                List<String> keyElements = new ArrayList<>(key.length);
                for (Object element : key) {
                    keyElements.add(String.valueOf(element));
                }
                renderedKeys.add(StringUtils.join(keyElements, "-"));
            }
            return renderedKeys;
        }

        /** Creates the iterator that is handed out when no unfetched documents are left. */
        private DBCIterator<byte[][]> emptyDocuments() {
            return new DBCIterator<byte[][]>() {
                @Override
                public boolean hasNext() {
                    return false;
                }

                // Kept returning null rather than throwing for compatibility; callers are
                // expected to consult hasNext() first.
                @Override
                public byte[][] next() {
                    return null;
                }

                public void remove() {
                }

                @Override
                public void close() {
                }
            };
        }

        /**
         * Returns the documents of this retriever's batch. In proactive mode this blocks until the
         * background thread has finished; otherwise the retrieval is executed on the calling thread.
         *
         * @return the iterator over the fetched documents, without elements if none are left
         * @throws IllegalStateException if the calling thread is interrupted while waiting or if
         *                               the background retrieval failed
         */
        public DBCIterator<byte[][]> getDocuments() {
            if (!DBReader.this.fetchIdsProactively) {
                log.debug("Fetching new documents (without employing a background thread).");
                run();
                return this.documents;
            }
            log.debug("Waiting for the background thread to finish fetching documents to return them.");
            try {
                join();
            } catch (InterruptedException e) {
                // Preserve the interrupt for callers further up instead of swallowing it.
                Thread.currentThread().interrupt();
                throw new IllegalStateException("Interrupted while waiting for the background thread to fetch documents.", e);
            }
            if (this.failure != null) {
                throw new IllegalStateException("The background thread failed to fetch documents.", this.failure);
            }
            return this.documents;
        }
    }

    /**
     * Initializes the reader through the superclass, reads the optional "Timestamp" parameter and,
     * when the reader works directly on a data table that has documents to deliver, issues the
     * data table query whose result is iterated afterwards.
     *
     * @param uimaContext the UIMA context handed to the superclass initialization
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        dataTimestamp = (String) getConfigParameterValue("Timestamp");

        final boolean queryDataTableNow = readDataTable && hasNext;
        if (!queryDataTableNow) {
            return;
        }
        log.debug("Querying data table {} with schema {} and where condition {}", tableName, dbc.getActiveTableSchema(), whereCondition);
        xmlBytes = dbc.queryDataTable(tableName, whereCondition);
    }

    /**
     * Tells whether another document can be requested from this reader.
     *
     * @return the current value of the {@code hasNext} flag maintained while reading
     */
    public boolean hasNext() throws IOException, CollectionException {
        final boolean moreDocumentsAvailable = this.hasNext;
        return moreDocumentsAvailable;
    }

    /**
     * Delivers the raw data of the next document, taken either from the data table iterator or from
     * the current subset batch depending on the {@code readDataTable} setting. The processed
     * document counter is only advanced when a document was actually obtained.
     *
     * @return the next document's data, or {@code null} if none could be obtained
     */
    public byte[][] getNextArtifactData() throws CollectionException {
        log.trace("Fetching next document from the current database batch");
        final byte[][] artifactData;
        if (readDataTable) {
            artifactData = getNextFromDataTable();
        } else {
            artifactData = getNextFromSubset();
        }
        if (artifactData == null) {
            return null;
        }
        processedDocuments++;
        return artifactData;
    }

    /**
     * Takes the next document from the data table iterator and updates the {@code hasNext} flag.
     * The flag is only set again if the document just taken is not the last one permitted by the
     * total document count and the iterator has further elements.
     *
     * @return the data of the next document
     */
    private byte[][] getNextFromDataTable() {
        // Cleared up front so that the flag stays false should fetching the next element fail.
        hasNext = false;
        final byte[][] document = xmlBytes.next();
        final boolean belowDocumentLimit = processedDocuments < totalDocumentCount - 1;
        hasNext = belowDocumentLimit && xmlBytes.hasNext();
        return document;
    }

    /**
     * Delivers the next document in subset table mode. The first call creates the retriever and
     * obtains the first batch. Whenever the current batch has been used up, the next batch is
     * requested from the retriever; if that batch is empty, {@code hasNext} is set to
     * {@code false}.
     *
     * @return the next document's data, or {@code null} if the current batch had no element left
     */
    private byte[][] getNextFromSubset() {
        log.trace("Reading in subset table mode.");
        if (retriever == null) {
            log.trace("Creating new RetrievingThread for fetching the first document batch");
            retriever = new RetrievingThread();
            xmlBytes = retriever.getDocuments();
            startBackgroundRetrieverIfProactive();
        }

        byte[][] document = null;
        if (xmlBytes.hasNext()) {
            log.debug("Returning next document.");
            document = xmlBytes.next();
        }
        if (xmlBytes.hasNext()) {
            // The current batch still has documents for the following calls.
            return document;
        }

        // The current batch is used up: switch to the batch the retriever delivers next.
        xmlBytes = retriever.getDocuments();
        if (xmlBytes.hasNext()) {
            startBackgroundRetrieverIfProactive();
        } else {
            log.debug("No more documents, settings 'hasNext' to false.");
            hasNext = false;
        }
        return document;
    }

    /** In proactive mode, replaces the retriever by a new one that fetches the next batch right away. */
    private void startBackgroundRetrieverIfProactive() {
        if (fetchIdsProactively) {
            log.trace("Creating background RetrievingThread to immediately fetch the next document batch");
            retriever = new RetrievingThread();
        }
    }

    /**
     * Reports the reading progress as the number of processed documents relative to the total
     * document count, measured in the unit "entities".
     *
     * @return an array holding the single progress entry
     */
    public Progress[] getProgress() {
        final Progress documentProgress = new ProgressImpl(processedDocuments, totalDocumentCount, "entities", true);
        return new Progress[]{documentProgress};
    }

    /**
     * Releases the document iterator and the database connector.
     *
     * <p>The connector is closed even if closing the iterator fails, so that the database
     * connection is not leaked. Both references are cleared after closing, which makes a
     * repeated call a no-op instead of a {@link NullPointerException}.
     */
    public void close() {
        try {
            if (this.xmlBytes != null) {
                this.xmlBytes.close();
                this.xmlBytes = null;
            }
        } finally {
            if (this.dbc != null) {
                this.dbc.close();
                this.dbc = null;
            }
        }
    }

    /**
     * Replaces any {@link DBProcessingMetaData} annotation in the CAS by a fresh one carrying the
     * primary key of the given document row and, unless the data table is read directly, the
     * schema-qualified name of the subset table.
     *
     * @param dataBaseConnector connector supplying the primary key column indices and the active
     *                          Postgres schema
     * @param z                 {@code true} if the subset table feature must not be set; the
     *                          argument presumably reflects the reader's data table mode (confirm
     *                          against callers)
     * @param str               the subset table name; qualified with the active Postgres schema if
     *                          it does not contain a '.'; only read when {@code z} is {@code false}
     * @param bArr              the document row; the primary key values are read from it at the
     *                          primary key indices and decoded as UTF-8
     * @param jCas              the CAS to add the meta data annotation to
     * @return always {@code null}
     */
    public static String setDBProcessingMetaData(DataBaseConnector dataBaseConnector, boolean z, String str, byte[][] bArr, JCas jCas) {
        // Copy the selection before removing anything: removing annotations while iterating the
        // live index view can fail with a ConcurrentModificationException.
        List<DBProcessingMetaData> staleMetaData = new ArrayList<>(JCasUtil.select(jCas, DBProcessingMetaData.class));
        for (DBProcessingMetaData stale : staleMetaData) {
            stale.removeFromIndexes();
        }

        DBProcessingMetaData metaData = new DBProcessingMetaData(jCas);
        List<?> primaryKeyIndices = dataBaseConnector.getPrimaryKeyIndices();
        // Looked up once instead of once per primary key element.
        Charset utf8 = Charset.forName("UTF-8");
        StringArray primaryKey = new StringArray(jCas, primaryKeyIndices.size());
        for (int i = 0; i < primaryKeyIndices.size(); i++) {
            int column = ((Integer) primaryKeyIndices.get(i)).intValue();
            primaryKey.set(i, new String(bArr[column], utf8));
        }
        if (log.isDebugEnabled()) {
            log.debug("Setting primary key to {}", Arrays.toString(primaryKey.toArray()));
        }
        metaData.setPrimaryKey(primaryKey);
        if (!z) {
            String qualifiedSubsetTable = str.contains(".") ? str : dataBaseConnector.getActivePGSchema() + "." + str;
            metaData.setSubsetTable(qualifiedSubsetTable);
        }
        metaData.addToIndexes();
        return null;
    }

    /**
     * Supplies the name of the concrete reader component. The value is passed to the database
     * connector's {@code retrieveAndMark} call together with the host name and process ID when a
     * batch of subset rows is reserved; presumably it is stored with the reserved rows to identify
     * the component that processes them (confirm against the connector).
     *
     * @return the component name of the concrete reader implementation
     */
    protected abstract String getReaderComponentName();
}
