package nlp4j.crawler;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.lang.invoke.MethodHandles;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import nlp4j.Document;
import nlp4j.impl.DefaultDocument;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/* loaded from: input_file:nlp4j/crawler/CsvFileCrawler.class */
/**
 * Crawler that reads CSV files and turns every data row into a {@link Document}.
 *
 * <p>The first record of each CSV input is treated as the header; each following
 * record becomes one document whose attributes are keyed by the header names.
 * Input is parsed with {@code CSVFormat.EXCEL} and decoded using the inherited
 * {@code encoding} setting.
 */
public class CsvFileCrawler extends AbstractFileCrawler implements Crawler {
    private static final Logger logger = LogManager.getLogger(MethodHandles.lookup().lookupClass());

    /**
     * Creates a crawler with the {@code "target"} property preset to {@code "text"}.
     */
    public CsvFileCrawler() {
        this.prop.setProperty("target", "text");
    }

    /**
     * Parses every configured file and returns the documents of all of them.
     *
     * <p>A file that cannot be opened or read is logged and skipped; the remaining
     * files are still processed.
     *
     * @return the documents read from all readable files; empty (never {@code null})
     *         when the {@code "target"} property is unset or nothing could be read
     */
    @Override // nlp4j.crawler.AbstractCrawler, nlp4j.crawler.Crawler
    public List<Document> crawlDocuments() {
        List<Document> documents = new ArrayList<>();
        if (this.prop.getProperty("target") == null) {
            logger.warn("target is not set.");
            return documents;
        }
        for (File file : this.files) {
            // try-with-resources closes the stream on both the normal and the failure path.
            try (FileInputStream in = new FileInputStream(file)) {
                documents.addAll(parseDocuments(in));
            } catch (IOException e) {
                // Covers FileNotFoundException as well; keep going with the next file.
                logger.error("Failed to read CSV file: " + file, e);
            }
        }
        return documents;
    }

    /**
     * Parses CSV content from the given stream.
     *
     * <p>The stream is not closed by this method; closing it stays the caller's
     * responsibility.
     *
     * @param inputStream CSV content whose first record is the header row
     * @return one document per data record, in input order
     * @throws IOException if the stream cannot be read or parsed
     */
    public List<Document> crawlDocuments(InputStream inputStream) throws IOException {
        return parseDocuments(inputStream);
    }

    /**
     * Reads all records from the stream and maps each one to a document.
     *
     * @param inputStream CSV content whose first record is the header row
     * @return one document per data record, in input order
     * @throws IOException if the stream cannot be read or parsed
     */
    private List<Document> parseDocuments(InputStream inputStream) throws IOException {
        List<Document> documents = new ArrayList<>();
        // The parser is intentionally not closed here: closing it would also close
        // the underlying stream, which is owned by the caller.
        CSVParser parser = CSVParser.parse(inputStream, Charset.forName(this.encoding),
                CSVFormat.EXCEL.withFirstRecordAsHeader());
        String[] headers = parser.getHeaderMap().keySet().toArray(new String[0]);
        for (CSVRecord record : parser.getRecords()) {
            // A row may carry more values than there are header columns; only values
            // that have a header name can be stored as attributes.
            int columns = Math.min(headers.length, record.size());
            if (record.size() > headers.length) {
                logger.warn("Record {} has {} values but only {} header columns; extra values are ignored.",
                        record.getRecordNumber(), record.size(), headers.length);
            }
            DefaultDocument document = new DefaultDocument();
            for (int i = 0; i < columns; i++) {
                document.putAttribute(headers[i], record.get(i));
            }
            documents.add(document);
        }
        return documents;
    }
}
