package org.imixs.workflow.documents.parser;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringWriter;
import java.util.List;
import java.util.logging.Logger;
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

/* loaded from: input_file:org/imixs/workflow/documents/parser/DocumentCoreParser.class */
/**
 * Extracts the plain text content of PDF and MS Word documents so that it can
 * be processed further by the caller.
 * <p>
 * The file type is derived from the file name extension only; the extension
 * check is case-insensitive.
 */
public class DocumentCoreParser {
    private static final Logger logger = Logger.getLogger(DocumentCoreParser.class.getName());

    /**
     * Parses the content of a file and returns its text.
     * <p>
     * Only names ending in {@code .pdf}, {@code .doc} or {@code .docx} are
     * dispatched to a parser; every other name yields {@code null}.
     *
     * @param str  the file name, used to select the parser by extension
     * @param list the file data; the element at index 1 must be the raw file
     *             content as {@code byte[]}
     * @return the extracted text, or {@code null} if the name or data is
     *         missing, the content is empty, the file type is not supported,
     *         or an MS Office document could not be read
     * @throws IOException if a PDF document cannot be parsed or is encrypted
     */
    public static String parse(String str, List<?> list) throws IOException {
        if (str == null || list == null || list.size() < 2) {
            return null;
        }
        Object content = list.get(1);
        if (!(content instanceof byte[])) {
            return null;
        }
        byte[] bArr = (byte[]) content;
        if (bArr.length == 0) {
            return null;
        }

        String result = null;
        if (hasExtension(str, ".pdf")) {
            long start = System.currentTimeMillis();
            logger.fine("parsing pdf document '" + str + "'.....");
            result = parsePDF(str, bArr);
            logger.fine("parsing pdf document completed in " + (System.currentTimeMillis() - start) + "ms");
        } else if (hasExtension(str, ".doc") || hasExtension(str, ".docx")) {
            long start = System.currentTimeMillis();
            logger.fine("parsing MS Office document '" + str + "'.....");
            result = parseMSDOC(str, bArr);
            logger.fine("parsing MS Office document completed in " + (System.currentTimeMillis() - start) + "ms");
        }
        return result;
    }

    /**
     * Tests case-insensitively whether {@code name} ends with {@code ext}.
     * Returns {@code false} when the name is shorter than the extension
     * (regionMatches rejects the negative offset).
     */
    private static boolean hasExtension(String name, String ext) {
        return name.regionMatches(true, name.length() - ext.length(), ext, 0, ext.length());
    }

    /**
     * Extracts the text of a PDF document.
     *
     * @param str  the file name (not evaluated here)
     * @param bArr the raw PDF content
     * @return the extracted text
     * @throws IOException if the content cannot be parsed, or if the document
     *                     is encrypted and cannot be opened
     */
    private static String parsePDF(String str, byte[] bArr) throws IOException {
        try {
            PDFParser parser = new PDFParser(new RandomAccessBuffer(bArr));
            // parse() is inside the try because it is declared to throw
            // InvalidPasswordException for encrypted documents.
            parser.parse();
            // try-with-resources closes the document on every exit path.
            try (PDDocument document = parser.getPDDocument()) {
                StringWriter writer = new StringWriter();
                new PDFTextStripper().writeText(document, writer);
                String text = writer.toString();
                logger.fine("length of parsed file=" + text.length());
                logger.fine(text);
                return text;
            }
        } catch (InvalidPasswordException e) {
            throw new IOException("Error: The document is encrypted and will not be indexed.", e);
        }
    }

    /**
     * Extracts the text of an MS Office document. This is a best-effort
     * operation: any failure is logged and results in {@code null} instead of
     * an exception.
     *
     * @param str  the file name, used to select the extractor by extension
     * @param bArr the raw document content
     * @return the extracted text, or {@code null} if the extension is not
     *         handled or the content could not be read
     */
    private static String parseMSDOC(String str, byte[] bArr) {
        String text = null;
        try {
            // ".docx" does not end with ".doc", so the order of the checks is irrelevant.
            if (hasExtension(str, ".xls")) {
                try (ExcelExtractor extractor =
                        new ExcelExtractor(new POIFSFileSystem(new ByteArrayInputStream(bArr)))) {
                    text = extractor.getText();
                }
            } else if (hasExtension(str, ".ppt")) {
                try (PowerPointExtractor extractor =
                        new PowerPointExtractor(new POIFSFileSystem(new ByteArrayInputStream(bArr)))) {
                    text = extractor.getText();
                }
            } else if (hasExtension(str, ".doc")) {
                try (WordExtractor extractor = new WordExtractor(
                        new HWPFDocument(new POIFSFileSystem(new ByteArrayInputStream(bArr))))) {
                    text = extractor.getText();
                }
            } else if (hasExtension(str, ".docx")) {
                try (XWPFWordExtractor extractor =
                        new XWPFWordExtractor(new XWPFDocument(new ByteArrayInputStream(bArr)))) {
                    text = extractor.getText();
                }
            }
        } catch (Exception e) {
            // Deliberately broad: a corrupt or unsupported file must not abort the caller.
            logger.warning("document file '" + str + "' cant be indexed: " + e);
        }
        // text stays null when extraction failed or no extractor matched.
        if (text != null) {
            logger.fine("length of parsed file=" + text.length());
            logger.fine(text);
        }
        return text;
    }
}
