package org.imixs.archive.ocr;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
import javax.ejb.Stateless;
import javax.inject.Inject;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.eclipse.microprofile.config.inject.ConfigProperty;
import org.imixs.workflow.FileData;
import org.imixs.workflow.ItemCollection;
import org.imixs.workflow.exceptions.PluginException;

/**
 * Stateless service that extracts the textual content of file attachments and
 * stores it in the {@code text} attribute of each {@link FileData}.
 * <p>
 * PDF files are read with PDFBox; other files (and, depending on the mode,
 * PDFs as well) are sent with an HTTP {@code PUT} request to the endpoint
 * configured by {@link #ENV_TIKA_SERVICE_ENDPOINT}.
 * <p>
 * The mode values evaluated by this class are:
 * <ul>
 * <li>{@code OCR_ONLY} - PDF files are sent to the OCR endpoint as well</li>
 * <li>{@code PDF_ONLY} - non-PDF files are not processed at all</li>
 * <li>{@code MIXED} - PDF files are sent to the OCR endpoint when the PDF text
 * extraction yields no usable text</li>
 * <li>any other value (including the default {@code PDF_AND_OCR}) - PDF files
 * are only read with PDFBox, all other files are sent to the OCR endpoint</li>
 * </ul>
 */
@Stateless
public class OCRService {
    public static final String DEFAULT_ENCODING = "UTF-8";
    public static final String PLUGIN_ERROR = "PLUGIN_ERROR";
    public static final String ENV_TIKA_SERVICE_ENDPOINT = "tika.service.endpoint";
    public static final String ENV_TIKA_SERVICE_MODE = "tika.service.mode";
    public static final String ENV_TIKA_OCR_MODE = "tika.ocr.mode";

    /** Name of the FileData attribute holding the extracted text. */
    private static final String FILE_ATTRIBUTE_TEXT = "text";
    private static final String MODE_OCR_ONLY = "OCR_ONLY";
    private static final String MODE_PDF_ONLY = "PDF_ONLY";
    private static final String MODE_MIXED = "MIXED";
    /** PDF text shorter than this is treated as "no text found". */
    private static final int MIN_PDF_TEXT_LENGTH = 16;
    /** Only request headers with this prefix are accepted as options. */
    private static final String TIKA_OPTION_PREFIX = "X-Tika";

    private static final Logger logger = Logger.getLogger(OCRService.class.getName());

    /** URL of the OCR endpoint; OCR is skipped when empty. */
    @Inject
    @ConfigProperty(name = ENV_TIKA_SERVICE_ENDPOINT, defaultValue = "")
    String serviceEndpoint;

    /** Injected service mode; not evaluated inside this class. */
    @Inject
    @ConfigProperty(name = ENV_TIKA_SERVICE_MODE, defaultValue = "auto")
    String serviceMode;

    /** Configured default extraction mode. */
    @Inject
    @ConfigProperty(name = ENV_TIKA_OCR_MODE, defaultValue = "PDF_AND_OCR")
    String ocrMode;

    /**
     * Extracts the text of all files attached to {@code itemCollection} using
     * the configured default mode and no additional options.
     *
     * @param itemCollection  the item collection whose file data is processed
     * @param itemCollection2 optional second item collection used to look up
     *                        the file content when a file in
     *                        {@code itemCollection} carries no content
     * @throws PluginException if a document could not be scanned
     */
    public void extractText(ItemCollection itemCollection, ItemCollection itemCollection2) throws PluginException {
        extractText(itemCollection, itemCollection2, this.ocrMode, null);
    }

    /**
     * Extracts the text of all files attached to {@code itemCollection} that do
     * not yet carry a {@code text} attribute and stores the result as a
     * single-element list in that attribute. When no text could be extracted an
     * empty string is stored.
     *
     * @param itemCollection  the item collection whose file data is processed
     * @param itemCollection2 optional second item collection used to look up
     *                        the file content when a file in
     *                        {@code itemCollection} carries no content
     * @param str             the mode to use for this call; when {@code null}
     *                        the configured default mode is used
     * @param list            optional request options in the form
     *                        {@code key=value}, passed to
     *                        {@link #doORCProcessing(FileData, List)}
     * @throws PluginException if an I/O error occurs while scanning a document
     */
    public void extractText(ItemCollection itemCollection, ItemCollection itemCollection2, String str, List<String> list) throws PluginException {
        // Resolve the mode locally. Writing the override back into the injected
        // field would leak it into later calls served by the same pooled bean.
        final String mode = (str != null) ? str : this.ocrMode;
        final long start = System.currentTimeMillis();

        for (FileData fileData : itemCollection.getFileData()) {
            if (hasOCRContent(fileData)) {
                continue;
            }
            FileData origin = fetchOriginFileData(fileData, itemCollection2);
            if (origin == null) {
                continue;
            }

            String text;
            try {
                logger.info("...text extraction '" + origin.getName() + "'...");
                text = extractFileText(origin, mode, list);
            } catch (IOException e) {
                throw new PluginException(OCRService.class.getSimpleName(), PLUGIN_ERROR, "Unable to scan attached document '" + fileData.getName() + "'", e);
            }

            if (text == null) {
                logger.warning("Unable to extract ocr-content for '" + fileData.getName() + "'");
                // store an empty text so the file is not scanned again
                text = "";
            }
            List<Object> values = new ArrayList<>();
            values.add(text);
            fileData.setAttribute(FILE_ATTRIBUTE_TEXT, values);
        }
        logger.fine("...extracted textual information in " + (System.currentTimeMillis() - start) + "ms");
    }

    /**
     * Chooses the extraction strategy for a single file based on its type and
     * the given mode.
     *
     * @return the extracted text or {@code null} if nothing was extracted
     */
    private String extractFileText(FileData origin, String mode, List<String> options) throws IOException {
        if (!isPDF(origin)) {
            return MODE_PDF_ONLY.equals(mode) ? null : doORCProcessing(origin, options);
        }
        if (MODE_OCR_ONLY.equals(mode)) {
            logger.info("...force orc scan for pdfs...");
            return doORCProcessing(origin, options);
        }

        String text = doPDFTextExtraction(origin);
        if (text != null && text.length() < MIN_PDF_TEXT_LENGTH) {
            // too short to be meaningful - treat as "no text"
            text = null;
        }
        if (text == null && MODE_MIXED.equals(mode)) {
            text = doORCProcessing(origin, options);
        }
        return text;
    }

    /**
     * Returns {@code true} if the file already carries a non-empty {@code text}
     * attribute whose first entry is not {@code null}.
     */
    private boolean hasOCRContent(FileData fileData) {
        if (fileData == null) {
            return false;
        }
        Object attribute = fileData.getAttribute(FILE_ATTRIBUTE_TEXT);
        if (!(attribute instanceof List)) {
            return false;
        }
        List<?> values = (List<?>) attribute;
        return !values.isEmpty() && values.get(0) != null;
    }

    /**
     * Returns the FileData object that holds the actual content: the given
     * file itself when it has content, otherwise the file with the same name
     * from {@code itemCollection}.
     *
     * @return the FileData with content or {@code null} if none was found
     */
    private FileData fetchOriginFileData(FileData fileData, ItemCollection itemCollection) {
        if (hasContent(fileData)) {
            return fileData;
        }
        if (itemCollection != null) {
            FileData candidate = itemCollection.getFileData(fileData.getName());
            if (candidate != null && hasContent(candidate)) {
                return candidate;
            }
        }
        logger.warning("no content found for fileData '" + fileData.getName() + "'!");
        return null;
    }

    /** Returns {@code true} if the file carries more than one byte of content. */
    private boolean hasContent(FileData fileData) {
        byte[] content = fileData.getContent();
        return content != null && content.length > 1;
    }

    /**
     * Sends the file content with an HTTP {@code PUT} request to the configured
     * service endpoint and returns the plain text response.
     *
     * @param fileData the file to be scanned
     * @param list     optional request options in the form {@code key=value};
     *                 only keys starting with {@code X-Tika} are sent as request
     *                 headers, all other entries are logged and ignored
     * @return the response text, or {@code null} if no endpoint is configured,
     *         the content type is not accepted, the file has no content or the
     *         server did not answer with a 2xx status
     * @throws IOException if the communication with the endpoint fails
     */
    public String doORCProcessing(FileData fileData, List<String> list) throws IOException {
        if (this.serviceEndpoint == null || this.serviceEndpoint.isEmpty()) {
            return null;
        }
        logger.info("...ocr scanning....");
        String contentType = adaptContentType(fileData);
        if (!acceptContentType(contentType)) {
            logger.fine("contentType '" + contentType + " is not supported by Tika Server");
            return null;
        }
        byte[] content = fileData.getContent();
        if (content == null) {
            return null;
        }

        HttpURLConnection connection = (HttpURLConnection) new URL(this.serviceEndpoint).openConnection();
        try {
            connection.setRequestMethod("PUT");
            connection.setDoOutput(true);
            connection.setDoInput(true);
            connection.setAllowUserInteraction(false);
            connection.setRequestProperty("Content-Type", contentType + "; charset=" + DEFAULT_ENCODING);
            connection.setRequestProperty("Accept", "text/plain");
            applyTikaOptions(connection, list);
            connection.setRequestProperty("Content-Length", String.valueOf(content.length));

            // the payload is binary - write the bytes directly and close the
            // stream before the response is requested
            try (OutputStream out = connection.getOutputStream()) {
                out.write(content);
                out.flush();
            }

            int status = connection.getResponseCode();
            if (status < 200 || status > 299) {
                logger.warning("ocr request failed with HTTP status " + status);
                return null;
            }
            return readResponse(connection, DEFAULT_ENCODING);
        } finally {
            // release the connection on every path, including error responses
            connection.disconnect();
        }
    }

    /**
     * Copies all valid {@code key=value} options as request headers onto the
     * connection. Invalid entries are logged and skipped.
     */
    private void applyTikaOptions(HttpURLConnection connection, List<String> options) {
        if (options == null) {
            return;
        }
        for (String option : options) {
            int separator = option.indexOf('=');
            if (separator < 0) {
                logger.warning("Invalid tika option : '" + option + "'  character '=' expected!");
                continue;
            }
            String key = option.substring(0, separator);
            if (!key.startsWith(TIKA_OPTION_PREFIX)) {
                logger.warning("Invalid tika option : '" + option + "'  key must start with 'X-Tika'");
                continue;
            }
            connection.setRequestProperty(key, option.substring(separator + 1));
        }
    }

    /**
     * Extracts the text of a PDF file with PDFBox.
     *
     * @param fileData the PDF file
     * @return the extracted text, or {@code null} if the document could not be
     *         read
     */
    public String doPDFTextExtraction(FileData fileData) {
        logger.info("...pdf text extraction....");
        String text = null;
        // try-with-resources closes the document exactly once on every path
        try (PDDocument document = PDDocument.load(fileData.getContent())) {
            text = new PDFTextStripper().getText(document);
            logger.finest("<RESULT>" + text + "</RESULT>");
        } catch (IOException e) {
            logger.warning("unable to load pdf : " + e.getMessage());
        }
        return text;
    }

    /**
     * Reads the response body of the connection line by line. Each line is
     * terminated with {@code \n} in the result.
     * <p>
     * Reading is best effort: an I/O error is logged and the text read so far
     * is returned.
     *
     * @param connection      the connection to read from
     * @param defaultEncoding the encoding used when the connection reports none
     * @return the response text, possibly empty
     */
    private String readResponse(URLConnection connection, String defaultEncoding) {
        logger.finest("......readResponse....");
        StringBuilder response = new StringBuilder();
        try {
            // NOTE(review): getContentEncoding() returns the Content-Encoding
            // header, which is not a charset name. Kept as is to preserve the
            // existing behaviour - confirm whether the charset of the
            // Content-Type header should be used instead.
            String encoding = connection.getContentEncoding();
            if ((encoding == null || encoding.isEmpty()) && defaultEncoding != null && !defaultEncoding.isEmpty()) {
                encoding = defaultEncoding;
            }
            InputStreamReader source = (encoding == null || encoding.isEmpty())
                    ? new InputStreamReader(connection.getInputStream())
                    : new InputStreamReader(connection.getInputStream(), encoding);
            try (BufferedReader reader = new BufferedReader(source)) {
                String line;
                while ((line = reader.readLine()) != null) {
                    logger.finest("......" + line);
                    response.append(line).append('\n');
                }
            }
        } catch (IOException e) {
            logger.warning("unable to read response : " + e.getMessage());
        }
        return response.toString();
    }

    /**
     * Returns {@code true} if the content type is set and is not
     * {@code application/octet-stream}.
     */
    private boolean acceptContentType(String contentType) {
        return contentType != null
                && !contentType.isEmpty()
                && !"application/octet-stream".equalsIgnoreCase(contentType);
    }

    /**
     * Returns the content type of the file. When the type is missing, empty or
     * a wildcard, it is derived from the file name: {@code application/pdf} for
     * names ending with {@code .pdf}, otherwise {@code application/xml}.
     */
    private String adaptContentType(FileData fileData) {
        String contentType = fileData.getContentType();
        if (contentType != null && !contentType.isEmpty() && !"*/*".equals(contentType)) {
            return contentType;
        }
        return fileData.getName().toLowerCase().endsWith(".pdf") ? "application/pdf" : "application/xml";
    }

    /**
     * Returns {@code true} if the file name ends with {@code .pdf} (ignoring
     * case) or the content type contains {@code pdf}. A missing name or content
     * type is treated as "no match".
     */
    private boolean isPDF(FileData fileData) {
        String name = fileData.getName();
        if (name != null && name.toLowerCase().endsWith(".pdf")) {
            return true;
        }
        String contentType = fileData.getContentType();
        return contentType != null && contentType.contains("pdf");
    }
}
