package org.opencms.search.extractors;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.StringWriter;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.opencms.file.CmsPropertyDefinition;
import org.opencms.jsp.search.config.parser.simplesearch.CmsConfigParserUtils;
import org.opencms.loader.CmsRedirectLoader;
import org.opencms.main.I_CmsEventListener;
import org.opencms.ui.components.CmsExtendedSiteSelector;
import org.opencms.util.CmsFileUtil;
import org.opencms.util.CmsStringUtil;

/* loaded from: input_file:org/opencms/search/extractors/A_CmsTextExtractor.class */
/**
 * Base class for text extractors, providing default implementations of the
 * {@link I_CmsTextExtractor} extraction methods plus helpers shared by subclasses.
 *
 * <p>The four {@code extractText} overloads that take a byte array or an input stream
 * (each with or without a second {@code String} argument) delegate to one another in a
 * cycle: the byte array variants wrap the array in a stream and call the stream variants,
 * while the stream variants end up reading the stream into a byte array and calling the
 * byte array variant. A concrete subclass therefore has to override at least one of them;
 * invoking any of them on a subclass that overrides none recurses until the stack
 * overflows.</p>
 */
public abstract class A_CmsTextExtractor implements I_CmsTextExtractor {

    /**
     * Extracts text from the given bytes by wrapping them in a
     * {@link ByteArrayInputStream} and delegating to {@link #extractText(InputStream)}.
     *
     * @param bArr the content to extract the text from
     * @return the result of the delegated extraction
     * @throws Exception if the delegated extraction fails
     */
    @Override
    public I_CmsExtractionResult extractText(byte[] bArr) throws Exception {
        return extractText(new ByteArrayInputStream(bArr));
    }

    /**
     * Extracts text from the given bytes by wrapping them in a
     * {@link ByteArrayInputStream} and delegating to
     * {@link #extractText(InputStream, String)}.
     *
     * @param bArr the content to extract the text from
     * @param str passed through unchanged to the stream based method
     *            (presumably the content encoding; this class never interprets it)
     * @return the result of the delegated extraction
     * @throws Exception if the delegated extraction fails
     */
    @Override
    public I_CmsExtractionResult extractText(byte[] bArr, String str) throws Exception {
        return extractText(new ByteArrayInputStream(bArr), str);
    }

    /**
     * Extracts text from the given stream by delegating to
     * {@link #extractText(InputStream, String)} with a {@code null} second argument.
     *
     * @param inputStream the stream to extract the text from
     * @return the result of the delegated extraction
     * @throws Exception if the delegated extraction fails
     */
    @Override
    public I_CmsExtractionResult extractText(InputStream inputStream) throws Exception {
        // the cast selects the (InputStream, String) overload rather than (InputStream, Parser)
        return extractText(inputStream, (String) null);
    }

    /**
     * Extracts text from the given stream by reading it completely into a byte array and
     * delegating to {@link #extractText(byte[], String)}.
     *
     * @param inputStream the stream to extract the text from
     * @param str passed through unchanged to the byte array based method
     *            (presumably the content encoding; this class never interprets it)
     * @return the result of the delegated extraction
     * @throws Exception if reading the stream or the delegated extraction fails
     */
    @Override
    public I_CmsExtractionResult extractText(InputStream inputStream, String str) throws Exception {
        byte[] content = CmsFileUtil.readFully(inputStream);
        return extractText(content, str);
    }

    /**
     * Records a single content item and appends its value to the combined content.
     *
     * <p>Nothing happens when {@code str} is rejected by
     * {@link CmsStringUtil#isNotEmpty(String)}. Otherwise the value is stored in
     * {@code map} under the key {@code str2}, and a line break followed by the value is
     * appended to {@code stringBuffer}.</p>
     *
     * @param str the item value, may be empty
     * @param str2 the key under which the value is stored in {@code map}
     * @param stringBuffer the combined content the value is appended to
     * @param map the content items collected so far
     */
    protected void combineContentItem(String str, String str2, StringBuffer stringBuffer, Map<String, String> map) {
        if (!CmsStringUtil.isNotEmpty(str)) {
            return;
        }
        map.put(str2, str);
        stringBuffer.append('\n').append(str);
    }

    /**
     * Parses the given stream with the given Tika parser and collects the body text and
     * selected metadata values into an extraction result.
     *
     * <p>The body text is stored as the raw item (when not empty) and forms the start of
     * the combined content. Each non-empty metadata value is stored as its own item and
     * appended to the combined content on a new line, in this order: title, keywords,
     * subject, author, creator, category, comments, company, manager, producer. For
     * subject, creator and company, alternative metadata keys are consulted when the
     * first one yields a blank value.</p>
     *
     * <p>The stream is always closed by this method, also when parsing fails.</p>
     *
     * <p>Note: a decompiler remark in this file states that this method was originally
     * declared {@code protected}; it is kept {@code public} here.</p>
     *
     * @param inputStream the stream to parse; closed before this method returns
     * @param parser the parser used to read the stream
     * @return the extraction result holding the combined content and the single items
     * @throws Exception if parsing or closing the stream fails
     */
    public CmsExtractionResult extractText(InputStream inputStream, Parser parser) throws Exception {
        StringWriter bodyText = new StringWriter();
        Metadata metadata = new Metadata();
        // try-with-resources guarantees the stream is closed even if the parser throws
        try (InputStream in = inputStream) {
            parser.parse(in, new BodyContentHandler(bodyText), metadata, new ParseContext());
        }

        String rawContent = bodyText.toString();
        LinkedHashMap<String, String> contentItems = new LinkedHashMap<>();
        StringBuffer combined = new StringBuffer(rawContent);
        if (CmsStringUtil.isNotEmpty(rawContent)) {
            contentItems.put(I_CmsExtractionResult.ITEM_RAW, rawContent);
        }

        combineContentItem(metadata.get(DublinCore.TITLE), "title", combined, contentItems);
        combineContentItem(metadata.get(CmsPropertyDefinition.PROPERTY_KEYWORDS), "keywords", combined, contentItems);

        // subject: plain key first, Dublin Core key as fallback
        String subject = metadata.get("subject");
        if (StringUtils.isBlank(subject)) {
            subject = metadata.get(DublinCore.SUBJECT);
        }
        combineContentItem(subject, "subject", combined, contentItems);

        combineContentItem(metadata.get("Author"), I_CmsExtractionResult.ITEM_AUTHOR, combined, contentItems);

        // creator: XMP creator tool first, then Dublin Core creator, then the plain key
        String creator = metadata.get("xmp:CreatorTool");
        if (StringUtils.isBlank(creator)) {
            creator = metadata.get(DublinCore.CREATOR);
        }
        if (StringUtils.isBlank(creator)) {
            creator = metadata.get("creator");
        }
        combineContentItem(creator, "creator", combined, contentItems);

        combineContentItem(metadata.get(CmsConfigParserUtils.N_CATEGORY), "category", combined, contentItems);
        combineContentItem(metadata.get("Comments"), I_CmsExtractionResult.ITEM_COMMENTS, combined, contentItems);

        // company: Office Open XML extended property first, plain key as fallback
        String company = metadata.get(OfficeOpenXMLExtended.COMPANY);
        if (StringUtils.isBlank(company)) {
            company = metadata.get("Company");
        }
        combineContentItem(company, I_CmsExtractionResult.ITEM_COMPANY, combined, contentItems);

        combineContentItem(metadata.get("Manager"), "manager", combined, contentItems);
        combineContentItem(
            metadata.get(I_CmsExtractionResult.ITEM_PRODUCER),
            I_CmsExtractionResult.ITEM_PRODUCER,
            combined,
            contentItems);

        return new CmsExtractionResult(combined.toString(), contentItems);
    }

    /**
     * Replaces unwanted characters in the given text with line breaks.
     *
     * <p>Every {@code char} is classified with {@link Character#getType(char)}:</p>
     * <ul>
     * <li>letters, decimal digits, space separators, punctuation and currency symbols are
     *     kept unchanged,</li>
     * <li>a line separator is always replaced by {@code '\n'},</li>
     * <li>anything else (control characters, marks, math and other symbols, letter and
     *     other numbers, format, private use, surrogate and unassigned characters,
     *     paragraph separators) is replaced by a single {@code '\n'}; directly
     *     consecutive replacements are collapsed so that no further line break is added
     *     until a kept character has been seen.</li>
     * </ul>
     *
     * <p>Classification works on individual UTF-16 code units, so the two halves of a
     * supplementary character are classified as surrogates and treated as unwanted.</p>
     *
     * <p>Note: a decompiler remark in this file states that this method was originally
     * declared {@code protected}; it is kept {@code public} here.</p>
     *
     * @param str the text to clean up
     * @return the cleaned text, or the empty string if
     *         {@link CmsStringUtil#isEmptyOrWhitespaceOnly(String)} accepts the input
     */
    public String removeControlChars(String str) {
        if (CmsStringUtil.isEmptyOrWhitespaceOnly(str)) {
            return "";
        }
        StringBuilder cleaned = new StringBuilder(str.length());
        // true while the most recently appended character is a replacement line break
        boolean lastWasBreak = false;
        for (char c : str.toCharArray()) {
            switch (Character.getType(c)) {
                // letters
                case Character.UPPERCASE_LETTER:
                case Character.LOWERCASE_LETTER:
                case Character.TITLECASE_LETTER:
                case Character.MODIFIER_LETTER:
                case Character.OTHER_LETTER:
                // digits
                case Character.DECIMAL_DIGIT_NUMBER:
                // spaces
                case Character.SPACE_SEPARATOR:
                // punctuation
                case Character.DASH_PUNCTUATION:
                case Character.START_PUNCTUATION:
                case Character.END_PUNCTUATION:
                case Character.CONNECTOR_PUNCTUATION:
                case Character.OTHER_PUNCTUATION:
                case Character.INITIAL_QUOTE_PUNCTUATION:
                case Character.FINAL_QUOTE_PUNCTUATION:
                // currency symbols
                case Character.CURRENCY_SYMBOL:
                    cleaned.append(c);
                    lastWasBreak = false;
                    break;

                // line separators always produce a line break, even directly after another one
                case Character.LINE_SEPARATOR:
                    cleaned.append('\n');
                    lastWasBreak = true;
                    break;

                // every other category is unwanted: emit at most one line break per run
                default:
                    if (!lastWasBreak) {
                        cleaned.append('\n');
                        lastWasBreak = true;
                    }
                    break;
            }
        }
        return cleaned.toString();
    }
}
