package org.opensextant.xtext.converters;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Locale;
import org.apache.commons.lang3.StringUtils;
import org.apache.tika.io.TikaInputStream;
import org.opensextant.xtext.Content;
import org.opensextant.xtext.ConvertedDocument;

/* loaded from: input_file:org/opensextant/xtext/converters/WebArchiveConverter.class */
/**
 * Converter for web-archive style messages. The parent {@link MessageConverter}
 * produces the base document; this class then folds the text of selected raw
 * child parts (generic binary parts, HTML parts, and image placeholders) into
 * the text of that document.
 */
public class WebArchiveConverter extends MessageConverter {

    /** Written after the text of every child part that is kept. */
    private static final String CHILD_SEPARATOR = "\n==================\n";

    /** Written between the parent's own text and the collected child text. */
    private static final String BODY_SEPARATOR = "\n\n==================\n\n";

    /** Maximum number of leading characters {@link #isWebScript(String)} inspects. */
    private static final int SCRIPT_SNIFF_LENGTH = 4000;

    /**
     * Converts the archive via the parent implementation, marks the result as a
     * web archive, and appends text derived from its raw children.
     *
     * <ul>
     * <li>{@code application/octet-stream} children (case-insensitive match) go
     * through a {@link DefaultConverter};</li>
     * <li>children whose MIME type starts with {@code text/html} go through a
     * {@link TikaHTMLConverter};</li>
     * <li>children whose MIME type starts with {@code image} contribute only a
     * one-line placeholder;</li>
     * <li>children with a {@code null} or any other MIME type are ignored.</li>
     * </ul>
     *
     * Converted child text is kept only if it is non-empty and
     * {@link #isWebScript(String)} does not classify it as script.
     *
     * @param inputStream stream handed to the parent implementation
     * @param file        file handed to the parent implementation
     * @return the parent's document, with child text appended when raw children exist
     * @throws IOException if the parent conversion, a child conversion, or
     *                     closing a child stream fails
     */
    @Override
    public ConvertedDocument conversionImplementation(InputStream inputStream, File file) throws IOException {
        ConvertedDocument doc = super.conversionImplementation(inputStream, file);
        doc.is_webArchive = true;
        if (!doc.hasRawChildren()) {
            return doc;
        }

        // Only build the child converters once we know there is something to convert.
        TikaHTMLConverter htmlConverter = new TikaHTMLConverter(false);
        DefaultConverter defaultConverter = new DefaultConverter();

        StringBuilder childText = new StringBuilder();
        for (Content child : doc.getRawChildren()) {
            // Explicit array keeps this call valid whether or not the logger method is varargs.
            this.logger.info("{} {} {}", new Object[]{doc.id, child.id, child.mimeType});

            String mimeType = child.mimeType;
            if (mimeType == null) {
                continue;
            }

            if ("application/octet-stream".equalsIgnoreCase(mimeType)) {
                ConvertedDocument converted;
                // Close the per-child stream even if the conversion throws.
                try (InputStream childStream = TikaInputStream.get(child.content)) {
                    converted = defaultConverter.convert(childStream);
                }
                appendReadableText(converted, childText);
            } else if (mimeType.startsWith("text/html")) {
                ConvertedDocument converted;
                try (InputStream childStream = TikaInputStream.get(child.content)) {
                    converted = htmlConverter.convert(childStream);
                }
                appendReadableText(converted, childText);
            } else if (mimeType.startsWith("image")) {
                childText.append(String.format("\n[Image: %s type='%s']  ", child.id, mimeType));
            }
        }

        if (doc.hasText()) {
            doc.setText(doc.getText() + BODY_SEPARATOR + childText.toString());
        } else {
            doc.setText(childText.toString());
        }
        return doc;
    }

    /**
     * Appends a converted child's text plus a separator to {@code out}, unless
     * the conversion produced nothing or the text looks like web script.
     */
    private static void appendReadableText(ConvertedDocument converted, StringBuilder out) {
        if (converted == null || !converted.hasText()) {
            return;
        }
        String text = converted.getText();
        if (isWebScript(text)) {
            return;
        }
        out.append(text);
        out.append(CHILD_SEPARATOR);
    }

    /**
     * Heuristically decides whether a text is web script rather than readable
     * content. Only the first {@value #SCRIPT_SNIFF_LENGTH} characters are
     * inspected, case-insensitively.
     *
     * <p>Blank or {@code null} input is reported as script, so callers that keep
     * only non-script text also drop empty text.
     *
     * @param str text to inspect; may be {@code null}
     * @return {@code true} if {@code str} is blank, contains {@code javascript},
     *         {@code document.write(} or {@code xmlhttp}, or contains all of
     *         {@code function}, <code>{</code>, {@code var } and {@code =};
     *         {@code false} otherwise
     */
    public static boolean isWebScript(String str) {
        if (StringUtils.isBlank(str)) {
            return true;
        }
        // substring's end index is exclusive, so no "- 1": the whole window is inspected.
        int end = Math.min(SCRIPT_SNIFF_LENGTH, str.length());
        // Locale.ROOT keeps the ASCII keyword match independent of the default locale.
        String head = str.substring(0, end).toLowerCase(Locale.ROOT).trim();

        if (head.contains("javascript") || head.contains("document.write(") || head.contains("xmlhttp")) {
            return true;
        }
        return head.contains("function") && head.contains("{") && head.contains("var ") && head.contains("=");
    }
}
