package org.apache.tika.parser.html;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.spi.LocationInfo;
import org.apache.solr.response.RawResponseWriter;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType;
import org.apache.tika.sax.TextContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.springframework.web.servlet.tags.form.InputTag;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:WEB-INF/lib/tika-parsers-1.0.jar:org/apache/tika/parser/html/HtmlHandler.class */
/**
 * SAX handler that filters the events of a raw HTML parse and forwards a
 * sanitised subset of them to an {@link XHTMLContentHandler}.
 *
 * <p>The handler keeps three nesting counters:
 * <ul>
 *   <li>{@code bodyLevel} - depth inside a {@code BODY} or {@code FRAMESET} element;</li>
 *   <li>{@code discardLevel} - depth inside an element the {@link HtmlMapper}
 *       marks as discarded;</li>
 *   <li>{@code titleLevel} - depth inside a {@code TITLE} element.</li>
 * </ul>
 * While outside the body (and outside discarded content) it records
 * {@code META} values in the {@link Metadata}, and emits {@code base} and
 * {@code link} elements. Inside the body it emits only the elements and
 * attributes the mapper declares safe, resolving URL-valued attributes
 * against the {@code Content-Location} metadata entry.
 *
 * <p>Element names are compared against upper-case literals ({@code "TITLE"},
 * {@code "BODY"}, ...), so the qualified names delivered to this handler are
 * presumably upper-cased by the upstream parser - confirm against the caller.
 */
public class HtmlHandler extends TextContentHandler {

    /** Metadata key holding the base URL used to resolve relative links. */
    private static final String CONTENT_LOCATION = "Content-Location";

    /** Metadata key (and {@code META} name) of the document media type. */
    private static final String CONTENT_TYPE = "Content-Type";

    /** Name of the {@code META} attribute that carries the value. */
    private static final String CONTENT_ATTRIBUTE = "content";

    /** Name of the alternative-text attribute forced onto {@code img} elements. */
    private static final String ALT_ATTRIBUTE = "alt";

    /** Mapped attribute names whose values are resolved against the base URL. */
    private static final Set<String> URI_ATTRIBUTES =
            new HashSet<String>(Arrays.asList("src", "href", "longdesc", "cite"));

    /** Two signed decimal numbers separated by commas and/or whitespace. */
    private static final Pattern ICBM = Pattern.compile("\\s*(-?\\d+\\.\\d+)[,\\s]+(-?\\d+\\.\\d+)\\s*");

    private final HtmlMapper mapper;
    private final XHTMLContentHandler xhtml;
    private final Metadata metadata;
    private int bodyLevel = 0;
    private int discardLevel = 0;
    private int titleLevel = 0;
    private final StringBuilder title = new StringBuilder();

    /**
     * Creates a handler writing to an already constructed XHTML handler.
     *
     * <p>If the metadata has no {@code Content-Location} yet and the resource
     * name is a well-formed URL, the (trimmed) resource name is stored as the
     * {@code Content-Location} so that it can serve as the base URL.
     *
     * @param htmlMapper decides which elements/attributes are safe or discarded
     * @param xHTMLContentHandler target of the filtered events
     * @param metadata metadata read for the base URL and updated during parsing
     */
    private HtmlHandler(HtmlMapper htmlMapper, XHTMLContentHandler xHTMLContentHandler, Metadata metadata) {
        super(xHTMLContentHandler);
        this.mapper = htmlMapper;
        this.xhtml = xHTMLContentHandler;
        this.metadata = metadata;

        if (metadata.get(CONTENT_LOCATION) == null) {
            String resourceName = metadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY);
            if (resourceName != null) {
                resourceName = resourceName.trim();
                try {
                    // Constructed only to validate the syntax; the instance is not used.
                    new URL(resourceName);
                    metadata.set(CONTENT_LOCATION, resourceName);
                } catch (MalformedURLException ignored) {
                    // The resource name is not a URL, so there is no default
                    // base URL; Content-Location is deliberately left unset.
                }
            }
        }
    }

    /**
     * Creates a handler that wraps the given content handler in a new
     * {@link XHTMLContentHandler}.
     *
     * @param htmlMapper decides which elements/attributes are safe or discarded
     * @param contentHandler receiver of the generated XHTML events
     * @param metadata metadata read for the base URL and updated during parsing
     */
    public HtmlHandler(HtmlMapper htmlMapper, ContentHandler contentHandler, Metadata metadata) {
        this(htmlMapper, new XHTMLContentHandler(contentHandler, metadata), metadata);
    }

    /**
     * Updates the nesting counters, processes head-level {@code META},
     * {@code BASE} and {@code LINK} elements, and emits safe body elements.
     *
     * @param str namespace URI (not used)
     * @param str2 local name (not used)
     * @param str3 qualified element name; the only name this handler inspects
     * @param attributes attributes of the element
     * @throws SAXException if the downstream handler fails
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void startElement(String str, String str2, String str3, Attributes attributes) throws SAXException {
        // Once inside a tracked region every nested element deepens it, so
        // that the matching decrements in endElement balance out.
        if ("TITLE".equals(str3) || this.titleLevel > 0) {
            this.titleLevel++;
        }
        if ("BODY".equals(str3) || "FRAMESET".equals(str3) || this.bodyLevel > 0) {
            this.bodyLevel++;
        }
        if (this.mapper.isDiscardElement(str3) || this.discardLevel > 0) {
            this.discardLevel++;
        }

        if (this.bodyLevel == 0 && this.discardLevel == 0) {
            if ("META".equals(str3) && attributes.getValue(CONTENT_ATTRIBUTE) != null) {
                // http-equiv takes precedence over name when both are present.
                if (attributes.getValue("http-equiv") != null) {
                    addHtmlMetadata(attributes.getValue("http-equiv"), attributes.getValue(CONTENT_ATTRIBUTE));
                } else if (attributes.getValue("name") != null) {
                    addHtmlMetadata(attributes.getValue("name"), attributes.getValue(CONTENT_ATTRIBUTE));
                }
            } else if ("BASE".equals(str3) && attributes.getValue("href") != null) {
                startElementWithSafeAttributes("base", attributes);
                this.xhtml.endElement("base");
                // The declared base becomes the reference for later links.
                this.metadata.set(CONTENT_LOCATION, resolve(attributes.getValue("href")));
            } else if ("LINK".equals(str3)) {
                startElementWithSafeAttributes("link", attributes);
                this.xhtml.endElement("link");
            }
        }

        if (this.bodyLevel > 0 && this.discardLevel == 0) {
            String safeElement = this.mapper.mapSafeElement(str3);
            if (safeElement != null) {
                startElementWithSafeAttributes(safeElement, attributes);
            }
        }

        // NOTE(review): the buffer is cleared on EVERY element start, so any
        // markup nested inside <title> drops the text collected before it.
        // Kept as-is to preserve behaviour - confirm this is intended.
        this.title.setLength(0);
    }

    /**
     * Records one {@code META} name/value pair in the metadata.
     *
     * <p>{@code ICBM} values matching "lat, lon" are normalised and also
     * stored as latitude/longitude; {@code Content-Type} values are stored in
     * the form produced by {@link MediaType#toString()} when they parse.
     * Everything else is stored verbatim. Nothing is stored if either
     * argument is {@code null}.
     *
     * @param name metadata name (value of {@code http-equiv} or {@code name})
     * @param value metadata value (value of {@code content})
     */
    private void addHtmlMetadata(String name, String value) {
        if (name == null || value == null) {
            return;
        }

        if (name.equalsIgnoreCase("ICBM")) {
            Matcher matcher = ICBM.matcher(value);
            if (matcher.matches()) {
                String latitude = matcher.group(1);
                String longitude = matcher.group(2);
                this.metadata.set("ICBM", latitude + ", " + longitude);
                this.metadata.set(Metadata.LATITUDE, latitude);
                this.metadata.set(Metadata.LONGITUDE, longitude);
            } else {
                // Unrecognised coordinate format: keep the raw value.
                this.metadata.set("ICBM", value);
            }
        } else if (name.equalsIgnoreCase(CONTENT_TYPE)) {
            MediaType type = MediaType.parse(value);
            if (type != null) {
                this.metadata.set(CONTENT_TYPE, type.toString());
            } else {
                this.metadata.set(CONTENT_TYPE, value);
            }
        } else {
            this.metadata.set(name, value);
        }
    }

    /**
     * Emits a start tag carrying only the attributes the mapper allows.
     *
     * <p>Allowed attributes are renamed to their mapped name. Attributes in
     * {@link #URI_ATTRIBUTES} are resolved against the base URL. For
     * {@code object} elements the {@code codebase} is resolved first and then
     * used as the base for {@code data} and {@code classid}. An {@code img}
     * without an {@code alt} attribute gets an empty one.
     *
     * @param elementName name of the element to emit
     * @param attributes attributes of the source element
     * @throws SAXException if the downstream handler fails
     */
    private void startElementWithSafeAttributes(String elementName, Attributes attributes) throws SAXException {
        if (attributes.getLength() == 0) {
            this.xhtml.startElement(elementName);
            return;
        }

        boolean isObject = elementName.equals("object");
        String codebase = null;
        if (isObject) {
            String declared = attributes.getValue("", "codebase");
            codebase = declared != null ? resolve(declared) : this.metadata.get(CONTENT_LOCATION);
        }

        AttributesImpl safeAttributes = new AttributesImpl(attributes);
        int index = 0;
        while (index < safeAttributes.getLength()) {
            String safeName = this.mapper.mapSafeAttribute(elementName, safeAttributes.getLocalName(index));
            if (safeName == null) {
                // Removal shifts the following attributes down, so the same
                // index must be examined again.
                safeAttributes.removeAttribute(index);
                continue;
            }

            // The mapper may have normalised the name; store the mapped form.
            safeAttributes.setLocalName(index, safeName);
            if (URI_ATTRIBUTES.contains(safeName)) {
                safeAttributes.setValue(index, resolve(safeAttributes.getValue(index)));
            } else if (isObject && "codebase".equals(safeName)) {
                // Without a resolved codebase or a base URL there is nothing
                // to substitute; keep the original value rather than
                // writing null into the attribute list.
                if (codebase != null) {
                    safeAttributes.setValue(index, codebase);
                }
            } else if (isObject && ("data".equals(safeName) || "classid".equals(safeName))) {
                safeAttributes.setValue(index, resolve(codebase, safeAttributes.getValue(index)));
            }
            index++;
        }

        if ("img".equals(elementName) && safeAttributes.getValue("", ALT_ATTRIBUTE) == null) {
            safeAttributes.addAttribute("", ALT_ATTRIBUTE, ALT_ATTRIBUTE, "CDATA", "");
        }
        this.xhtml.startElement(elementName, safeAttributes);
    }

    /**
     * Closes a safe body element (or emits a newline for an unmapped element
     * listed in {@code XHTMLContentHandler.ENDLINE}), stores the title when
     * the outermost {@code TITLE} closes, and unwinds the nesting counters.
     *
     * @param str namespace URI (not used)
     * @param str2 local name (not used)
     * @param str3 qualified element name
     * @throws SAXException if the downstream handler fails
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void endElement(String str, String str2, String str3) throws SAXException {
        if (this.bodyLevel > 0 && this.discardLevel == 0) {
            String safeElement = this.mapper.mapSafeElement(str3);
            if (safeElement != null) {
                this.xhtml.endElement(safeElement);
            } else if (XHTMLContentHandler.ENDLINE.contains(str3.toLowerCase(Locale.ENGLISH))) {
                this.xhtml.newline();
            }
        }

        if (this.titleLevel > 0) {
            this.titleLevel--;
            if (this.titleLevel == 0) {
                this.metadata.set("title", this.title.toString().trim());
            }
        }
        if (this.bodyLevel > 0) {
            this.bodyLevel--;
        }
        if (this.discardLevel > 0) {
            this.discardLevel--;
        }
    }

    /**
     * Collects title text while inside {@code TITLE} and outside the body,
     * and forwards body text that is not inside a discarded element.
     *
     * @param cArr character buffer
     * @param i offset of the first character
     * @param i2 number of characters
     * @throws SAXException if the downstream handler fails
     */
    @Override // org.apache.tika.sax.TextContentHandler, org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void characters(char[] cArr, int i, int i2) throws SAXException {
        if (this.titleLevel > 0 && this.bodyLevel == 0) {
            this.title.append(cArr, i, i2);
        }
        if (this.bodyLevel > 0 && this.discardLevel == 0) {
            super.characters(cArr, i, i2);
        }
    }

    /**
     * Forwards ignorable whitespace that occurs in the body and outside any
     * discarded element; all other whitespace is dropped.
     *
     * @param cArr character buffer
     * @param i offset of the first character
     * @param i2 number of characters
     * @throws SAXException if the downstream handler fails
     */
    @Override // org.apache.tika.sax.TextContentHandler, org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void ignorableWhitespace(char[] cArr, int i, int i2) throws SAXException {
        if (this.bodyLevel > 0 && this.discardLevel == 0) {
            super.ignorableWhitespace(cArr, i, i2);
        }
    }

    /**
     * Resolves a URL against the current {@code Content-Location}.
     *
     * @param url possibly relative URL; must not be {@code null}
     * @return the resolved URL, or the trimmed input if it cannot be resolved
     */
    private String resolve(String url) {
        return resolve(this.metadata.get(CONTENT_LOCATION), url);
    }

    /**
     * Resolves a URL against an explicit base.
     *
     * <p>The trimmed input is returned unchanged when there is no base, when
     * it starts with one of the schemes {@code urn:}, {@code mailto:},
     * {@code tel:}, {@code data:}, {@code javascript:} or {@code about:}
     * (case-insensitive), or when URL construction fails.
     *
     * @param base base URL, or {@code null} if none is known
     * @param url possibly relative URL; must not be {@code null}
     * @return the resolved URL in external form, or the trimmed input
     */
    private String resolve(String base, String url) {
        String target = url.trim();
        String lower = target.toLowerCase(Locale.ENGLISH);
        if (base == null
                || lower.startsWith("urn:")
                || lower.startsWith("mailto:")
                || lower.startsWith("tel:")
                || lower.startsWith("data:")
                || lower.startsWith("javascript:")
                || lower.startsWith("about:")) {
            return target;
        }

        try {
            URL baseUrl = new URL(base.trim());
            String path = baseUrl.getPath();
            if (target.startsWith("?") && path.length() > 0 && !path.endsWith("/")) {
                // A query-only reference is appended to the complete base
                // path instead of going through URL(URL, String) - presumably
                // because that constructor would drop the last path segment.
                return new URL(baseUrl.getProtocol(), baseUrl.getHost(), baseUrl.getPort(),
                        baseUrl.getPath() + target).toExternalForm();
            }
            return new URL(baseUrl, target).toExternalForm();
        } catch (MalformedURLException ignored) {
            // Unknown or broken format: hand the URL back as received.
            return target;
        }
    }
}
