package org.codelibs.robot.extractor.impl;

import java.io.BufferedInputStream;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.xpath.CachedXPathAPI;
import org.codelibs.robot.RobotSystemException;
import org.codelibs.robot.entity.ExtractData;
import org.codelibs.robot.extractor.ExtractException;
import org.codelibs.robot.extractor.Extractor;
import org.cyberneko.html.parsers.DOMParser;
import org.seasar.framework.util.StringUtil;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/* loaded from: input_file:org/codelibs/robot/extractor/impl/HtmlXpathExtractor.class */
/**
 * Extracts text from an HTML stream by parsing it into a DOM with the
 * NekoHTML {@link DOMParser} and concatenating the text content of every
 * node matched by a configurable XPath expression.
 *
 * <p>Parser features and properties can be registered through
 * {@link #addFeature(String, String)} and {@link #addProperty(String, String)};
 * they are applied to each new parser created by {@link #getDomParser()}.
 *
 * <p>A separate {@link CachedXPathAPI} instance is kept per thread. The
 * configuration fields (maps, pattern, XPath) are plain mutable fields with
 * no synchronization of their own.
 */
public class HtmlXpathExtractor extends AbstractXmlExtractor implements Extractor {
    /**
     * Pattern used to find a charset declaration in a {@code <meta ... content="...; charset=X">}
     * tag; group 1 is the charset name. Matching is case-insensitive and
     * multi-line.
     */
    protected Pattern metaCharsetPattern = Pattern.compile(
            "<meta.*content\\s*=\\s*['\"].*;\\s*charset=([\\w\\d\\-_]*)['\"]\\s*/?>",
            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

    /** Parser feature ids mapped to their value as a string ("true" enables the feature). */
    protected Map<String, String> featureMap = new HashMap<>();

    /** Parser property ids mapped to the value handed to the parser. */
    protected Map<String, String> propertyMap = new HashMap<>();

    /** XPath expression selecting the nodes whose text content is extracted. */
    protected String targetNodePath = "//HTML/BODY | //@alt | //@title";

    /** Per-thread XPath helper; populated lazily by {@link #getXPathAPI()}. */
    private final ThreadLocal<CachedXPathAPI> xpathAPI = new ThreadLocal<>();

    /**
     * Parses the given HTML stream and returns the text of all nodes matched
     * by {@link #targetNodePath}, separated by single spaces, with every run
     * of whitespace collapsed to one space and the result trimmed.
     *
     * @param inputStream the HTML content to read; must not be {@code null}
     * @param map         additional parameters; not read by this implementation
     * @return the extracted text wrapped in an {@link ExtractData}
     * @throws RobotSystemException if {@code inputStream} is {@code null}
     * @throws ExtractException     wrapping any exception raised while detecting
     *                              the encoding, parsing, or evaluating the XPath
     */
    @Override
    public ExtractData getText(InputStream inputStream, Map<String, String> map) {
        if (inputStream == null) {
            throw new RobotSystemException("The inputstream is null.");
        }
        try {
            // The same buffered stream is handed to getEncoding() and then to
            // the parser. getEncoding() is not defined in this class
            // (presumably inherited from AbstractXmlExtractor - confirm that
            // it leaves the stream positioned at the start).
            BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream);
            String encoding = getEncoding(bufferedInputStream);

            DOMParser domParser = getDomParser();
            InputSource inputSource = new InputSource(bufferedInputStream);
            inputSource.setEncoding(encoding);
            domParser.parse(inputSource);
            Document document = domParser.getDocument();

            StringBuilder sb = new StringBuilder(255);
            NodeList selectNodeList = getXPathAPI().selectNodeList(document, this.targetNodePath);
            for (int i = 0; i < selectNodeList.getLength(); i++) {
                sb.append(selectNodeList.item(i).getTextContent()).append(' ');
            }
            // Normalize whitespace so the result is a single-line, single-spaced string.
            return new ExtractData(sb.toString().replaceAll("\\s+", " ").trim());
        } catch (Exception e) {
            throw new ExtractException(e);
        }
    }

    /**
     * Returns the {@link CachedXPathAPI} bound to the current thread,
     * creating and storing one on first use.
     *
     * @return the calling thread's XPath helper, never {@code null}
     */
    protected CachedXPathAPI getXPathAPI() {
        CachedXPathAPI cachedXPathAPI = this.xpathAPI.get();
        if (cachedXPathAPI == null) {
            cachedXPathAPI = new CachedXPathAPI();
            this.xpathAPI.set(cachedXPathAPI);
        }
        return cachedXPathAPI;
    }

    /**
     * Creates a new {@link DOMParser} and applies every configured feature
     * and property to it. A feature is enabled only when its configured value
     * equals "true" (case-insensitively); any other value disables it.
     *
     * @return a freshly configured parser
     * @throws RobotSystemException if applying a feature or property fails
     */
    protected DOMParser getDomParser() {
        DOMParser dOMParser = new DOMParser();
        try {
            for (Map.Entry<String, String> entry : this.featureMap.entrySet()) {
                dOMParser.setFeature(entry.getKey(), "true".equalsIgnoreCase(entry.getValue()));
            }
            for (Map.Entry<String, String> entry2 : this.propertyMap.entrySet()) {
                dOMParser.setProperty(entry2.getKey(), entry2.getValue());
            }
            return dOMParser;
        } catch (Exception e) {
            throw new RobotSystemException("Invalid parser configuration.", e);
        }
    }

    /**
     * Returns the pattern used to locate the charset declaration.
     *
     * @return the current {@link #metaCharsetPattern}
     */
    @Override
    protected Pattern getEncodingPattern() {
        return this.metaCharsetPattern;
    }

    /**
     * Returns the tag pattern; this extractor supplies none.
     *
     * @return always {@code null}
     */
    @Override
    protected Pattern getTagPattern() {
        return null;
    }

    /**
     * Registers a parser feature to be applied by {@link #getDomParser()}.
     *
     * @param str  the feature id; must not be blank
     * @param str2 the feature value ("true" enables it); must not be blank
     * @throws RobotSystemException if either argument is blank
     */
    public void addFeature(String str, String str2) {
        if (StringUtil.isBlank(str) || StringUtil.isBlank(str2)) {
            throw new RobotSystemException("key or value is null.");
        }
        this.featureMap.put(str, str2);
    }

    /**
     * Registers a parser property to be applied by {@link #getDomParser()}.
     *
     * @param str  the property id; must not be blank
     * @param str2 the property value; must not be blank
     * @throws RobotSystemException if either argument is blank
     */
    public void addProperty(String str, String str2) {
        if (StringUtil.isBlank(str) || StringUtil.isBlank(str2)) {
            throw new RobotSystemException("key or value is null.");
        }
        this.propertyMap.put(str, str2);
    }

    /**
     * Returns the live feature map (not a copy).
     *
     * @return the map of parser features
     */
    public Map<String, String> getFeatureMap() {
        return this.featureMap;
    }

    /**
     * Replaces the feature map. The given map is stored as-is, without
     * copying or validation.
     *
     * @param map the new map of parser features
     */
    public void setFeatureMap(Map<String, String> map) {
        this.featureMap = map;
    }

    /**
     * Returns the live property map (not a copy).
     *
     * @return the map of parser properties
     */
    public Map<String, String> getPropertyMap() {
        return this.propertyMap;
    }

    /**
     * Replaces the property map. The given map is stored as-is, without
     * copying or validation.
     *
     * @param map the new map of parser properties
     */
    public void setPropertyMap(Map<String, String> map) {
        this.propertyMap = map;
    }

    /**
     * Returns the pattern used to detect the charset in a meta tag.
     *
     * @return the current meta-charset pattern
     */
    public Pattern getMetaCharsetPattern() {
        return this.metaCharsetPattern;
    }

    /**
     * Sets the pattern used to detect the charset in a meta tag.
     *
     * @param pattern the new meta-charset pattern
     */
    public void setMetaCharsetPattern(Pattern pattern) {
        this.metaCharsetPattern = pattern;
    }

    /**
     * Returns the XPath expression selecting the nodes to extract text from.
     *
     * @return the current target node XPath
     */
    public String getTargetNodePath() {
        return this.targetNodePath;
    }

    /**
     * Sets the XPath expression selecting the nodes to extract text from.
     *
     * @param str the new target node XPath
     */
    public void setTargetNodePath(String str) {
        this.targetNodePath = str;
    }
}
