package org.apdplat.extractor.html.impl;

import java.net.URL;
import org.apdplat.extractor.html.HtmlFetcher;
import org.jsoup.Jsoup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apdplat/extractor/html/impl/JSoupHtmlFetcher.class */
/**
 * {@link HtmlFetcher} implementation that downloads a page with jsoup, sending a fixed set
 * of browser-like request headers (Accept, Accept-Encoding, Accept-Language, Connection,
 * Referer, Host and User-Agent).
 *
 * <p>The class holds no instance state; every call to {@link #fetch(String)} builds a new
 * jsoup connection.
 */
public class JSoupHtmlFetcher implements HtmlFetcher {
    private static final Logger LOGGER = LoggerFactory.getLogger(JSoupHtmlFetcher.class);

    // Fixed request header values sent with every fetch.
    private static final String ACCEPT = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
    private static final String ENCODING = "gzip, deflate";
    private static final String LANGUAGE = "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3";
    private static final String CONNECTION = "keep-alive";
    private static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:36.0) Gecko/20100101 Firefox/36.0";

    /**
     * Issues a GET request for the given URL and returns the HTML of the document jsoup
     * parsed from the response.
     *
     * <p>The Referer header is set to the root of the target site (same scheme, host and
     * explicit port as the requested URL) and the Host header to the target's host, plus
     * the port when the URL specifies one.
     *
     * @param str the absolute URL of the page to fetch
     * @return the HTML of the parsed document, or an empty string if the URL cannot be
     *         parsed or the request fails for any reason (the failure is logged at ERROR
     *         level and never propagated to the caller)
     */
    @Override
    public String fetch(String str) {
        try {
            // Parameterized logging: no message string is built unless DEBUG is enabled.
            LOGGER.debug("url:{}", str);

            URL url = new URL(str);
            String host = url.getHost();
            int port = url.getPort();
            // getPort() returns -1 when the URL carries no explicit port. When it does,
            // the port must be part of the Host value and of the site root used as Referer.
            String hostHeader = (port == -1) ? host : host + ":" + port;
            // Use the URL's own scheme so an https page does not get an http Referer.
            String referer = url.getProtocol() + "://" + hostHeader;

            String html = Jsoup.connect(str)
                    .header("Accept", ACCEPT)
                    .header("Accept-Encoding", ENCODING)
                    .header("Accept-Language", LANGUAGE)
                    .header("Connection", CONNECTION)
                    .header("Referer", referer)
                    .header("Host", hostHeader)
                    .header("User-Agent", USER_AGENT)
                    // Do not reject responses based on their Content-Type.
                    .ignoreContentType(true)
                    .get()
                    .html();

            // The page can be large; the placeholder avoids copying it when DEBUG is off.
            LOGGER.debug("html:{}", html);
            return html;
        } catch (Exception e) {
            // Deliberately broad: this method reports every failure as an empty result.
            // Concatenation is kept here so the (String, Throwable) overload is used and
            // the stack trace is always logged.
            LOGGER.error("获取URL：" + str + "页面出错", e);
            return "";
        }
    }
}
