package org.codelibs.robot;

import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Date;
import java.util.Set;
import javax.annotation.Resource;
import org.apache.commons.io.IOUtils;
import org.codelibs.robot.builder.RequestDataBuilder;
import org.codelibs.robot.client.S2RobotClient;
import org.codelibs.robot.client.S2RobotClientFactory;
import org.codelibs.robot.client.fs.ChildUrlsException;
import org.codelibs.robot.entity.RequestData;
import org.codelibs.robot.entity.ResponseData;
import org.codelibs.robot.entity.UrlQueue;
import org.codelibs.robot.helper.LogHelper;
import org.codelibs.robot.log.LogType;
import org.codelibs.robot.processor.ResponseProcessor;
import org.codelibs.robot.rule.Rule;
import org.codelibs.robot.service.DataService;
import org.codelibs.robot.service.UrlQueueService;
import org.codelibs.robot.util.CrawlingParameterUtil;
import org.seasar.framework.container.S2Container;
import org.seasar.framework.container.annotation.tiger.Binding;
import org.seasar.framework.container.annotation.tiger.BindingType;
import org.seasar.framework.container.factory.SingletonS2ContainerFactory;
import org.seasar.framework.util.StringUtil;

/* loaded from: input_file:org/codelibs/robot/S2RobotThread.class */
/**
 * Crawler worker. Polls the URLs of one session from the
 * {@link UrlQueueService}, fetches each valid URL with the client returned by
 * the {@link S2RobotClientFactory} and hands the response to the
 * {@link ResponseProcessor} of the {@link Rule} chosen by the rule manager.
 *
 * <p>{@link #clientFactory} and {@link #robotContext} carry no injection
 * annotation; they have to be assigned before {@link #run()} is invoked.</p>
 */
public class S2RobotThread implements Runnable {

    /** Argument for {@code intervalController.delay} right before a URL is fetched. */
    private static final int DELAY_PRE_PROCESSING = 1;

    /** Argument for {@code intervalController.delay} once a URL has been handled. */
    private static final int DELAY_POST_PROCESSING = 2;

    /** Argument for {@code intervalController.delay} when the poll gave no usable URL. */
    private static final int DELAY_NO_URL_IN_QUEUE = 4;

    /** Argument for {@code intervalController.delay} at the end of a loop iteration. */
    private static final int DELAY_WAIT_NEW_URL = 8;

    /** Queue the URLs to crawl are polled from and child URLs are offered to. */
    @Resource
    protected UrlQueueService urlQueueService;

    /** Bound to the current thread through {@link CrawlingParameterUtil} while running. */
    @Resource
    protected DataService dataService;

    /** Container used to obtain fresh {@link UrlQueue} instances. */
    @Resource
    protected S2Container container;

    /** Optional log sink; every log call is skipped while this is {@code null}. */
    @Binding(bindingType = BindingType.MAY)
    @Resource
    protected LogHelper logHelper;

    /** Resolves the client responsible for a URL. */
    protected S2RobotClientFactory clientFactory;

    /** State shared by all threads of the crawling session. */
    protected S2RobotContext robotContext;

    /** See {@link #setNoWaitOnFolder(boolean)}. */
    protected boolean noWaitOnFolder = false;

    /**
     * Increments the context's active thread counter while holding
     * {@code activeThreadCountLock}.
     */
    protected void startCrawling() {
        synchronized (robotContext.activeThreadCountLock) {
            robotContext.activeThreadCount = Integer.valueOf(robotContext.activeThreadCount.intValue() + 1);
        }
    }

    /**
     * Decrements the context's active thread counter while holding
     * {@code activeThreadCountLock}.
     */
    protected void finishCrawling() {
        synchronized (robotContext.activeThreadCountLock) {
            robotContext.activeThreadCount = Integer.valueOf(robotContext.activeThreadCount.intValue() - 1);
        }
    }

    /**
     * Decides whether the polling loop should run another iteration.
     *
     * @param threadCheckCount number of consecutive polls that produced no usable URL
     * @return {@code false} when no container is available or, while the
     *         check count is below {@code maxThreadCheckCount}, when a
     *         positive {@code maxAccessCount} has been reached; once the
     *         check count is exhausted, {@code true} only while the active
     *         thread counter is positive
     */
    protected boolean isContinue(int threadCheckCount) {
        if (!SingletonS2ContainerFactory.hasContainer()) {
            return false;
        }
        if (threadCheckCount < robotContext.maxThreadCheckCount) {
            final boolean accessLimitReached = robotContext.maxAccessCount > 0
                    && robotContext.accessCount.longValue() >= robotContext.maxAccessCount;
            return !accessLimitReached;
        }
        // Check count exhausted: keep polling only while some thread is still
        // crawling, because it may still add URLs to the queue.
        return robotContext.activeThreadCount.intValue() > 0;
    }

    /**
     * Forwards a log event to the helper; does nothing when the helper is {@code null}.
     *
     * @param logHelper receiver of the event, may be {@code null}
     * @param logType   type of the event
     * @param args      event arguments, passed through unchanged
     */
    protected void log(LogHelper logHelper, LogType logType, Object... args) {
        if (logHelper != null) {
            logHelper.log(logType, args);
        }
    }

    /**
     * Polling loop of the worker.
     *
     * <p>The robot context, URL queue service and data service are published
     * through {@link CrawlingParameterUtil} for the duration of the loop and
     * cleared again in a {@code finally} block. Any {@link Throwable} that
     * escapes the per-URL handling is logged as {@code SYSTEM_ERROR} and ends
     * the loop.</p>
     */
    @Override
    public void run() {
        log(logHelper, LogType.START_THREAD, robotContext);
        int threadCheckCount = 0;
        CrawlingParameterUtil.setRobotContext(robotContext);
        CrawlingParameterUtil.setUrlQueueService(urlQueueService);
        CrawlingParameterUtil.setDataService(dataService);
        try {
            while (robotContext.running && isContinue(threadCheckCount)) {
                final UrlQueue urlQueue = urlQueueService.poll(robotContext.sessionId);
                if (isValid(urlQueue)) {
                    // A usable URL was found, so the idle counter starts over.
                    threadCheckCount = 0;
                    if (!crawl(urlQueue)) {
                        continue;
                    }
                } else {
                    log(logHelper, LogType.NO_URL_IN_QUEUE, robotContext, urlQueue, Integer.valueOf(threadCheckCount));
                    pause(DELAY_NO_URL_IN_QUEUE);
                    threadCheckCount++;
                }
                pause(DELAY_WAIT_NEW_URL);
            }
        } catch (final Throwable t) {
            log(logHelper, LogType.SYSTEM_ERROR, t);
        } finally {
            CrawlingParameterUtil.setRobotContext(null);
            CrawlingParameterUtil.setUrlQueueService(null);
            CrawlingParameterUtil.setDataService(null);
        }
        log(logHelper, LogType.FINISHED_THREAD, robotContext);
    }

    /**
     * Handles one polled URL: fetch, process (or enqueue the redirect target /
     * child URLs) and clean up.
     *
     * @param urlQueue the validated queue entry
     * @return {@code false} when the caller should skip the end-of-iteration
     *         delay, {@code true} otherwise
     */
    private boolean crawl(final UrlQueue urlQueue) {
        ResponseData responseData = null;
        // finishCrawling() must only undo a startCrawling() that really happened,
        // otherwise the active thread counter drifts below zero.
        boolean crawlingStarted = false;
        boolean waitBeforeNextPoll = true;
        log(logHelper, LogType.START_CRAWLING, robotContext, urlQueue);
        try {
            final S2RobotClient client = getClient(urlQueue.getUrl());
            if (client == null) {
                log(logHelper, LogType.UNSUPPORTED_URL_AT_CRAWLING_STARTED, robotContext, urlQueue);
            } else {
                startCrawling();
                crawlingStarted = true;
                CrawlingParameterUtil.setUrlQueue(urlQueue);
                pause(DELAY_PRE_PROCESSING);
                if (isContentUpdated(client, urlQueue)) {
                    log(logHelper, LogType.GET_CONTENT, robotContext, urlQueue);
                    final long startTime = System.currentTimeMillis();
                    responseData = client.execute(
                            RequestDataBuilder.newRequestData().method(urlQueue.getMethod()).url(urlQueue.getUrl()).build());
                    responseData.setExecutionTime(System.currentTimeMillis() - startTime);
                    responseData.setParentUrl(urlQueue.getParentUrl());
                    responseData.setSessionId(robotContext.sessionId);
                    if (responseData.getRedirectLocation() == null) {
                        log(logHelper, LogType.PROCESS_RESPONSE, robotContext, urlQueue, responseData);
                        processResponse(urlQueue, responseData);
                    } else {
                        log(logHelper, LogType.REDIRECT_LOCATION, robotContext, urlQueue, responseData);
                        // The redirect target is queued as a child of the current URL.
                        synchronized (robotContext.accessCountLock) {
                            storeChildUrl(responseData.getRedirectLocation(), urlQueue.getUrl(), null,
                                    childDepthOf(urlQueue));
                        }
                    }
                }
                log(logHelper, LogType.FINISHED_CRAWLING, robotContext, urlQueue);
            }
        } catch (final RobotCrawlAccessException e) {
            log(logHelper, LogType.CRAWLING_ACCESS_EXCEPTION, robotContext, urlQueue, e);
        } catch (final ChildUrlsException e) {
            // The client reports child URLs by throwing; queue them instead of a response.
            try {
                final Set<RequestData> childUrls = e.getChildUrlList();
                log(logHelper, LogType.PROCESS_CHILD_URLS_BY_EXCEPTION, robotContext, urlQueue, childUrls);
                synchronized (robotContext.accessCountLock) {
                    storeChildUrls(childUrls, urlQueue.getUrl(), childDepthOf(urlQueue));
                }
            } catch (final Exception storeFailure) {
                log(logHelper, LogType.CRAWLING_EXCETPION, robotContext, urlQueue, storeFailure);
            }
            // NOTE(review): both branches on noWaitOnFolder used to be identical,
            // which made the flag a no-op. Skipping the end-of-iteration delay is
            // the interpretation adopted here - confirm against the intended behavior.
            waitBeforeNextPoll = !noWaitOnFolder;
        } catch (final Throwable t) {
            log(logHelper, LogType.CRAWLING_EXCETPION, robotContext, urlQueue, t);
        } finally {
            try {
                addSitemapsFromRobotsTxt(urlQueue);
                if (responseData != null) {
                    IOUtils.closeQuietly(responseData.getResponseBody());
                }
                pause(DELAY_POST_PROCESSING);
            } finally {
                CrawlingParameterUtil.setUrlQueue(null);
                if (crawlingStarted) {
                    finishCrawling();
                }
            }
        }
        return waitBeforeNextPoll;
    }

    /** Calls the context's interval controller with the given argument, if one is set. */
    private void pause(final int delayType) {
        if (robotContext.intervalController != null) {
            robotContext.intervalController.delay(delayType);
        }
    }

    /** Depth for children of {@code parent}: 1 when the parent has no depth, parent depth + 1 otherwise. */
    private static int childDepthOf(final UrlQueue parent) {
        final Integer parentDepth = parent.getDepth();
        return parentDepth == null ? 1 : parentDepth.intValue() + 1;
    }

    /** True when the context's max depth is negative or {@code depth} does not exceed it. */
    private boolean isWithinMaxDepth(final int depth) {
        return robotContext.getMaxDepth() < 0 || depth <= robotContext.getMaxDepth();
    }

    /** Obtains a {@link UrlQueue} from the container and fills in the fields common to all child entries. */
    private UrlQueue createUrlQueue(final String url, final String parentUrl, final int depth) {
        final UrlQueue urlQueue = (UrlQueue) container.getComponent(UrlQueue.class);
        urlQueue.setCreateTime(new Timestamp(System.currentTimeMillis()));
        urlQueue.setDepth(Integer.valueOf(depth));
        urlQueue.setMethod(Constants.GET_METHOD);
        urlQueue.setParentUrl(parentUrl);
        urlQueue.setSessionId(robotContext.sessionId);
        urlQueue.setUrl(url);
        return urlQueue;
    }

    /**
     * Queues every sitemap URL returned by {@code robotContext.removeSitemaps()}
     * as a child of the given entry. A failure for one sitemap is logged and
     * does not stop the remaining ones.
     *
     * @param urlQueue entry used as parent of the sitemap URLs
     */
    protected void addSitemapsFromRobotsTxt(UrlQueue urlQueue) {
        final String[] sitemaps = robotContext.removeSitemaps();
        if (sitemaps == null) {
            return;
        }
        for (final String sitemap : sitemaps) {
            try {
                storeChildUrl(sitemap, urlQueue.getUrl(), null, childDepthOf(urlQueue));
            } catch (final Exception e) {
                log(logHelper, LogType.PROCESS_CHILD_URL_BY_EXCEPTION, robotContext, urlQueue, sitemap, e);
            }
        }
    }

    /**
     * Looks up the client for a URL.
     *
     * @param url URL to resolve
     * @return whatever {@code clientFactory.getClient(url)} returns; callers treat {@code null} as "unsupported URL"
     */
    protected S2RobotClient getClient(String url) {
        return clientFactory.getClient(url);
    }

    /**
     * Checks with a HEAD request whether the content behind the queue entry
     * has to be fetched again.
     *
     * <p>When the entry has a last-modified value and the HEAD response has
     * status 200 with a last-modified value that is not newer, the response is
     * marked as 304 and passed to {@link #processResponse}; the content is then
     * reported as not updated. The HEAD response body is closed in all cases.</p>
     *
     * @param client   client used for the HEAD request
     * @param urlQueue entry to check
     * @return {@code false} only in the not-modified case described above
     */
    protected boolean isContentUpdated(S2RobotClient client, UrlQueue urlQueue) {
        if (urlQueue.getLastModified() == null) {
            return true;
        }
        log(logHelper, LogType.CHECK_LAST_MODIFIED, robotContext, urlQueue);
        final long startTime = System.currentTimeMillis();
        ResponseData responseData = null;
        try {
            responseData = client.execute(RequestDataBuilder.newRequestData().head().url(urlQueue.getUrl()).build());
            final boolean notModified = responseData != null
                    && responseData.getLastModified() != null
                    && responseData.getLastModified().getTime() <= urlQueue.getLastModified().getTime()
                    && responseData.getHttpStatusCode() == 200;
            if (!notModified) {
                return true;
            }
            log(logHelper, LogType.NOT_MODIFIED, robotContext, urlQueue);
            responseData.setExecutionTime(System.currentTimeMillis() - startTime);
            responseData.setParentUrl(urlQueue.getParentUrl());
            responseData.setSessionId(robotContext.sessionId);
            responseData.setStatus(304);
            responseData.setHttpStatusCode(304);
            processResponse(urlQueue, responseData);
            return false;
        } finally {
            if (responseData != null) {
                IOUtils.closeQuietly(responseData.getResponseBody());
            }
        }
    }

    /**
     * Passes the response to the processor of the rule selected by the rule
     * manager. A missing rule or a rule without processor is only logged.
     *
     * @param urlQueue     entry the response belongs to (used for logging)
     * @param responseData response to process; receives the rule id when a rule matches
     */
    protected void processResponse(UrlQueue urlQueue, ResponseData responseData) {
        final Rule rule = robotContext.ruleManager.getRule(responseData);
        if (rule == null) {
            log(logHelper, LogType.NO_RULE, robotContext, urlQueue, responseData);
            return;
        }
        responseData.setRuleId(rule.getRuleId());
        final ResponseProcessor responseProcessor = rule.getResponseProcessor();
        if (responseProcessor == null) {
            log(logHelper, LogType.NO_RESPONSE_PROCESSOR, robotContext, urlQueue, responseData, rule);
            return;
        }
        responseProcessor.process(responseData);
    }

    /**
     * Offers the given child URLs to the queue, skipping URLs rejected by the
     * URL filter. Nothing is offered when {@code depth} exceeds a non-negative
     * max depth or when the set is {@code null}.
     *
     * @param childUrlList child requests to queue, may be {@code null}
     * @param parentUrl    URL the children were found on
     * @param depth        depth assigned to every child
     */
    protected void storeChildUrls(Set<RequestData> childUrlList, String parentUrl, int depth) {
        if (childUrlList == null || !isWithinMaxDepth(depth)) {
            return;
        }
        final ArrayList<UrlQueue> accepted = new ArrayList<UrlQueue>();
        for (final RequestData requestData : childUrlList) {
            if (robotContext.urlFilter.match(requestData.getUrl())) {
                final UrlQueue urlQueue = createUrlQueue(requestData.getUrl(), parentUrl, depth);
                urlQueue.setMetaData(requestData.getMetaData());
                accepted.add(urlQueue);
            }
        }
        urlQueueService.offerAll(robotContext.sessionId, accepted);
    }

    /**
     * Offers a single child URL to the queue when it is within the max depth
     * and accepted by the URL filter.
     *
     * @param url       URL to queue
     * @param parentUrl URL the child was found on
     * @param metaData  meta data stored with the entry, may be {@code null}
     * @param depth     depth assigned to the child
     */
    protected void storeChildUrl(String url, String parentUrl, String metaData, int depth) {
        if (!isWithinMaxDepth(depth) || !robotContext.urlFilter.match(url)) {
            return;
        }
        final UrlQueue urlQueue = createUrlQueue(url, parentUrl, depth);
        urlQueue.setMetaData(metaData);
        final ArrayList<UrlQueue> single = new ArrayList<UrlQueue>(1);
        single.add(urlQueue);
        urlQueueService.offerAll(robotContext.sessionId, single);
    }

    /**
     * Tells whether a polled entry should be crawled.
     *
     * @param urlQueue polled entry, may be {@code null}
     * @return {@code true} when the entry has a non-blank URL, does not exceed
     *         a non-negative max depth and is accepted by the URL filter
     */
    protected boolean isValid(UrlQueue urlQueue) {
        if (urlQueue == null || StringUtil.isBlank(urlQueue.getUrl())) {
            return false;
        }
        // A missing depth is handled like the root everywhere else in this class
        // (its children get depth 1), so it never exceeds the limit here either.
        final Integer depth = urlQueue.getDepth();
        if (depth != null && !isWithinMaxDepth(depth.intValue())) {
            return false;
        }
        return robotContext.urlFilter.match(urlQueue.getUrl());
    }

    /**
     * @return the current value of the no-wait-on-folder flag
     */
    public boolean isNoWaitOnFolder() {
        return noWaitOnFolder;
    }

    /**
     * Sets the flag consulted after a URL reported its children through a
     * {@link ChildUrlsException}; when {@code true} the end-of-iteration delay
     * is skipped for that URL.
     *
     * @param noWaitOnFolder new flag value
     */
    public void setNoWaitOnFolder(boolean noWaitOnFolder) {
        this.noWaitOnFolder = noWaitOnFolder;
    }
}
