package org.codelibs.robot.transformer.impl;

import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.HashMap;
import org.apache.commons.io.IOUtils;
import org.codelibs.robot.Constants;
import org.codelibs.robot.RobotCrawlAccessException;
import org.codelibs.robot.RobotSystemException;
import org.codelibs.robot.entity.AccessResultData;
import org.codelibs.robot.entity.ResponseData;
import org.codelibs.robot.entity.ResultData;
import org.codelibs.robot.extractor.Extractor;
import org.codelibs.robot.extractor.ExtractorFactory;
import org.seasar.framework.container.SingletonS2Container;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/codelibs/robot/transformer/impl/TextTransformer.class */
/**
 * Transformer that extracts the text of a response body through an
 * {@link Extractor} selected by the response's MIME type and stores that text
 * as bytes encoded with {@link #charsetName}.
 */
public class TextTransformer extends AbstractTransformer {
    private static final Logger logger = LoggerFactory.getLogger(TextTransformer.class);

    /**
     * Name of the charset used to encode extracted text in {@link #transform}
     * and to decode stored bytes in {@link #getData}. Reset to UTF-8 by
     * {@link #transform} when the configured name is not supported.
     */
    protected String charsetName = Constants.UTF_8;

    /**
     * Extracts the text content of the response and wraps it in a
     * {@link ResultData}.
     *
     * <p>The response body stream is always closed once extraction has been
     * attempted, whether it succeeded or failed.</p>
     *
     * @param responseData the crawled response; must carry a response body
     * @return result data holding the extracted text as bytes, the name of
     *         this transformer and the charset name used for encoding
     * @throws RobotCrawlAccessException if there is no response body, no
     *         extractor is available for the MIME type, the extractor yields
     *         no content, or extraction fails for any other reason
     * @throws RobotSystemException if the "extractorFactory" component cannot
     *         be found
     */
    @Override
    public ResultData transform(final ResponseData responseData) {
        if (responseData == null || responseData.getResponseBody() == null) {
            throw new RobotCrawlAccessException("No response body.");
        }
        final ExtractorFactory extractorFactory = (ExtractorFactory) SingletonS2Container.getComponent("extractorFactory");
        if (extractorFactory == null) {
            throw new RobotSystemException("Could not find extractorFactory.");
        }
        final String mimeType = responseData.getMimeType();
        final Extractor extractor = extractorFactory.getExtractor(mimeType);
        final InputStream in = responseData.getResponseBody();
        final HashMap<String, String> params = new HashMap<String, String>();
        params.put("resourceName", getResourceName(responseData));
        params.put("Content-Type", mimeType);
        try {
            if (extractor == null) {
                throw new RobotCrawlAccessException("Could not find an extractor for MIME type: " + mimeType);
            }
            final String content = extractor.getText(in, params).getContent();
            if (content == null) {
                throw new RobotCrawlAccessException("Could not extract data: the extracted content is null.");
            }
            return createResultData(content);
        } catch (final RobotCrawlAccessException e) {
            // Already carries a specific message; do not wrap it a second time.
            throw e;
        } catch (final Exception e) {
            throw new RobotCrawlAccessException("Could not extract data.", e);
        } finally {
            // Single close point for every exit path.
            IOUtils.closeQuietly(in);
        }
    }

    /**
     * Builds the result for the given text, encoding it with
     * {@link #charsetName} and falling back to UTF-8 when that charset name
     * is not supported.
     *
     * @param content the extracted text; must not be null
     * @return the populated result data
     */
    private ResultData createResultData(final String content) {
        final ResultData resultData = new ResultData();
        resultData.setTransformerName(getName());
        try {
            resultData.setData(content.getBytes(this.charsetName));
        } catch (final UnsupportedEncodingException e) {
            if (logger.isInfoEnabled()) {
                logger.info("Invalid charsetName: " + this.charsetName + ". Changed to " + Constants.UTF_8, e);
            }
            // Remember the fallback so later calls and the recorded encoding
            // agree with the bytes actually stored.
            this.charsetName = Constants.UTF_8_CHARSET.name();
            resultData.setData(content.getBytes(Constants.UTF_8_CHARSET));
        }
        resultData.setEncoding(this.charsetName);
        return resultData;
    }

    /**
     * Decodes the bytes stored in an access result back into text.
     *
     * <p>NOTE(review): decoding uses this transformer's current
     * {@link #charsetName}, not an encoding recorded on the access result;
     * confirm the two always agree for stored data.</p>
     *
     * @param accessResultData the stored result produced by this transformer
     * @return the decoded text, or {@code null} when no data is stored
     * @throws RobotSystemException if the result was produced by a transformer
     *         with a different name
     * @throws RobotCrawlAccessException if {@link #charsetName} is not a
     *         supported encoding
     */
    @Override
    public Object getData(final AccessResultData accessResultData) {
        if (!getName().equals(accessResultData.getTransformerName())) {
            throw new RobotSystemException("Transformer is invalid. Use " + accessResultData.getTransformerName() + ". This transformer is " + getName() + ".");
        }
        final byte[] data = accessResultData.getData();
        if (data == null) {
            return null;
        }
        try {
            return new String(data, this.charsetName);
        } catch (final UnsupportedEncodingException e) {
            throw new RobotCrawlAccessException("Unsupported encoding: " + this.charsetName, e);
        }
    }

    /**
     * Derives a resource name from the last path segment of the response URL,
     * URL-decoded with the response's charset.
     *
     * @param responseData the response whose URL and charset are read
     * @return the decoded last segment; the undecoded segment when it cannot
     *         be decoded; or {@code null} when the URL or charset is missing
     */
    private String getResourceName(final ResponseData responseData) {
        final String url = responseData.getUrl();
        final String encoding = responseData.getCharSet();
        if (url == null || encoding == null) {
            return null;
        }
        // Drop trailing slashes so "http://host/dir/" yields "dir".
        String name = url.replaceAll("/+$", Constants.EMPTY_STRING);
        final int slashIndex = name.lastIndexOf('/');
        if (slashIndex >= 0) {
            name = name.substring(slashIndex + 1);
        }
        try {
            return URLDecoder.decode(name, encoding);
        } catch (final UnsupportedEncodingException ignored) {
            // Unknown charset name: the raw segment is still a usable name.
            return name;
        } catch (final IllegalArgumentException ignored) {
            // Malformed percent escape (e.g. a stray '%') in a crawled URL:
            // fall back to the raw segment instead of failing the transform.
            return name;
        }
    }

    /**
     * Returns the name of the charset used to encode and decode text.
     *
     * @return the current charset name
     */
    public String getCharsetName() {
        return this.charsetName;
    }

    /**
     * Sets the name of the charset used to encode and decode text.
     *
     * @param str the charset name to use
     */
    public void setCharsetName(final String str) {
        this.charsetName = str;
    }
}
