package org.lucee.extension.search.lucene.net;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import lucee.commons.io.log.Log;
import lucee.commons.net.http.Header;
import lucee.loader.engine.CFMLEngine;
import lucee.loader.engine.CFMLEngineFactory;
import lucee.runtime.exp.PageException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.lucee.extension.search.lucene.DocumentUtil;

/* loaded from: input_file:org/lucee/extension/search/lucene/net/WebCrawler.class */
/**
 * Crawls pages starting at a URL and adds one Lucene {@link Document} per fetched page to an
 * {@link IndexWriter}. When recursion is enabled, links found in a page that point to the same
 * host (http/https only, with an accepted file extension) are fetched by {@code ChildrenIndexer}
 * threads and their content is then scanned for further links.
 */
public final class WebCrawler {
    /** File extensions (without leading dot) that are crawled when the caller supplies none. */
    private static final String[] EXTENSIONS = {"cfm", "cfml", "htm", "html", "dbm", "dbml"};

    /** Log target for crawl messages; may be {@code null}, in which case nothing is logged. */
    private final Log log;

    /**
     * @param log log used for info/error messages, or {@code null} to disable logging
     */
    public WebCrawler(Log log) {
        this.log = log;
    }

    /**
     * Crawls {@code url} and, if requested, the pages it links to.
     *
     * @param indexWriter writer that receives the documents; {@code null} skips indexing
     * @param url start URL
     * @param strArr accepted extensions, optionally written as {@code "*.ext"} or {@code ".ext"};
     *        {@code null} or empty selects the built-in default list. The array is not modified.
     * @param z whether links found in fetched pages are followed
     * @param j timeout in milliseconds, used for the HTTP call and for waiting on child threads
     * @throws IOException if fetching or indexing the start page fails
     * @throws PageException if the start page cannot be processed
     */
    public void parse(IndexWriter indexWriter, URL url, String[] strArr, boolean z, long j) throws IOException, PageException {
        String[] extensions;
        if (strArr == null || strArr.length == 0) {
            // Must be decided before normalising: a null array used to blow up in translateExtension.
            extensions = EXTENSIONS;
        } else {
            // Work on a copy so the caller's array is not rewritten as a side effect.
            extensions = strArr.clone();
            translateExtension(extensions);
        }
        _parse(this.log, indexWriter, null, url, new ArrayList<URL>(), extensions, z, 0, j);
    }

    /**
     * Normalises a URL before it is fetched or compared: a path without any dot that does not end
     * in a slash gets a trailing slash, and the result is passed through
     * {@code HTTPUtil.removeUnecessaryPort}.
     */
    private static URL translateURL(URL url) throws MalformedURLException {
        CFMLEngine engine = CFMLEngineFactory.getInstance();
        String path = url.getPath();
        if (path.lastIndexOf('.') == -1 && !path.endsWith("/")) {
            String query = url.getQuery();
            // Keep the '?' separator; without it the query text would become part of the path.
            String file = (query == null || query.isEmpty()) ? path + "/" : path + "/?" + query;
            url = new URL(url.getProtocol(), url.getHost(), url.getPort(), file);
        }
        return engine.getHTTPUtil().removeUnecessaryPort(url);
    }

    /**
     * Strips a leading {@code "*."} or {@code "."} from every entry of {@code strArr}, in place.
     * {@code null} arrays and {@code null} entries are left alone.
     */
    private void translateExtension(String[] strArr) {
        if (strArr == null) {
            return;
        }
        for (int i = 0; i < strArr.length; i++) {
            String extension = strArr[i];
            if (extension == null) {
                continue;
            }
            if (extension.startsWith("*.")) {
                strArr[i] = extension.substring(2);
            } else if (extension.startsWith(".")) {
                strArr[i] = extension.substring(1);
            }
        }
    }

    /**
     * Requests {@code url} through {@code HTTPUtil.get} and hands the response to
     * {@link DocumentUtil#toDocument}; {@code stringBuffer} is passed along so the caller can
     * read the page content afterwards (see {@link #_parseChildren}).
     */
    private static Document toDocument(StringBuffer stringBuffer, IndexWriter indexWriter, String str, URL url, long j) throws IOException, PageException {
        // HTTPUtil.get takes an int here; clamp instead of letting a large long wrap around.
        int timeout = j > Integer.MAX_VALUE ? Integer.MAX_VALUE : (j < Integer.MIN_VALUE ? Integer.MIN_VALUE : (int) j);
        // "RailoBot" is presumably the user-agent argument - confirm against HTTPUtil.get.
        return DocumentUtil.toDocument(stringBuffer, str, url, CFMLEngineFactory.getInstance().getHTTPUtil().get(url, (String) null, (String) null, timeout, (String) null, "RailoBot", (String) null, -1, (String) null, (String) null, (Header[]) null));
    }

    /**
     * Indexes {@code url} and, when that produced content, continues with the pages it links to.
     *
     * @param log log target, may be {@code null}
     * @param indexWriter writer that receives the documents, may be {@code null}
     * @param str value forwarded unchanged to {@code DocumentUtil.toDocument}
     * @param url page to index
     * @param list URLs handled so far; updated by this call
     * @param strArr accepted file extensions (without dot)
     * @param z whether links are followed
     * @param i counter that is incremented for every child indexer (presumably the crawl depth)
     * @param j timeout in milliseconds
     */
    protected static void _parse(Log log, IndexWriter indexWriter, String str, URL url, List<URL> list, String[] strArr, boolean z, int i, long j) throws IOException, PageException {
        StringBuffer content = _parseItem(log, indexWriter, str, url, list, strArr, z, i, j);
        if (content != null) {
            _parseChildren(log, content, indexWriter, str, url, list, strArr, z, i, j);
        }
    }

    /**
     * Fetches and indexes a single page.
     *
     * @return the buffer that was handed to {@code DocumentUtil.toDocument} for this page, or
     *         {@code null} if the URL was already in {@code list} or no document was produced
     * @throws IOException if the request or the index update fails (logged before rethrowing)
     * @throws PageException if the page cannot be processed (logged before rethrowing)
     */
    public static StringBuffer _parseItem(Log log, IndexWriter indexWriter, String str, URL url, List<URL> list, String[] strArr, boolean z, int i, long j) throws IOException, PageException {
        try {
            url = translateURL(url);
            // Atomic check-and-add: the list is shared with the ChildrenIndexer threads.
            if (!markVisited(list, url)) {
                return null;
            }
            StringBuffer content = new StringBuffer();
            Document document = toDocument(content, indexWriter, str, url, j);
            if (document == null) {
                return null;
            }
            if (indexWriter != null) {
                indexWriter.addDocument(document);
            }
            info(log, url.toExternalForm());
            return content;
        } catch (IOException | PageException e) {
            error(log, url.toExternalForm(), e);
            throw e;
        }
    }

    /**
     * Follows the links contained in an already fetched page. Does nothing unless {@code z} is
     * set. Every eligible link is fetched by its own {@code ChildrenIndexer} thread; this method
     * waits up to {@code j} milliseconds per thread, interrupts the ones still running, and then
     * recurses into the content each finished thread collected.
     *
     * @param stringBuffer content of the page at {@code url}
     * @param url page the links were found in; only links to the same host are followed
     * @param list URLs handled so far
     */
    protected static void _parseChildren(Log log, StringBuffer stringBuffer, IndexWriter indexWriter, String str, URL url, List<URL> list, String[] strArr, boolean z, int i, long j) throws IOException {
        if (!z) {
            return;
        }
        List<?> urls = CFMLEngineFactory.getInstance().getHTMLUtil().getURLS(stringBuffer.toString(), url);
        // Always allocate the list: it used to be null for pages with a single link, which made
        // the add() below fail and the only child was never crawled.
        List<ChildrenIndexer> indexers = new ArrayList<ChildrenIndexer>();
        for (Object link : urls) {
            URL child = translateURL((URL) link);
            if (isVisited(list, child) || !isCrawlable(url, child, strArr)) {
                continue;
            }
            try {
                ChildrenIndexer indexer = new ChildrenIndexer(log, indexWriter, str, child, list, strArr, z, i + 1, j);
                indexers.add(indexer);
                indexer.start();
            } catch (Exception e) {
                // Best effort: one child that cannot be started must not abort the whole crawl.
                error(log, child.toExternalForm(), e);
            }
        }

        boolean interrupted = false;
        for (ChildrenIndexer indexer : indexers) {
            if (indexer.isAlive()) {
                try {
                    indexer.join(j);
                } catch (InterruptedException e) {
                    // Remember the request; the flag is restored once all children are handled.
                    interrupted = true;
                }
            }
            if (indexer.isAlive()) {
                indexer.interrupt();
                if (log != null) {
                    log.error("WebCrawler", "timeout [" + j + " ms] occur while invoking page [" + indexer.url + "]");
                }
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }

        for (ChildrenIndexer indexer : indexers) {
            if (indexer.content != null) {
                // The counter i is passed on unchanged here, as before.
                _parseChildren(log, indexer.content, indexWriter, str, indexer.url, list, strArr, z, i, j);
            }
        }
        urls.clear();
    }

    /**
     * Tells whether {@code child} may be crawled from {@code base}: http or https, an accepted
     * extension, and the same host (case-insensitive).
     */
    private static boolean isCrawlable(URL base, URL child, String[] strArr) {
        String protocol = child.getProtocol();
        boolean web = "http".equalsIgnoreCase(protocol) || "https".equalsIgnoreCase(protocol);
        return web && validExtension(strArr, child.getPath()) && base.getHost().equalsIgnoreCase(child.getHost());
    }

    /**
     * Tells whether {@code list} already holds a URL with the same external form as {@code url}.
     * Compares strings rather than using {@code List.contains}, which would rely on
     * {@code URL.equals}.
     */
    private static boolean isVisited(List<URL> list, URL url) {
        String external = url.toExternalForm();
        synchronized (list) {
            for (URL seen : list) {
                if (seen != null && external.equals(seen.toExternalForm())) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Adds {@code url} to {@code list} unless it is already present.
     *
     * @return {@code true} if the URL was added, {@code false} if it had been visited before
     */
    private static boolean markVisited(List<URL> list, URL url) {
        synchronized (list) {
            if (isVisited(list, url)) {
                return false;
            }
            list.add(url);
            return true;
        }
    }

    /**
     * Checks the extension of {@code str} (a URL path) against {@code strArr}, ignoring case.
     * A path without extension is always accepted.
     */
    private static boolean validExtension(String[] strArr, String str) {
        CFMLEngine engine = CFMLEngineFactory.getInstance();
        String extension = engine.getListUtil().first(engine.getResourceUtil().getExtension(str, ""), "/", true);
        if (engine.getStringUtil().isEmpty(extension)) {
            return true;
        }
        for (String accepted : strArr) {
            if (extension.equalsIgnoreCase(accepted)) {
                return true;
            }
        }
        return false;
    }

    /** Logs {@code "invoke " + str} with log level 1; no-op when {@code log} is {@code null}. */
    private static void info(Log log, String str) {
        if (log == null) {
            return;
        }
        log.log(1, "Webcrawler", "invoke " + str);
    }

    /** Logs a failed invocation of {@code str} as an error; no-op when {@code log} is {@code null}. */
    private static void error(Log log, String str, Exception exc) {
        if (log == null) {
            return;
        }
        log.error("Webcrawler", "invoke " + str + ":", exc);
    }
}
