package org.opensextant.xtext.collectors.sharepoint;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.security.NoSuchAlgorithmException;
import java.util.Collection;
import java.util.Locale;
import org.apache.http.HttpResponse;
import org.opensextant.ConfigException;
import org.opensextant.util.TextUtils;
import org.opensextant.xtext.ConvertedDocument;
import org.opensextant.xtext.ExclusionFilter;
import org.opensextant.xtext.collectors.CollectionListener;
import org.opensextant.xtext.collectors.Collector;
import org.opensextant.xtext.collectors.web.CrawlFilter;
import org.opensextant.xtext.collectors.web.HyperLink;
import org.opensextant.xtext.collectors.web.WebClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/opensextant/xtext/collectors/sharepoint/DefaultSharepointCrawl.class */
/**
 * Crawls a SharePoint site starting from {@link #getSite()}: every page is parsed into
 * links, file links are downloaded into the archive and (when a converter is configured)
 * converted, and SharePoint folder links are followed recursively up to a fixed depth.
 *
 * <p>The crawl state ({@code depth}) is a plain field inherited from the superclass, so an
 * instance is presumably meant to be driven by one thread at a time — TODO confirm against
 * {@code SharepointClient}.
 */
public class DefaultSharepointCrawl extends SharepointClient implements ExclusionFilter, Collector, CrawlFilter {
    /** Folder recursion stops once {@code depth} reaches this value. */
    private static final int MAX_DEPTH = 5;

    /** Optional callback notified about collected items; {@code null} means "no listener". */
    protected CollectionListener listener;
    private final Logger log;
    private boolean allowCurrentSiteOnly;
    private boolean allowCurrentDirOnly;

    /**
     * Creates a crawler; all arguments are passed straight through to the
     * {@code SharepointClient} constructor, which defines their meaning.
     *
     * <p>Defaults: no listener, restrict to the current site, do not restrict to the
     * current directory.
     *
     * @throws MalformedURLException as declared by the superclass constructor
     * @throws ConfigException as declared by the superclass constructor
     */
    public DefaultSharepointCrawl(String str, String str2, String str3, String str4, String str5) throws MalformedURLException, ConfigException {
        super(str, str2, str3, str4, str5);
        this.listener = null;
        this.log = LoggerFactory.getLogger(getClass());
        this.allowCurrentSiteOnly = true;
        this.allowCurrentDirOnly = false;
    }

    /**
     * Sets the listener that is consulted before downloading an item and notified after
     * an item has been collected. May be {@code null} to disable both.
     */
    public void setListener(CollectionListener collectionListener) {
        this.listener = collectionListener;
    }

    /**
     * Decides whether a path or URL should be skipped.
     *
     * @param str path or URL to test; must not be {@code null}
     * @return {@code true} for {@code .flv} / {@code .mp4} targets and {@code mailto:} links
     *         (case-insensitive)
     */
    @Override // org.opensextant.xtext.ExclusionFilter
    public boolean filterOutFile(String str) {
        // Locale.ROOT keeps the comparison stable regardless of the JVM default locale
        // (e.g. a Turkish default locale lower-cases 'I' to a dotless 'ı', breaking "mailto:").
        String lowerCase = str.toLowerCase(Locale.ROOT);
        return lowerCase.endsWith(".flv") || lowerCase.endsWith(".mp4") || lowerCase.startsWith("mailto:");
    }

    /**
     * Runs the crawl starting at the configured site URL.
     *
     * @throws IOException if the starting page cannot be fetched or parsed
     */
    @Override // org.opensextant.xtext.collectors.Collector
    public void collect() throws IOException {
        collectItems(getSite());
    }

    /**
     * Applies {@link #filterOutFile(String)} to the absolute URL of a link.
     *
     * @return {@code true} if the link should be skipped
     */
    public boolean filterOut(HyperLink hyperLink) {
        return filterOutFile(hyperLink.getAbsoluteURL());
    }

    /**
     * Collects all file links found on the page at {@code url}, then recurses into every
     * SharePoint folder link found on the same page.
     *
     * <p>Failures on an individual file or folder are logged and do not abort the crawl of
     * the remaining links.
     *
     * @param url page to fetch and scan for links
     * @throws IOException if the page itself cannot be fetched or parsed
     */
    public void collectItems(URL url) throws IOException {
        if (this.depth >= MAX_DEPTH) {
            this.log.info("Maximum Depth reached with link: {}", url);
            return;
        }
        Collection<SPLink> links = parseContentPage(WebClient.readTextStream(getPage(url).getEntity().getContent()), url);
        this.depth++;
        try {
            // First pass: download the files linked from this page.
            for (SPLink link : links) {
                if (filterOut(link)) {
                    this.log.debug("Filtering out {}", link);
                } else if (isAllowCurrentSiteOnly() && !link.isCurrentSite() && !link.isCurrentHost()) {
                    this.log.info("Not on current site: {}", link);
                } else if (link.isFile()) {
                    pause();
                    collectFile(link);
                }
            }
            // Second pass: descend into sub-folders only after this page's files are done.
            for (SPLink link : links) {
                if (link.isSharepointFolder()) {
                    try {
                        collectItems(link.getSimplifiedFolderURL());
                    } catch (Exception e) {
                        this.log.error("Folder URL {} was not saved due to a net or IO issue.", link.getSimplifiedFolderURL(), e);
                    }
                }
            }
        } finally {
            // Always undo the increment: callers catch exceptions and keep crawling, so a
            // leaked increment would silently shrink the effective depth limit.
            this.depth--;
        }
    }

    /** Downloads and converts a single file link, logging (not propagating) any failure. */
    private void collectFile(SPLink link) {
        try {
            String itemId = TextUtils.text_id(link.getAbsoluteURL());
            if (shouldSkip(itemId)) {
                return;
            }
            HttpResponse page = getPage(link.getURL());
            File archived = createArchiveFile(link.getNormalPath(), false);
            WebClient.downloadFile(page.getEntity(), archived.getAbsolutePath());
            convertContent(archived, link);
        } catch (Exception e) {
            this.log.error("Item for URL {} was not saved due to a net or IO issue.", link.getAbsoluteURL(), e);
        }
    }

    /**
     * Asks the listener whether the item is already known. A listener failure is logged
     * and treated as "skip", so the item is not downloaded in that case.
     */
    private boolean shouldSkip(String itemId) {
        if (this.listener == null) {
            return false;
        }
        try {
            return this.listener.exists(itemId);
        } catch (Exception e) {
            this.log.error("Collection Listener error", e);
            return true;
        }
    }

    /**
     * Post-processes a downloaded file.
     *
     * <p>Without a converter the raw file is handed to the listener (if any). With a
     * converter, an existing file is converted, tagged with its source URL and referrer,
     * saved to the conversion's text path, and then reported to the listener (if any).
     *
     * @param file downloaded file; must not be {@code null}
     * @param hyperLink link the file was downloaded from; must not be {@code null}
     * @throws IOException if either argument is {@code null}, or on conversion/save failure
     * @throws ConfigException as declared by the conversion or listener calls
     * @throws NoSuchAlgorithmException as declared by the conversion or listener calls
     */
    protected void convertContent(File file, HyperLink hyperLink) throws IOException, ConfigException, NoSuchAlgorithmException {
        if (file == null || hyperLink == null) {
            throw new IOException("Bad data - null values for file and link...");
        }
        if (this.converter == null) {
            if (this.listener != null) {
                this.log.debug("Link {} was saved to {}", hyperLink.getAbsoluteURL(), file.getAbsolutePath());
                this.listener.collected(file);
            } else {
                // Nothing to convert with and nobody to notify; the download stays in the archive.
                this.log.debug("No converter or listener configured; {} left as downloaded", file);
            }
            return;
        }
        if (!file.exists()) {
            return;
        }
        ConvertedDocument converted = this.converter.convert(file);
        if (converted == null) {
            this.log.error("Document was not converted, FILE={}", file);
            return;
        }
        converted.setDefaultID();
        converted.addSourceURL(hyperLink.getAbsoluteURL(), hyperLink.getReferrer());
        converted.saveBuffer(new File(converted.textpath));
        if (this.listener != null) {
            this.listener.collected(converted, file.getAbsolutePath());
        }
    }

    /** Returns the stored "current directory only" flag; this class itself does not consult it. */
    @Override // org.opensextant.xtext.collectors.web.CrawlFilter
    public boolean isAllowCurrentDirOnly() {
        return this.allowCurrentDirOnly;
    }

    /** Stores the "current directory only" flag. */
    @Override // org.opensextant.xtext.collectors.web.CrawlFilter
    public void setAllowCurrentDirOnly(boolean z) {
        this.allowCurrentDirOnly = z;
    }

    /**
     * Returns whether links that are neither on the current site nor on the current host
     * are skipped during {@link #collectItems(URL)}.
     */
    @Override // org.opensextant.xtext.collectors.web.CrawlFilter
    public boolean isAllowCurrentSiteOnly() {
        return this.allowCurrentSiteOnly;
    }

    /** Enables or disables the current-site restriction applied in {@link #collectItems(URL)}. */
    @Override // org.opensextant.xtext.collectors.web.CrawlFilter
    public void setAllowCurrentSiteOnly(boolean z) {
        this.allowCurrentSiteOnly = z;
    }
}
