package org.opensextant.xtext.collectors.web;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClientBuilder;
import org.opensextant.ConfigException;
import org.opensextant.util.FileUtility;
import org.opensextant.xtext.XText;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/opensextant/xtext/collectors/web/WebClient.class */
/**
 * Minimal HTTP client used as the base for web collectors: it holds the target site, optional
 * proxy settings and an archive root, fetches pages with Apache HttpClient, and extracts
 * {@code href} links from page content.
 *
 * <p>This class keeps mutable, unsynchronized state (site, proxy, link maps); nothing here makes
 * it safe for concurrent use.
 */
public class WebClient {
    /** Root folder under which archive files are created; may be null when no archive is used. */
    protected String archiveRoot;
    public static final int MAX_DEPTH = 5;
    /** Captures the quoted value of any {@code href=} attribute, ignoring case. */
    private static final Pattern HREF_MATCH =
            Pattern.compile("href=[\"']([^\"']+)[\"']", Pattern.CASE_INSENSITIVE);
    /** Port used when a proxy is given as a bare host name. */
    private static final int DEFAULT_PROXY_PORT = 80;
    /** Status code that {@link #getPage(URL)} reports as an {@link IOException}. */
    private static final int HTTP_NOT_FOUND = 404;
    /** Size of the character buffer used by {@link #readTextStream(InputStream)}. */
    private static final int READ_BUFFER_CHARS = 8192;

    /** Instance logger, named after the runtime class so subclasses log under their own name. */
    private final Logger log = LoggerFactory.getLogger(getClass());
    /** Last proxy specification accepted by {@link #setProxy(String)}, or null. */
    private String proxy = null;
    /** Host part of {@link #site}. */
    protected String server = null;
    /** The site this client targets. */
    protected URL site = null;
    /** Proxy applied by {@link #getClient()} when system properties are not in use. */
    protected HttpHost proxyHost = null;
    /** Delay in milliseconds applied by {@link #pause()}; values of zero or less disable it. */
    protected int interval = 100;
    protected XText converter = null;
    /** Cleared by {@link #reset()}; presumably the links discovered so far, keyed by URL. */
    protected Map<String, HyperLink> found = new HashMap<>();
    /** Cleared by {@link #reset()}; presumably identifiers of items already saved. */
    protected Set<String> saved = new HashSet<>();
    protected int depth = 0;
    boolean useSystemProperties = false;
    private String name = "Unamed Web crawler";

    /**
     * Builds a URL from text, encoding literal spaces as {@code %20}.
     *
     * @param str URL text, possibly containing spaces
     * @return the parsed URL
     * @throws MalformedURLException if the encoded text is not a valid URL
     */
    public static URL prepURL(String str) throws MalformedURLException {
        return new URL(str.replace(" ", "%20"));
    }

    /**
     * Encodes literal spaces in a URL path as {@code %20}; no other character is touched.
     *
     * @param str path text
     * @return the path with each space replaced
     * @throws MalformedURLException never thrown by this implementation; kept so that existing
     *         callers that catch it still compile
     */
    public static String prepURLPath(String str) throws MalformedURLException {
        return str.replace(" ", "%20");
    }

    /**
     * Creates a client for one site.
     *
     * @param str  URL of the site
     * @param str2 archive root folder, or null for none
     * @throws MalformedURLException if {@code str} is not a valid URL
     * @throws ConfigException declared for callers; not thrown by the visible code
     */
    public WebClient(String str, String str2) throws MalformedURLException, ConfigException {
        setSite(str);
        this.archiveRoot = str2;
    }

    /**
     * Verifies that the site can be fetched and, when an archive root is set, that it is an
     * existing directory.
     *
     * @throws ConfigException if the site is unreachable or the archive root is not a directory
     */
    public void configure() throws ConfigException {
        testAvailability();
        if (this.archiveRoot == null) {
            return;
        }
        // isDirectory() is false for a missing path, so a separate exists() check is redundant.
        if (!new File(this.archiveRoot).isDirectory()) {
            throw new ConfigException("Destination archive does not exist. Caller must create prior to creation.");
        }
    }

    /**
     * Sets the converter stored in {@link #converter}.
     *
     * @param xText converter instance
     */
    public void setConverter(XText xText) {
        this.converter = xText;
    }

    /**
     * Resolves a relative path under the archive root and makes sure the needed folders exist.
     *
     * <p>NOTE(review): the decompiler reported that this method was originally {@code protected}.
     *
     * @param str relative path inside the archive
     * @param z   true if the path itself is a directory to create; false if it is a file whose
     *            parent directory should be created
     * @return the resolved file (the file itself is not created)
     * @throws IOException if no archive root is set or the parent directory cannot be created
     */
    public File createArchiveFile(String str, boolean z) throws IOException {
        if (this.archiveRoot == null) {
            // Without this guard the path would be built under a folder literally named "null".
            throw new IOException("Archive root is not set; cannot create archive path " + str);
        }
        File target = new File((this.archiveRoot + '/' + str).replace("//", "/"));
        if (z) {
            FileUtility.makeDirectory(target);
        } else {
            File parent = target.getParentFile();
            // Re-check isDirectory() because mkdirs() also returns false when the folder already exists.
            if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
                throw new IOException("Unable to create archive folder " + parent);
            }
        }
        return target;
    }

    /**
     * Sets the proxy from a {@code host} or {@code host:port} specification. The port defaults to
     * 80 when it is absent or empty. A null or blank argument clears the proxy.
     *
     * @param str proxy specification
     * @throws NumberFormatException if the port part is present but not an integer
     */
    public void setProxy(String str) {
        String spec = (str == null) ? "" : str.trim();
        if (spec.isEmpty()) {
            this.proxy = null;
            this.proxyHost = null;
            return;
        }
        String host = spec;
        int port = DEFAULT_PROXY_PORT;
        if (spec.contains(":")) {
            String[] parts = spec.split(":");
            if (parts.length > 0) {
                host = parts[0];
            }
            // "host:" splits into a single element; keep the default port in that case.
            if (parts.length > 1 && !parts[1].isEmpty()) {
                port = Integer.parseInt(parts[1]);
            }
        }
        // Assign state only after parsing succeeded so a bad port leaves the old proxy intact.
        HttpHost parsed = new HttpHost(host, port);
        this.proxy = spec;
        this.proxyHost = parsed;
    }

    /**
     * Sets the proxy from an explicit host and port.
     *
     * @param str proxy host name
     * @param i   proxy port
     */
    public void setProxy(String str, int i) {
        this.proxyHost = new HttpHost(str, i);
    }

    /**
     * Chooses whether {@link #getClient()} configures itself from JVM system properties instead
     * of the explicit proxy.
     *
     * @param z true to use system properties
     */
    public void enableSystemProperties(boolean z) {
        this.useSystemProperties = z;
    }

    /**
     * Sets the target site and derives {@link #server} from its host.
     *
     * @param str URL of the site
     * @throws MalformedURLException if {@code str} is not a valid URL
     */
    public void setSite(String str) throws MalformedURLException {
        URL parsed = new URL(str);
        this.site = parsed;
        this.server = parsed.getHost();
    }

    /** @return the target site URL */
    public URL getSite() {
        return this.site;
    }

    /** @return the host name of the target site */
    public String getServer() {
        return this.server;
    }

    /**
     * Builds a new HTTP client on every call, using either system properties or the explicit
     * proxy, with the {@code "compatibility"} cookie spec.
     *
     * <p>NOTE(review): nothing in this class closes the clients it builds; confirm whether callers
     * are expected to manage their lifecycle.
     *
     * @return a newly built client
     */
    public HttpClient getClient() {
        HttpClientBuilder builder = HttpClientBuilder.create();
        if (this.useSystemProperties) {
            builder = builder.useSystemProperties();
        } else if (this.proxyHost != null) {
            builder.setProxy(this.proxyHost);
        }
        RequestConfig requestConfig = RequestConfig.custom().setCookieSpec("compatibility").build();
        return builder.setDefaultRequestConfig(requestConfig).build();
    }

    /**
     * Fetches the site once to prove it is reachable, then releases the response.
     *
     * @throws ConfigException if the site is not set or the fetch fails for any reason
     */
    public void testAvailability() throws ConfigException {
        if (this.site == null) {
            throw new ConfigException("Engineering Error: site was not set.");
        }
        try {
            // Only reachability matters here, so the body is discarded to free the connection.
            releaseQuietly(getPage(this.site));
        } catch (Exception e) {
            throw new ConfigException(String.format("%s failed to collect URL %s", getName(), this.site), e);
        }
    }

    /** Clears the {@link #found} and {@link #saved} collections. */
    public void reset() {
        this.found.clear();
        this.saved.clear();
    }

    /**
     * Sets the delay used by {@link #pause()}.
     *
     * @param i delay in milliseconds; zero or less disables pausing
     */
    public void setInterval(int i) {
        this.interval = i;
    }

    /**
     * Sleeps for the configured interval. If the thread is interrupted the sleep ends early and
     * the interrupt flag is restored so callers can observe it.
     *
     * <p>NOTE(review): the decompiler reported that this method was originally {@code protected}.
     */
    public void pause() {
        if (this.interval <= 0) {
            return;
        }
        try {
            Thread.sleep(this.interval);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Issues a GET for the given URL using a client from {@link #getClient()}.
     *
     * @param url page to fetch
     * @return the response; the caller is responsible for consuming or closing its entity
     * @throws IOException if the URL is not a valid URI, the request fails, or the server answers 404
     */
    public HttpResponse getPage(URL url) throws IOException {
        HttpClient client = getClient();
        HttpGet request = new HttpGet();
        try {
            request.setURI(url.toURI());
        } catch (URISyntaxException e) {
            throw new IOException(e);
        }
        HttpResponse response = client.execute(request);
        if (response.getStatusLine().getStatusCode() == HTTP_NOT_FOUND) {
            // The caller never sees this response, so release it here before failing.
            releaseQuietly(response);
            throw new IOException("HTTP Page " + url + " not found");
        }
        return response;
    }

    /**
     * Extracts the distinct non-resource links referenced by {@code href} attributes in a page.
     * Bare "/" links, fragment links, {@code javascript} links and {@code mailto:} links are
     * skipped, and a trailing "/" is removed before a link is built.
     *
     * @param str  page content; null yields an empty collection
     * @param url  passed through to the {@link HyperLink} constructor
     * @param url2 passed through to the {@link HyperLink} constructor
     * @return the links found, de-duplicated by their string form
     */
    public Collection<HyperLink> parseContentPage(String str, URL url, URL url2) {
        Map<String, HyperLink> links = new HashMap<>();
        if (str == null) {
            return links.values();
        }
        Matcher hrefs = HREF_MATCH.matcher(str);
        while (hrefs.find()) {
            String href = hrefs.group(1).trim();
            // Locale.ROOT keeps the scheme checks below stable under locales such as Turkish.
            String hrefLower = href.toLowerCase(Locale.ROOT);
            if ("/".equals(href) || hrefLower.startsWith("#") || hrefLower.startsWith("javascript")) {
                continue;
            }
            if (hrefLower.startsWith("mailto:")) {
                this.log.info("Ignore Mailto {}", hrefLower);
                continue;
            }
            if (href.endsWith("/")) {
                href = href.substring(0, href.length() - 1);
            }
            try {
                HyperLink link = new HyperLink(href, url, url2);
                if (link.isResource()) {
                    continue;
                }
                String key = link.toString();
                if (!links.containsKey(key)) {
                    this.log.debug("Found link {}", href);
                    links.put(key, link);
                }
            } catch (Exception e) {
                // One bad link must not abort parsing of the rest of the page.
                this.log.error("Failed to parse URL {}", href, e);
            }
        }
        return links.values();
    }

    /**
     * Reads a stream fully as UTF-8 text and closes it, whether or not reading succeeds.
     *
     * @param inputStream stream to read
     * @return the decoded text
     * @throws IOException if reading fails
     */
    public static String readTextStream(InputStream inputStream) throws IOException {
        try (InputStream in = inputStream;
                InputStreamReader reader = new InputStreamReader(in, StandardCharsets.UTF_8)) {
            StringWriter text = new StringWriter();
            char[] buffer = new char[READ_BUFFER_CHARS];
            int count;
            while ((count = reader.read(buffer)) >= 0) {
                text.write(buffer, 0, count);
            }
            return text.toString();
        }
    }

    /**
     * Copies an entity's content to a file, closing both the content stream and the file when
     * done or on failure.
     *
     * @param httpEntity entity whose content is saved
     * @param str        destination file path
     * @throws IOException if the content cannot be read or the file cannot be written
     */
    public static void downloadFile(HttpEntity httpEntity, String str) throws IOException {
        try (InputStream content = httpEntity.getContent();
                FileOutputStream out = new FileOutputStream(str)) {
            IOUtils.copy(content, out);
        }
    }

    /**
     * Sets the display name used in error messages.
     *
     * @param str new name
     */
    public void setName(String str) {
        this.name = str;
    }

    /** @return the display name of this client */
    public String getName() {
        return this.name;
    }

    /** Closes a response's content stream, if it has one, ignoring I/O failures. */
    private static void releaseQuietly(HttpResponse response) {
        if (response == null) {
            return;
        }
        HttpEntity entity = response.getEntity();
        if (entity == null || !entity.isStreaming()) {
            return;
        }
        try {
            InputStream content = entity.getContent();
            if (content != null) {
                content.close();
            }
        } catch (IOException ignored) {
            // Best-effort cleanup: a failure to close must not mask the caller's real outcome.
        }
    }
}
