package de.fuberlin.wiwiss.ng4j.semwebclient;

import com.hp.hpl.jena.rdf.model.impl.RDFDefaultErrorHandler;
import com.hp.hpl.jena.sparql.sse.Tags;
import de.fuberlin.wiwiss.ng4j.NamedGraph;
import de.fuberlin.wiwiss.ng4j.NamedGraphSet;
import de.fuberlin.wiwiss.ng4j.NamedGraphSetFactory;
import de.fuberlin.wiwiss.ng4j.semwebclient.threadutils.Task;
import de.fuberlin.wiwiss.ng4j.semwebclient.threadutils.TaskExecutorBase;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import javax.xml.transform.Transformer;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.xml.serialize.Method;
import org.codehaus.jackson.util.MinimalPrettyPrinter;
import org.cyberneko.html.parsers.DOMParser;
import org.openjena.riot.WebContent;
import org.xml.sax.InputSource;

/* loaded from: input_file:WEB-INF/lib/ng4j-0.9.4.jar:de/fuberlin/wiwiss/ng4j/semwebclient/DereferencerThread.class */
public class DereferencerThread extends TaskExecutorBase {
    private HttpURLConnection connection;
    protected final NamedGraphSetFactory ngsFactory;
    private URL url;
    private Transformer transformerForRDFa;
    private NamedGraphSet tempNgs = null;
    private int maxfilesize = -1;
    private boolean enableRDFa = false;
    private int connectTimeout = 0;
    private int readTimeout = 0;
    private Log log = LogFactory.getLog(DereferencerThread.class);

    public DereferencerThread(NamedGraphSetFactory namedGraphSetFactory) {
        this.ngsFactory = namedGraphSetFactory;
        setPriority(getPriority() - 1);
    }

    @Override // de.fuberlin.wiwiss.ng4j.semwebclient.threadutils.TaskExecutorBase
    public Class<? extends Task> getTaskType() {
        return DereferencingTask.class;
    }

    @Override // de.fuberlin.wiwiss.ng4j.semwebclient.threadutils.TaskExecutorBase
    protected void executeTask(Task task) {
        DereferencingResult executeTask = executeTask((DereferencingTask) task);
        synchronized (this) {
            if (isStopped()) {
                return;
            }
            ((DereferencingTask) task).notifyListeners(executeTask);
        }
    }

    public synchronized boolean isAvailable() {
        return (hasTask() || isStopped()) ? false : true;
    }

    public synchronized boolean startDereferencingIfAvailable(DereferencingTask dereferencingTask) {
        if (!isAvailable()) {
            return false;
        }
        startTask(dereferencingTask);
        return true;
    }

    private DereferencingResult createErrorResult(DereferencingTask dereferencingTask, int i, Exception exc, Map<String, List<String>> map) {
        return new DereferencingResult(dereferencingTask, i, null, exc, map);
    }

    private DereferencingResult createNewUrisResult(DereferencingTask dereferencingTask, int i, ArrayList<String> arrayList) {
        return new DereferencingResult(dereferencingTask, i, arrayList, this.connection.getHeaderFields());
    }

    public DereferencingResult executeTask(DereferencingTask dereferencingTask) {
        this.tempNgs = this.ngsFactory.create();
        try {
            this.url = new URL(dereferencingTask.getURI());
            try {
                URLConnection openConnection = this.url.openConnection();
                openConnection.setConnectTimeout(this.connectTimeout);
                openConnection.setReadTimeout(this.readTimeout);
                if (dereferencingTask.conditional) {
                    openConnection.setIfModifiedSince(dereferencingTask.ifModifiedSince);
                }
                this.connection = (HttpURLConnection) openConnection;
                this.connection.setInstanceFollowRedirects(false);
                this.connection.addRequestProperty("Accept", "application/rdf+xml;q=1,text/xml;q=0.6,text/rdf+n3;q=0.9,application/octet-stream;q=0.5,application/xml q=0.5,application/rss+xml;q=0.5,text/plain; q=0.5,application/x-turtle;q=0.5,application/x-trig;q=0.5,application/xhtml+xml;q=0.5, text/html;q=0.5");
                try {
                    this.connection.connect();
                    try {
                        this.log.debug(this.connection.getResponseCode() + MinimalPrettyPrinter.DEFAULT_ROOT_VALUE_SEPARATOR + this.url + " (" + this.connection.getContentType() + ")");
                        if (this.connection.getResponseCode() == 301 || this.connection.getResponseCode() == 302 || this.connection.getResponseCode() == 303) {
                            DereferencingResult dereferencingResult = new DereferencingResult(dereferencingTask, -5, this.connection.getHeaderField("Location"), this.connection.getHeaderFields());
                            this.connection.disconnect();
                            this.connection = null;
                            return dereferencingResult;
                        }
                        if (this.connection.getResponseCode() == 304) {
                            DereferencingResult dereferencingResult2 = new DereferencingResult(dereferencingTask, 1, null, null, this.connection.getHeaderFields());
                            this.connection.disconnect();
                            this.connection = null;
                            return dereferencingResult2;
                        }
                        if (this.connection.getResponseCode() != 200) {
                            DereferencingResult createErrorResult = createErrorResult(dereferencingTask, -3, new Exception("Unexpected response code (" + this.connection.getResponseCode() + ")"), this.connection.getHeaderFields());
                            this.connection.disconnect();
                            this.connection = null;
                            return createErrorResult;
                        }
                        if (this.connection.getContentType() == null) {
                            DereferencingResult createErrorResult2 = createErrorResult(dereferencingTask, -3, new Exception("Unknown content type"), this.connection.getHeaderFields());
                            this.connection.disconnect();
                            this.connection = null;
                            return createErrorResult2;
                        }
                        try {
                            DereferencingResult parseRdf = parseRdf(dereferencingTask, setLang());
                            this.connection.disconnect();
                            this.connection = null;
                            return parseRdf;
                        } catch (Exception e) {
                            this.log.debug(e.getMessage());
                            DereferencingResult createErrorResult3 = createErrorResult(dereferencingTask, -1, e, this.connection.getHeaderFields());
                            this.connection.disconnect();
                            this.connection = null;
                            return createErrorResult3;
                        }
                    } catch (SocketTimeoutException e2) {
                        this.log.debug("Accessing the connection to <" + this.url.toString() + "> caused a " + e2.getClass().getName() + ": " + e2.getMessage());
                        DereferencingResult createErrorResult4 = createErrorResult(dereferencingTask, -6, e2, null);
                        this.connection.disconnect();
                        this.connection = null;
                        return createErrorResult4;
                    } catch (IOException e3) {
                        this.log.debug("Accessing the connection to <" + this.url.toString() + "> caused a " + e3.getClass().getName() + ": " + e3.getMessage(), e3);
                        DereferencingResult createErrorResult5 = createErrorResult(dereferencingTask, -3, e3, null);
                        this.connection.disconnect();
                        this.connection = null;
                        return createErrorResult5;
                    }
                } catch (SocketTimeoutException e4) {
                    this.log.debug("Connecting to <" + this.url.toString() + "> caused a " + e4.getClass().getName() + ": " + e4.getMessage());
                    this.connection.disconnect();
                    this.connection = null;
                    return createErrorResult(dereferencingTask, -6, e4, null);
                } catch (IOException e5) {
                    this.log.debug("Connecting to <" + this.url.toString() + "> caused a " + e5.getClass().getName() + ": " + e5.getMessage(), e5);
                    this.connection.disconnect();
                    this.connection = null;
                    return createErrorResult(dereferencingTask, -3, e5, null);
                } catch (RuntimeException e6) {
                    this.log.debug("Connecting to <" + this.url.toString() + "> caused a " + e6.getClass().getName() + ": " + e6.getMessage());
                    this.connection.disconnect();
                    this.connection = null;
                    return createErrorResult(dereferencingTask, -3, e6, null);
                }
            } catch (IOException e7) {
                this.log.debug("Creating a connection to <" + this.url.toString() + "> caused a " + e7.getClass().getName() + ": " + e7.getMessage(), e7);
                return createErrorResult(dereferencingTask, -3, e7, null);
            }
        } catch (MalformedURLException e8) {
            return createErrorResult(dereferencingTask, -2, e8, null);
        }
    }

    private DereferencingResult parseRdf(DereferencingTask dereferencingTask, String str) throws Exception {
        if (str == null || !str.toUpperCase().equals("HTML")) {
            RDFDefaultErrorHandler.silent = true;
            this.tempNgs.read(new LimitedInputStream(this.connection.getInputStream(), this.maxfilesize), str, this.url.toString());
            return new DereferencingResult(dereferencingTask, 0, this.tempNgs, null, this.connection.getHeaderFields());
        }
        String readout = readout(this.connection.getInputStream());
        ArrayList<String> fetchLinks = HtmlLinkFetcher.fetchLinks(readout);
        if (!fetchLinks.isEmpty()) {
            Iterator<String> it = fetchLinks.iterator();
            ArrayList<String> arrayList = new ArrayList<>();
            while (it.hasNext()) {
                String replace = it.next().replace("&amp;", "&").replace("&gt;", ">").replace("&lt;", Tags.symLT);
                try {
                    arrayList.add(new URL(this.url, replace).toString());
                } catch (MalformedURLException e) {
                    this.log.debug("Creating a URL from the link <" + replace + "> fetched for <" + this.url.toString() + "> caused an exception (" + e.getMessage() + ").", e);
                }
            }
            return createNewUrisResult(dereferencingTask, -4, arrayList);
        }
        if (this.enableRDFa) {
            this.log.debug("Parsing HTML from <" + this.url.toString() + "> for RDFa");
            StringWriter stringWriter = new StringWriter();
            DOMParser dOMParser = new DOMParser();
            dOMParser.setFeature("http://xml.org/sax/features/namespaces", false);
            dOMParser.setFeature("http://cyberneko.org/html/features/balance-tags", true);
            dOMParser.setFeature("http://cyberneko.org/html/features/balance-tags/ignore-outside-content", true);
            dOMParser.parse(new InputSource(new StringReader(readout)));
            this.transformerForRDFa.transform(new DOMSource(dOMParser.getDocument(), this.url.toString()), new StreamResult(stringWriter));
            this.tempNgs.read(new StringReader(stringWriter.getBuffer().toString()), "RDF/XML", this.url.toString());
            int i = 0;
            Iterator<NamedGraph> listGraphs = this.tempNgs.listGraphs();
            while (listGraphs.hasNext()) {
                i += listGraphs.next().size();
            }
            if (i > 0) {
                this.log.debug("Found RDFa in HTML from <" + this.url.toString() + ">");
                return new DereferencingResult(dereferencingTask, 0, this.tempNgs, null, this.connection.getHeaderFields());
            }
            this.log.debug("No RDFa in HTML from <" + this.url.toString() + ">");
        }
        return createNewUrisResult(dereferencingTask, -4, new ArrayList<>());
    }

    private String setLang() {
        String contentType = this.connection.getContentType();
        if (contentType == null) {
            return null;
        }
        return (contentType.startsWith(WebContent.contentTypeRDFXML) || contentType.startsWith("text/xml") || contentType.startsWith("application/xml") || contentType.startsWith("application/rss+xml") || contentType.startsWith("text/plain")) ? "RDF/XML" : (contentType.startsWith(WebContent.contentTypeN3Alt1) || contentType.startsWith(WebContent.contentTypeTurtleAlt2) || contentType.startsWith(WebContent.contentTypeN3)) ? "N3" : contentType.contains(Method.HTML) ? "HTML" : contentType;
    }

    public synchronized void setMaxfilesize(int i) {
        this.maxfilesize = i;
    }

    public synchronized void setEnableRDFa(boolean z) {
        this.enableRDFa = z;
    }

    public synchronized void setRDFaTransformer(Transformer transformer) {
        this.transformerForRDFa = transformer;
    }

    public synchronized void setConnectTimeout(int i) {
        this.connectTimeout = i;
    }

    public synchronized void setReadTimeout(int i) {
        this.readTimeout = i;
    }

    public static String readout(InputStream inputStream) throws IOException {
        StringBuffer stringBuffer = new StringBuffer();
        byte[] bArr = new byte[4096];
        while (true) {
            int read = inputStream.read(bArr);
            if (read == -1) {
                return stringBuffer.toString();
            }
            stringBuffer.append(new String(bArr, 0, read));
        }
    }
}
