package org.archive.wayback.replay.mimetype;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.archive.wayback.core.Resource;
import org.archive.wayback.replay.DecodingResource;
import org.archive.wayback.replay.charset.CharsetDetector;
import org.archive.wayback.replay.charset.StandardCharsetDetector;
import org.archive.wayback.util.graph.GraphRenderer;

/* loaded from: input_file:org/archive/wayback/replay/mimetype/SimpleMimeTypeDetector.class */
/**
 * Content-sniffing {@link MimeTypeDetector}.
 *
 * <p>{@link #sniff(Resource)} peeks at the first {@link #getSniffLength()} bytes of a
 * resource, first comparing them against a table of binary file signatures and then,
 * after decoding them to text, against a set of regular expressions that recognise
 * HTML, JavaScript/JSON and CSS. {@code null} is returned when nothing matches.
 */
public class SimpleMimeTypeDetector implements MimeTypeDetector {
    /** Default number of leading bytes inspected by {@link #sniff(Resource)}. */
    public static final int DEFAULT_SNIFF_LENGTH = 1536;

    /**
     * Lower bound for the size of the peek buffer. The signature table reads fixed
     * offsets (up to index 6), so the buffer must never be shorter than this.
     */
    protected static final int MINIMUM_SNIFF_BUFFER_SIZE = 10;

    private static final Logger logger = Logger.getLogger(SimpleMimeTypeDetector.class.getName());

    private static final String BINARY_FILE = "application/octet-stream";

    /** One simple CSS selector: a type/universal selector with qualifiers, or qualifiers alone. */
    private static final String RE_CSS_SIMPLE_SELECTOR = "(?:(?:[-a-z0-9]+|\\*)(?:[.#:][-_a-z0-9]+|\\[.+?\\])*|(?:[.#:][-_a-z0-9]+|\\[.+?\\])+)";

    private static final Pattern RE_XML_PROLOGUE = Pattern.compile("\\s*<\\?xml\\s+version=\"[.\\d]+\"\\s+.*\\?>");
    private static final Pattern RE_HTML_ELEMENTS = Pattern.compile("(?i)\\s*<(HTML|HEAD|STYLE|SCRIPT|META|BODY)(\\s|>)");
    private static final Pattern RE_DOCTYPE_HTML = Pattern.compile("(?i)\\s*<!DOCTYPE\\s+HTML");
    private static final Pattern RE_SGML_COMMENT = Pattern.compile("(?s)\\s*<!--.*?-->");
    private static final Pattern RE_END_TAG = Pattern.compile("(?i)</[a-z][a-z0-9]*>");
    // The identifier tail is "*" (not "+") so that one-letter names such as "var x" match.
    private static final Pattern RE_JS_VAR = Pattern.compile("(?m)^var\\s+[_a-zA-Z$][_a-zA-Z$0-9]*");
    private static final Pattern RE_JS_FUNCTION = Pattern.compile("(?s)function(?:\\s+[a-zA-Z0-9_$]+\\s*)?\\(");
    private static final Pattern RE_JSON_HEAD = Pattern.compile("\\s*\\{\\s*\"");
    // DOTALL so that comments spanning several lines are skipped, like RE_SGML_COMMENT.
    private static final Pattern RE_CSS_COMMENT = Pattern.compile("(?s)\\s*/\\*.*?\\*/");
    private static final Pattern RE_CSS_AT_RULE = Pattern.compile("\\s*@(import|media|document|charset|font-face|keyframes|namespace|supports)\\s+");
    // A selector, optionally followed by more selectors joined by whitespace, ',', '+' or '>',
    // then the opening brace of the declaration block.
    private static final Pattern RE_CSS_RULESET_START = Pattern.compile(
            "(?i)\\s*" + RE_CSS_SIMPLE_SELECTOR + "(?:[\\s,+>]+" + RE_CSS_SIMPLE_SELECTOR + ")*\\s*\\{");
    private static final Pattern RE_CSS_DECLARATION = Pattern.compile("(?i)\\s*[-a-z]+\\s*:\\s*[^;}]+[;}]");

    private int sniffLength = DEFAULT_SNIFF_LENGTH;
    private CharsetDetector charsetDetector = new StandardCharsetDetector();

    /**
     * Sets how many leading bytes of a resource are read for sniffing.
     *
     * @param i number of bytes; values below zero are treated as zero when peeking
     */
    public void setSniffLength(int i) {
        this.sniffLength = i;
    }

    /**
     * @return the configured number of leading bytes read for sniffing
     */
    public int getSniffLength() {
        return this.sniffLength;
    }

    /**
     * Sets the detector used to choose the charset for decoding the sniffed bytes.
     *
     * @param charsetDetector detector to use; {@code null} installs a new
     *        {@link StandardCharsetDetector}
     */
    public void setCharsetDetector(CharsetDetector charsetDetector) {
        this.charsetDetector = (charsetDetector != null) ? charsetDetector : new StandardCharsetDetector();
    }

    /**
     * Tests whether {@code data}, starting at {@code offset}, holds the given byte values.
     *
     * @param data     buffer to inspect; must be long enough for the comparison
     * @param offset   index of the first byte to compare
     * @param expected expected values, each given as an unsigned value (0-255)
     * @return {@code true} if every byte matches
     */
    private static boolean matchesAt(byte[] data, int offset, int... expected) {
        for (int k = 0; k < expected.length; k++) {
            if ((data[offset + k] & 0xFF) != expected[k]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Matches the leading bytes against a table of known binary signatures.
     *
     * @param bytes leading bytes of the content; arrays shorter than
     *        {@link #MINIMUM_SNIFF_BUFFER_SIZE} are zero-padded before inspection
     * @return the MIME type for a recognised signature, or {@code null} if nothing is
     *         recognised or the content starts with a Unicode byte-order mark (i.e. is text)
     */
    private String detectBinaryTypes(byte[] bytes) {
        if (bytes.length < MINIMUM_SNIFF_BUFFER_SIZE) {
            // Fixed offsets up to index 6 are read below; pad so short input cannot overrun.
            byte[] padded = new byte[MINIMUM_SNIFF_BUFFER_SIZE];
            System.arraycopy(bytes, 0, padded, 0, bytes.length);
            bytes = padded;
        }
        final int second = bytes[1] & 0xFF;
        switch (bytes[0] & 0xFF) {
            case 0x89: // \x89 P N G
                if (matchesAt(bytes, 1, 'P', 'N', 'G')) {
                    return "image/png";
                }
                break;
            case 0xCA: // CA FE BA BE
                if (matchesAt(bytes, 1, 0xFE, 0xBA, 0xBE)) {
                    return "application/java";
                }
                break;
            case 0xD0: // D0 CF 11 E0: OLE2 compound file signature
                if (matchesAt(bytes, 1, 0xCF, 0x11, 0xE0)) {
                    return BINARY_FILE;
                }
                break;
            case 0xEF: // EF BB BF: UTF-8 byte-order mark, so this is text
                if (matchesAt(bytes, 1, 0xBB, 0xBF)) {
                    return null;
                }
                break;
            case 0xF7:
                if (matchesAt(bytes, 1, 0x02, 0x01)) {
                    return "application/x-dvi";
                }
                break;
            case 0xFE: // FE FF: UTF-16 big-endian byte-order mark
                if (second == 0xFF) {
                    return null;
                }
                break;
            case 0xFF:
                if (second == 0xFE) { // FF FE: UTF-16 little-endian byte-order mark
                    return null;
                }
                // Compare as unsigned values: a masked byte is never negative.
                if ((second & 0xFE) == 0xFA) { // FF FA / FF FB
                    return "audio/mp3";
                }
                return second == 0xD8 ? "image/jpeg" : BINARY_FILE;
            case 0x00:
                if (second == 0x00 || second == 0x01) {
                    return BINARY_FILE;
                }
                break;
            case 0x01:
                if (second == 0xB3 || second == 0xBA) {
                    return "video/mpeg";
                }
                if (second == 0x00) {
                    return BINARY_FILE;
                }
                break;
            case 0x1F:
                if (second == 0x8B) {
                    return "application/x-gzip";
                }
                if (second == 0x9D) {
                    return "application/x-compress";
                }
                break;
            case '%':
                if (matchesAt(bytes, 1, 'P', 'D', 'F', '-')) {
                    return "application/pdf";
                }
                if (matchesAt(bytes, 1, '!', 'P', 'S', '-')) {
                    return "application/postscript";
                }
                break;
            case 'B':
                if (matchesAt(bytes, 1, 'Z', 'h')) {
                    return "application/x-bzip2";
                }
                break;
            case 'F':
                if (matchesAt(bytes, 1, 'W', 'S')) {
                    return "application/x-shockwave-flash";
                }
                if (matchesAt(bytes, 1, 'L', 'V', 0x01)) {
                    return "video/x-flv";
                }
                break;
            case 'G':
                if (matchesAt(bytes, 1, 'I', 'F', '8')) {
                    return "image/gif";
                }
                break;
            case 'M':
                if (second == 'Z') {
                    if (bytes[3] == 0 || bytes[3] == 1) {
                        return "application/x-dosexec";
                    }
                } else {
                    if (matchesAt(bytes, 1, 'S', 'C', 'F')) {
                        return "application/vnd.ms-cab-compressed";
                    }
                    if (matchesAt(bytes, 1, 'T', 'h', 'd')) {
                        return "audio/midi";
                    }
                }
                break;
            case 'P':
                if (matchesAt(bytes, 1, 'K', 0x03, 0x04)) {
                    return "application/zip";
                }
                if (matchesAt(bytes, 1, 'E', 0x00, 0x00, 'M', 'S')) {
                    return BINARY_FILE;
                }
                break;
            case 'm':
                if (matchesAt(bytes, 1, 'o', 'o', 'v') || matchesAt(bytes, 1, 'd', 'a', 't')) {
                    return "video/quicktime";
                }
                break;
            case '{':
                if (matchesAt(bytes, 1, '\\', 'r', 't', 'f', '1')) {
                    return "application/rtf";
                }
                break;
            default:
                break;
        }
        // "-lh5-" at offset 2, whatever the first two bytes are.
        if (matchesAt(bytes, 2, '-', 'l', 'h', '5', '-')) {
            return BINARY_FILE;
        }
        return null;
    }

    /**
     * Reads up to {@link #getSniffLength()} leading bytes of {@code resource} without
     * consuming them: the resource is marked before reading and reset afterwards, also
     * when reading fails. If the resource has a {@code content-encoding} header for which
     * {@link DecodingResource#forEncoding} supplies a decoder, the bytes are read through
     * that decoder.
     *
     * <p>A single {@code read} call is issued, so fewer bytes than requested may be
     * present; the remainder of the returned array keeps its initial zero bytes.
     *
     * @param resource resource to peek into
     * @return a buffer of at least {@link #MINIMUM_SNIFF_BUFFER_SIZE} bytes
     * @throws IOException if reading from or resetting the resource fails
     */
    protected byte[] peekContent(Resource resource) throws IOException {
        // A negative length would be rejected by read(); treat it as "read nothing".
        final int length = Math.max(this.sniffLength, 0);
        final byte[] buffer = new byte[Math.max(length, MINIMUM_SNIFF_BUFFER_SIZE)];
        final String encoding = resource.getHeader("content-encoding");
        resource.mark(length + 100);
        try {
            DecodingResource decoded = null;
            if (encoding != null) {
                decoded = DecodingResource.forEncoding(encoding, resource);
            }
            if (decoded != null) {
                decoded.read(buffer, 0, length);
            } else {
                resource.read(buffer, 0, length);
            }
        } finally {
            // Always rewind, so a failed peek does not leave the resource mid-stream.
            resource.reset();
        }
        return buffer;
    }

    /**
     * Guesses the MIME type of {@code resource} from its leading bytes.
     *
     * <p>Binary signatures are checked first. Otherwise the bytes are decoded with the
     * charset reported by the configured {@link CharsetDetector}, a leading U+FEFF is
     * dropped, and the text is tested for HTML, then JavaScript/JSON, then CSS.
     *
     * @param resource resource to inspect
     * @return the detected MIME type, or {@code null} if the type is not recognised or
     *         the content could not be read or decoded
     */
    @Override // org.archive.wayback.replay.mimetype.MimeTypeDetector
    public String sniff(Resource resource) {
        final byte[] head;
        try {
            head = peekContent(resource);
        } catch (IOException e) {
            logger.warning("error reading " + this.sniffLength + " from resource: " + e.getMessage());
            return null;
        }
        if (head == null) {
            // peekContent is overridable; treat "no data" as "unknown".
            return null;
        }

        final String binaryType = detectBinaryTypes(head);
        if (binaryType != null) {
            return binaryType;
        }

        String text;
        try {
            text = new String(head, this.charsetDetector.getCharset(resource, null));
        } catch (IOException e) {
            // Also covers UnsupportedEncodingException; without text there is nothing to sniff.
            logger.fine("cannot decode sniffed content: " + e);
            return null;
        }
        if (text.length() > 0 && text.charAt(0) == 0xFEFF) {
            text = text.substring(1);
        }

        String type = detectHTML(text);
        if (type == null) {
            type = detectJavaScript(text);
        }
        if (type == null) {
            type = detectCSS(text);
        }
        return type;
    }

    /**
     * Repeatedly matches {@code pattern} at the current position and advances past each
     * match.
     *
     * @param pattern pattern to match at the start of the remaining text
     * @param text    text being scanned
     * @param from    index at which to start
     * @return the index just after the last consecutive match, or {@code from} if the
     *         pattern does not match there
     */
    private static int skipAll(Pattern pattern, String text, int from) {
        int position = from;
        Matcher m = pattern.matcher(text);
        m.region(position, text.length());
        while (m.lookingAt()) {
            position = m.end();
            m.region(position, text.length());
        }
        return position;
    }

    /**
     * Tests whether {@code str} looks like HTML.
     *
     * <p>An optional XML prologue and any leading SGML comments are skipped. The text is
     * HTML if what follows starts with an HTML doctype or one of a few well-known start
     * tags, or if an end tag occurs anywhere after that point.
     *
     * @param str decoded leading content
     * @return {@code "text/html"}, or {@code null} if the text does not look like HTML
     */
    protected String detectHTML(String str) {
        int start = 0;
        Matcher prologue = RE_XML_PROLOGUE.matcher(str);
        if (prologue.lookingAt()) {
            start = prologue.end();
        }
        start = skipAll(RE_SGML_COMMENT, str, start);

        final int limit = str.length();
        if (RE_DOCTYPE_HTML.matcher(str).region(start, limit).lookingAt()
                || RE_HTML_ELEMENTS.matcher(str).region(start, limit).lookingAt()
                || RE_END_TAG.matcher(str).region(start, limit).find()) {
            return "text/html";
        }
        return null;
    }

    /**
     * Tests whether {@code str} looks like JavaScript or JSON.
     *
     * @param str decoded leading content
     * @return {@code "text/javascript"} if a {@code var} declaration at the start of a
     *         line or a {@code function} expression is found anywhere;
     *         {@code "application/json"} if the text starts with <code>{"</code>;
     *         otherwise {@code null}
     */
    protected String detectJavaScript(String str) {
        if (RE_JS_VAR.matcher(str).find() || RE_JS_FUNCTION.matcher(str).find()) {
            return "text/javascript";
        }
        if (RE_JSON_HEAD.matcher(str).lookingAt()) {
            return "application/json";
        }
        return null;
    }

    /**
     * Tests whether {@code str} looks like CSS.
     *
     * <p>After leading comments, the text is CSS if it starts with a known at-rule, or
     * with a rule-set opening (selectors followed by <code>{</code>) whose first
     * non-comment content is a {@code property: value} declaration.
     *
     * @param str decoded leading content
     * @return {@code "text/css"}, or {@code null} if the text does not look like CSS
     */
    protected String detectCSS(String str) {
        final int limit = str.length();
        final int start = skipAll(RE_CSS_COMMENT, str, 0);

        if (RE_CSS_AT_RULE.matcher(str).region(start, limit).lookingAt()) {
            return "text/css";
        }

        Matcher ruleset = RE_CSS_RULESET_START.matcher(str).region(start, limit);
        if (!ruleset.lookingAt()) {
            return null;
        }
        // Comments may sit between the opening brace and the first declaration.
        final int bodyStart = skipAll(RE_CSS_COMMENT, str, ruleset.end());
        if (RE_CSS_DECLARATION.matcher(str).region(bodyStart, limit).lookingAt()) {
            return "text/css";
        }
        return null;
    }
}
