package org.jsoup.helper;

import ch.qos.logback.core.CoreConstants;
import java.io.BufferedReader;
import java.io.CharArrayReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.internal.ControllableInputStream;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.XmlDeclaration;
import org.jsoup.parser.Parser;

/* loaded from: classes3.dex */
public abstract class DataUtil {
    /** UTF-8 charset instance; assigned once in the static initializer. */
    public static final Charset UTF_8;
    /**
     * Case-insensitive matcher for a {@code charset=} parameter. After optional whitespace and
     * an optional opening quote, group 1 captures the value up to the next whitespace, comma,
     * semicolon or quote character.
     */
    private static final Pattern charsetPattern = Pattern.compile("(?i)\\bcharset=\\s*(?:[\"'])?([^\\s,;\"']*)");
    /** Canonical name of {@link #UTF_8}; used as the fallback charset name. Assigned in the static initializer. */
    static final String defaultCharsetName;
    /**
     * Dash, underscore, digits and ASCII letters as a char array; assigned in the static initializer.
     * NOTE(review): the name suggests use for MIME boundary generation, but no reader of this
     * field is visible in this part of the file - confirm.
     */
    private static final char[] mimeBoundaryChars;

    /* loaded from: classes3.dex */
    /**
     * Immutable result of byte-order-mark detection: the charset name implied by the BOM and a
     * flag that {@code detectCharset} copies into {@code CharsetDoc.skip} (which makes
     * {@code maybeSkipBom} skip one leading character before parsing).
     */
    public static class BomCharset {
        /** Charset name implied by the detected BOM. */
        private final String charset;
        /** Whether one leading character must be skipped when the decoded stream is read. */
        private final boolean offset;

        /**
         * @param str charset name implied by the BOM
         * @param z2  {@code true} when the reader must skip one leading character
         */
        public BomCharset(String str, boolean z2) {
            this.offset = z2;
            this.charset = str;
        }
    }

    /* loaded from: classes3.dex */
    /**
     * Mutable holder returned by {@code detectCharset}: the resolved charset, the document if
     * one was already produced during detection, the stream to parse otherwise, and whether one
     * leading character must be skipped first.
     */
    public static class CharsetDoc {
        /** Charset to decode {@link #input} with. */
        Charset charset;
        /** Already-parsed document, or {@code null} when {@link #input} still has to be parsed. */
        Document doc;
        /** Stream to parse when {@link #doc} is {@code null}. */
        InputStream input;
        /** When {@code true}, {@code maybeSkipBom} skips one character of the reader. */
        boolean skip;

        /**
         * @param charset     charset to decode the stream with
         * @param document    already-parsed document, or {@code null}
         * @param inputStream stream to parse
         * @param z2          whether one leading character must be skipped
         */
        public CharsetDoc(Charset charset, Document document, InputStream inputStream, boolean z2) {
            this.skip = z2;
            this.doc = document;
            this.input = inputStream;
            this.charset = charset;
        }
    }

    static {
        // Resolve UTF-8 once; the default charset name is taken from the resolved instance so
        // the two constants can never disagree.
        UTF_8 = Charset.forName("UTF-8");
        defaultCharsetName = UTF_8.name();
        mimeBoundaryChars = "-_1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray();
    }

    /**
     * Decides which charset the input should be decoded with and packages the result.
     *
     * <p>Order of precedence, as implemented below:
     * <ol>
     *   <li>a byte-order mark found in the first bytes (overrides {@code str});</li>
     *   <li>the caller-supplied {@code str}, when non-null;</li>
     *   <li>a charset sniffed from a trial UTF-8 parse of the first bytes: a
     *       {@code meta[http-equiv=content-type]} / {@code meta[charset]} element, else the
     *       {@code encoding} attribute of a leading XML declaration;</li>
     *   <li>UTF-8.</li>
     * </ol>
     * The trial-parsed document is reused as the final result only when no different charset was
     * sniffed and the first read consumed the whole input.
     *
     * @param inputStream stream to inspect; it is wrapped, marked and reset, and the wrapper is
     *                    returned in the result
     * @param str         charset name, or {@code null} to detect it; a non-null value is checked
     *                    with {@code Validate.notEmpty}
     * @param str2        passed through to {@code parser.parseInput} as its second argument
     *                    (presumably the base URI - confirm against callers)
     * @param parser      parser used for the trial parse
     * @return the resolved charset, the reusable document (or {@code null}), the wrapped stream
     *         and the skip-one-character flag
     */
    public static CharsetDoc detectCharset(InputStream inputStream, String str, String str2, Parser parser) {
        ControllableInputStream wrap = ControllableInputStream.wrap(inputStream, 32768, 0);
        wrap.mark(32768);
        ByteBuffer firstBytes = readToByteBuffer(wrap, 5119);
        // One extra read after the first chunk tells us whether the whole input was consumed.
        boolean fullyRead = wrap.read() == -1;
        wrap.reset();

        // A BOM overrides whatever charset the caller passed in.
        BomCharset bom = detectCharsetFromBom(firstBytes);
        if (bom != null) {
            str = bom.charset;
        }

        Document document = null;
        if (str == null) {
            try {
                // Trial parse of the first bytes as UTF-8, only to look for charset declarations.
                CharBuffer decoded = UTF_8.decode(firstBytes);
                Document sniffed = decoded.hasArray()
                        ? parser.parseInput(new CharArrayReader(decoded.array(), decoded.arrayOffset(), decoded.limit()), str2)
                        : parser.parseInput(decoded.toString(), str2);

                String found = null;
                for (Element meta : sniffed.select("meta[http-equiv=content-type], meta[charset]")) {
                    if (meta.hasAttr("http-equiv")) {
                        found = getCharsetFromContentType(meta.attr("content"));
                    }
                    if (found == null && meta.hasAttr("charset")) {
                        found = meta.attr("charset");
                    }
                    if (found != null) {
                        break;
                    }
                }

                if (found == null && sniffed.childNodeSize() > 0) {
                    Node first = sniffed.childNode(0);
                    XmlDeclaration declaration = null;
                    if (first instanceof XmlDeclaration) {
                        declaration = (XmlDeclaration) first;
                    } else if (first instanceof Comment) {
                        // The declaration may have been parsed as a comment; recover it.
                        // (Previously this value was always overwritten with null.)
                        Comment comment = (Comment) first;
                        if (comment.isXmlDeclaration()) {
                            declaration = comment.asXmlDeclaration();
                        }
                    }
                    if (declaration != null && declaration.name().equalsIgnoreCase("xml")) {
                        found = declaration.attr("encoding");
                    }
                }

                String validated = validateCharset(found);
                if (validated != null && !validated.equalsIgnoreCase(defaultCharsetName)) {
                    // A different charset was declared: the input must be decoded again with it.
                    str = validated.trim().replaceAll("[\"']", CoreConstants.EMPTY_STRING);
                } else if (fullyRead) {
                    // UTF-8 was right and nothing is left to read: the trial parse is the result.
                    document = sniffed;
                }
            } catch (UncheckedIOException e) {
                throw e.getCause();
            }
        } else {
            Validate.notEmpty(str, "Must set charset arg to character set of file to parse. Set to null to attempt to detect from HTML");
        }

        if (str == null) {
            str = defaultCharsetName;
        }
        Charset charset = str.equals(defaultCharsetName) ? UTF_8 : Charset.forName(str);
        boolean skip = bom != null && bom.offset;
        return new CharsetDoc(charset, document, wrap, skip);
    }

    /**
     * Inspects up to four bytes at the buffer's current position for a byte-order mark.
     *
     * <p>Bytes are read with absolute gets, so the buffer's position and mark are left exactly
     * as they were on entry. Inputs shorter than four bytes are still examined; each signature
     * is only compared when enough real bytes are available, so zero padding can never be
     * mistaken for part of a BOM (e.g. a lone {@code FF FE} is UTF-16, not UTF-32 LE).
     *
     * @param byteBuffer buffer positioned at the first byte of the input
     * @return {@code UTF-32} or {@code UTF-16} with {@code offset=false}, {@code UTF-8} with
     *         {@code offset=true}, or {@code null} when no BOM is present
     */
    private static BomCharset detectCharsetFromBom(ByteBuffer byteBuffer) {
        int start = byteBuffer.position();
        int available = Math.min(byteBuffer.remaining(), 4);
        byte[] bom = new byte[4];
        for (int i = 0; i < available; i++) {
            bom[i] = byteBuffer.get(start + i);
        }

        final byte fe = (byte) 0xFE;
        final byte ff = (byte) 0xFF;

        // UTF-32 must be tested before UTF-16: the UTF-32 LE mark starts with the UTF-16 LE mark.
        if (available >= 4) {
            boolean utf32BigEndian = bom[0] == 0 && bom[1] == 0 && bom[2] == fe && bom[3] == ff;
            boolean utf32LittleEndian = bom[0] == ff && bom[1] == fe && bom[2] == 0 && bom[3] == 0;
            if (utf32BigEndian || utf32LittleEndian) {
                return new BomCharset("UTF-32", false);
            }
        }
        if (available >= 2) {
            boolean utf16BigEndian = bom[0] == fe && bom[1] == ff;
            boolean utf16LittleEndian = bom[0] == ff && bom[1] == fe;
            if (utf16BigEndian || utf16LittleEndian) {
                return new BomCharset("UTF-16", false);
            }
        }
        if (available >= 3 && bom[0] == (byte) 0xEF && bom[1] == (byte) 0xBB && bom[2] == (byte) 0xBF) {
            return new BomCharset("UTF-8", true);
        }
        return null;
    }

    /**
     * Pulls the charset out of a content-type style string such as
     * {@code text/html; charset=ISO-8859-1}.
     *
     * @param str content-type value, may be {@code null}
     * @return the charset name as accepted by {@code validateCharset}, or {@code null} when
     *         {@code str} is {@code null}, carries no {@code charset=} parameter, or names a
     *         charset that validation rejects
     */
    public static String getCharsetFromContentType(String str) {
        if (str == null) {
            return null;
        }
        Matcher found = charsetPattern.matcher(str);
        if (!found.find()) {
            return null;
        }
        String candidate = found.group(1).trim();
        candidate = candidate.replace("charset=", CoreConstants.EMPTY_STRING);
        return validateCharset(candidate);
    }

    /**
     * Parses a stream into a document; a direct delegate to
     * {@code parseInputStream(InputStream, String, String, Parser)}.
     *
     * @param inputStream stream to parse
     * @param str         charset name, forwarded unchanged
     * @param str2        forwarded unchanged as the third argument
     * @param parser      parser to use
     * @return the document produced by {@code parseInputStream}
     */
    public static Document load(InputStream inputStream, String str, String str2, Parser parser) {
        Document parsed = parseInputStream(inputStream, str, str2, parser);
        return parsed;
    }

    /**
     * Skips one character of {@code reader} when {@code charsetDoc.skip} is set, and checks via
     * {@code Validate.isTrue} that exactly one character was skipped.
     *
     * @param reader     reader positioned at the start of the decoded input
     * @param charsetDoc detection result carrying the skip flag
     */
    public static void maybeSkipBom(Reader reader, CharsetDoc charsetDoc) {
        if (!charsetDoc.skip) {
            return;
        }
        long skipped = reader.skip(1L);
        Validate.isTrue(skipped == 1);
    }

    /**
     * Detects the charset of {@code inputStream}, parses it, and closes the wrapped stream that
     * detection produced.
     *
     * <p>If charset detection itself fails there is no wrapped stream yet, so nothing is closed
     * here and the failure propagates to the caller.
     *
     * @param inputStream stream to parse; {@code null} yields an empty document
     * @param str         charset name, or {@code null} to detect it
     * @param str2        passed to the {@code Document} constructor and on to the parser
     *                    (presumably the base URI - confirm against callers)
     * @param parser      parser to use
     * @return the parsed document, or {@code new Document(str2)} when {@code inputStream} is
     *         {@code null}
     */
    public static Document parseInputStream(InputStream inputStream, String str, String str2, Parser parser) {
        if (inputStream == null) {
            return new Document(str2);
        }
        CharsetDoc charsetDoc = null;
        try {
            charsetDoc = detectCharset(inputStream, str, str2, parser);
            return parseInputStream(charsetDoc, str2, parser);
        } finally {
            // Still null when detectCharset threw: there is nothing of ours to close then.
            if (charsetDoc != null) {
                charsetDoc.input.close();
            }
        }
    }

    /**
     * Produces the final document for a charset-detection result.
     *
     * <p>When detection already yielded a document it is returned as-is. Otherwise the stream is
     * decoded with the detected charset, one leading character is skipped if requested, and the
     * parser is run over the rest. The document's output charset is set to the detected charset,
     * or to UTF-8 when that charset reports it cannot encode.
     *
     * @param charsetDoc detection result; its {@code input} must be non-null when its
     *                   {@code doc} is {@code null}
     * @param str        passed to {@code parser.parseInput} as its second argument
     * @param parser     parser to use
     * @return the parsed document
     */
    public static Document parseInputStream(CharsetDoc charsetDoc, String str, Parser parser) {
        if (charsetDoc.doc != null) {
            return charsetDoc.doc;
        }
        InputStream inputStream = charsetDoc.input;
        Validate.notNull(inputStream);
        Charset charset = charsetDoc.charset;
        // try-with-resources closes the reader on every path and attaches a close() failure as a
        // suppressed exception when the body has already thrown.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, charset), 32768)) {
            maybeSkipBom(reader, charsetDoc);
            try {
                Document parsed = parser.parseInput(reader, str);
                parsed.outputSettings().charset(charset);
                if (!charset.canEncode()) {
                    // The charset could decode the input but cannot encode output; use UTF-8 instead.
                    parsed.charset(UTF_8);
                }
                return parsed;
            } catch (UncheckedIOException e) {
                // Unwrap to the underlying I/O failure.
                throw e.getCause();
            }
        }
    }

    /**
     * Reads from a stream into a byte buffer; a direct delegate to
     * {@code ControllableInputStream.readToByteBuffer}.
     *
     * @param inputStream stream to read from
     * @param i           forwarded unchanged (presumably the maximum number of bytes to read -
     *                    confirm against {@code ControllableInputStream})
     * @return the buffer returned by the delegate
     */
    public static ByteBuffer readToByteBuffer(InputStream inputStream, int i) {
        ByteBuffer bytes = ControllableInputStream.readToByteBuffer(inputStream, i);
        return bytes;
    }

    /**
     * Normalises a candidate charset name and checks that this JVM supports it.
     *
     * <p>The name is trimmed and stripped of single and double quotes, then tried as-is and,
     * failing that, upper-cased with {@code Locale.ENGLISH}.
     *
     * @param str candidate charset name, may be {@code null}
     * @return the supported spelling of the name, or {@code null} when {@code str} is
     *         {@code null} or empty, the name is illegal, or the charset is unsupported
     */
    private static String validateCharset(String str) {
        if (str == null || str.length() == 0) {
            return null;
        }
        String cleaned = str.trim().replaceAll("[\"']", CoreConstants.EMPTY_STRING);
        try {
            if (Charset.isSupported(cleaned)) {
                return cleaned;
            }
            String upperCased = cleaned.toUpperCase(Locale.ENGLISH);
            return Charset.isSupported(upperCased) ? upperCased : null;
        } catch (IllegalCharsetNameException ignored) {
            // An illegal name is treated the same as an unsupported one.
            return null;
        }
    }
}
