diff options
| -rw-r--r-- | core/java/android/webkit/URLUtil.java | 332 |
1 files changed, 321 insertions, 11 deletions
diff --git a/core/java/android/webkit/URLUtil.java b/core/java/android/webkit/URLUtil.java index 828ec265f4c8..c6271d27cb37 100644 --- a/core/java/android/webkit/URLUtil.java +++ b/core/java/android/webkit/URLUtil.java @@ -16,20 +16,40 @@ package android.webkit; +import android.annotation.FlaggedApi; +import android.annotation.NonNull; import android.annotation.Nullable; +import android.compat.Compatibility; +import android.compat.annotation.ChangeId; +import android.compat.annotation.EnabledSince; import android.compat.annotation.UnsupportedAppUsage; import android.net.ParseException; import android.net.Uri; import android.net.WebAddress; +import android.os.Build; import android.util.Log; import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; +import java.nio.charset.Charset; import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; public final class URLUtil { + /** + * This feature enables parsing of Content-Disposition headers that conform to RFC 6266. In + * particular, this enables parsing of {@code filename*} values which can use a different + * character encoding. + * + * @hide + */ + @ChangeId + @EnabledSince(targetSdkVersion = Build.VERSION_CODES.VANILLA_ICE_CREAM) + @FlaggedApi(android.os.Flags.FLAG_ANDROID_OS_BUILD_VANILLA_ICE_CREAM) + public static final long PARSE_CONTENT_DISPOSITION_USING_RFC_6266 = 319400769L; + private static final String LOGTAG = "webkit"; private static final boolean TRACE = false; @@ -293,21 +313,58 @@ public final class URLUtil { /** * Guesses canonical filename that a download would have, using the URL and contentDisposition. - * File extension, if not defined, is added based on the mimetype + * + * <p>File extension, if not defined, is added based on the mimetype. + * + * <p>The {@code contentDisposition} argument will be treated differently depending on + * targetSdkVersion. + * + * <ul> + * <li>For targetSDK versions < {@code VANILLA_ICE_CREAM} it will be parsed based on RFC + * 2616. + * <li>For targetSDK versions >= {@code VANILLA_ICE_CREAM} it will be parsed based on RFC + * 6266. + * </ul> + * + * In practice, this means that from {@code VANILLA_ICE_CREAM}, this method will be able to + * parse {@code filename*} directives in the {@code contentDisposition} string. + * + * <p>The function also changed in the following ways in {@code VANILLA_ICE_CREAM}: + * + * <ul> + * <li>If the suggested file type extension doesn't match the passed {@code mimeType}, the + * method will append the appropriate extension instead of replacing the current + * extension. + * <li>If the suggested file name contains a path separator ({@code "/"}), the method will + * replace this with the underscore character ({@code "_"}) instead of splitting the + * result and only using the last part. + * </ul> * * @param url Url to the content * @param contentDisposition Content-Disposition HTTP header or {@code null} * @param mimeType Mime-type of the content or {@code null} * @return suggested filename */ - public static final String guessFileName( + public static String guessFileName( + String url, @Nullable String contentDisposition, @Nullable String mimeType) { + if (android.os.Flags.androidOsBuildVanillaIceCream()) { + if (Compatibility.isChangeEnabled(PARSE_CONTENT_DISPOSITION_USING_RFC_6266)) { + return guessFileNameRfc6266(url, contentDisposition, mimeType); + } + } + + return guessFileNameRfc2616(url, contentDisposition, mimeType); + } + + /** Legacy implementation of guessFileName, based on RFC 2616. */ + private static String guessFileNameRfc2616( String url, @Nullable String contentDisposition, @Nullable String mimeType) { String filename = null; String extension = null; // If we couldn't do anything with the hint, move toward the content disposition if (contentDisposition != null) { - filename = parseContentDisposition(contentDisposition); + filename = parseContentDispositionRfc2616(contentDisposition); if (filename != null) { int index = filename.lastIndexOf('/') + 1; if (index > 0) { @@ -384,6 +441,128 @@ public final class URLUtil { return filename + extension; } + /** + * Guesses canonical filename that a download would have, using the URL and contentDisposition. + * Uses RFC 6266 for parsing the contentDisposition header value. + */ + @NonNull + private static String guessFileNameRfc6266( + @NonNull String url, @Nullable String contentDisposition, @Nullable String mimeType) { + String filename = getFilenameSuggestion(url, contentDisposition); + // Split filename between base and extension + // Add an extension if filename does not have one + String extensionFromMimeType = suggestExtensionFromMimeType(mimeType); + + if (filename.indexOf('.') < 0) { + // Filename does not have an extension, use the suggested one. + return filename + extensionFromMimeType; + } + + // Filename already contains at least one dot. + // Compare the last segment of the extension against the mime type. + // If there's a mismatch, add the suggested extension instead. + if (mimeType != null && extensionDifferentFromMimeType(filename, mimeType)) { + return filename + extensionFromMimeType; + } + return filename; + } + + /** + * Get the suggested file name from the {@code contentDisposition} or {@code url}. Will ensure + * that the filename contains no path separators by replacing them with the {@code "_"} + * character. + */ + @NonNull + private static String getFilenameSuggestion(String url, @Nullable String contentDisposition) { + // First attempt to parse the Content-Disposition header if available + if (contentDisposition != null) { + String filename = getFilenameFromContentDispositionRfc6266(contentDisposition); + if (filename != null) { + return replacePathSeparators(filename); + } + } + + // Try to generate a filename based on the URL. + if (url != null) { + Uri parsedUri = Uri.parse(url); + String lastPathSegment = parsedUri.getLastPathSegment(); + if (lastPathSegment != null) { + return replacePathSeparators(lastPathSegment); + } + } + + // Finally, if couldn't get filename from URI, get a generic filename. + return "downloadfile"; + } + + /** + * Replace all instances of {@code "/"} with {@code "_"} to avoid filenames that navigate the + * path. + */ + @NonNull + private static String replacePathSeparators(@NonNull String raw) { + return raw.replaceAll("/", "_"); + } + + /** + * Check if the {@code filename} has an extension that is different from the expected one based + * on the {@code mimeType}. + */ + private static boolean extensionDifferentFromMimeType( + @NonNull String filename, @NonNull String mimeType) { + int lastDotIndex = filename.lastIndexOf('.'); + String typeFromExt = + MimeTypeMap.getSingleton() + .getMimeTypeFromExtension(filename.substring(lastDotIndex + 1)); + return typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType); + } + + /** + * Get a candidate file extension (including the {@code .}) for the given mimeType. will return + * {@code ".bin"} if {@code mimeType} is {@code null} + * + * @param mimeType Reported mimetype + * @return A file extension, including the {@code .} + */ + @NonNull + private static String suggestExtensionFromMimeType(@Nullable String mimeType) { + if (mimeType == null) { + return ".bin"; + } + String extensionFromMimeType = + MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType); + if (extensionFromMimeType != null) { + return "." + extensionFromMimeType; + } + if (mimeType.equalsIgnoreCase("text/html")) { + return ".html"; + } else if (mimeType.toLowerCase(Locale.ROOT).startsWith("text/")) { + return ".txt"; + } else { + return ".bin"; + } + } + + /** + * Parse the Content-Disposition HTTP Header. + * + * <p>Behavior depends on targetSdkVersion. + * + * <ul> + * <li>For targetSDK versions < {@code VANILLA_ICE_CREAM} it will parse based on RFC 2616. + * <li>For targetSDK versions >= {@code VANILLA_ICE_CREAM} it will parse based on RFC 6266. + * </ul> + */ + @UnsupportedAppUsage + static String parseContentDisposition(String contentDisposition) { + if (android.os.Flags.androidOsBuildVanillaIceCream()) { + if (Compatibility.isChangeEnabled(PARSE_CONTENT_DISPOSITION_USING_RFC_6266)) { + return getFilenameFromContentDispositionRfc6266(contentDisposition); + } + } + return parseContentDispositionRfc2616(contentDisposition); + } + /** Regex used to parse content-disposition headers */ private static final Pattern CONTENT_DISPOSITION_PATTERN = Pattern.compile( @@ -391,15 +570,14 @@ public final class URLUtil { Pattern.CASE_INSENSITIVE); /** - * Parse the Content-Disposition HTTP Header. The format of the header is defined here: - * http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html This header provides a filename for - * content that is going to be downloaded to the file system. We only support the attachment - * type. Note that RFC 2616 specifies the filename value must be double-quoted. Unfortunately - * some servers do not quote the value so to maintain consistent behaviour with other browsers, - * we allow unquoted values too. + * Parse the Content-Disposition HTTP Header. The format of the header is defined here: <a + * href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html">rfc2616 Section 19</a>. This + * header provides a filename for content that is going to be downloaded to the file system. We + * only support the attachment type. Note that RFC 2616 specifies the filename value must be + * double-quoted. Unfortunately some servers do not quote the value so to maintain consistent + * behaviour with other browsers, we allow unquoted values too. */ - @UnsupportedAppUsage - static String parseContentDisposition(String contentDisposition) { + private static String parseContentDispositionRfc2616(String contentDisposition) { try { Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition); if (m.find()) { @@ -410,4 +588,136 @@ public final class URLUtil { } return null; } + + /** + * Pattern for parsing individual content disposition key-value pairs. + * + * <p>The pattern will attempt to parse the value as either single-, double-, or unquoted. For + * the single- and double-quoted options, the pattern allows escaped quotes as part of the + * value, as per <a href="https://datatracker.ietf.org/doc/html/rfc2616#section-2.2">rfc2616 + * section-2.2</a> + */ + @SuppressWarnings("RegExpRepeatedSpace") // Spaces are only for readability. + private static final Pattern DISPOSITION_PATTERN = + Pattern.compile( + """ + \\s*(\\S+?) # Group 1: parameter name + \\s*=\\s* # Match equals sign + (?: # non-capturing group of options + '( (?: [^'\\\\] | \\\\. )* )' # Group 2: single-quoted + | "( (?: [^"\\\\] | \\\\. )* )" # Group 3: double-quoted + | ( [^'"][^;\\s]* ) # Group 4: un-quoted parameter + )\\s*;? # Optional end semicolon""", + Pattern.COMMENTS); + + /** + * Extract filename from a {@code Content-Disposition} header value. + * + * <p>This method implements the parsing defined in <a + * href="https://datatracker.ietf.org/doc/html/rfc6266">RFC 6266</a>, supporting both the {@code + * filename} and {@code filename*} disposition parameters. If the passed header value has the + * {@code "inline"} disposition type, this method will return {@code null} to indicate that a + * download was not intended. + * + * <p>If both {@code filename*} and {@code filename} is present, the former will be returned, as + * per the RFC. Invalid encoded values will be ignored. + * + * @param contentDisposition Value of {@code Content-Disposition} header. + * @return The filename suggested by the header or {@code null} if no filename could be parsed + * from the header value. + */ + @Nullable + private static String getFilenameFromContentDispositionRfc6266( + @NonNull String contentDisposition) { + String[] parts = contentDisposition.trim().split(";", 2); + if (parts.length < 2) { + // Need at least 2 parts, the `disposition-type` and at least one `disposition-parm`. + return null; + } + String dispositionType = parts[0].trim(); + if ("inline".equalsIgnoreCase(dispositionType)) { + // "inline" should not result in a download. + // Unknown disposition types should be handles as "attachment" + // https://datatracker.ietf.org/doc/html/rfc6266#section-4.2 + return null; + } + String dispositionParameters = parts[1]; + Matcher matcher = DISPOSITION_PATTERN.matcher(dispositionParameters); + String filename = null; + String filenameExt = null; + while (matcher.find()) { + String parameter = matcher.group(1); + String value; + if (matcher.group(2) != null) { + value = removeSlashEscapes(matcher.group(2)); // Value was single-quoted + } else if (matcher.group(3) != null) { + value = removeSlashEscapes(matcher.group(3)); // Value was double-quoted + } else { + value = matcher.group(4); // Value was un-quoted + } + + if (parameter == null || value == null) { + continue; + } + + if ("filename*".equalsIgnoreCase(parameter)) { + filenameExt = parseExtValueString(value); + } else if ("filename".equalsIgnoreCase(parameter)) { + filename = value; + } + } + + // RFC 6266 dictates the filenameExt should be preferred if present. + if (filenameExt != null) { + return filenameExt; + } + return filename; + } + + /** Replace escapes of the \X form with X. */ + private static String removeSlashEscapes(String raw) { + if (raw == null) { + return null; + } + return raw.replaceAll("\\\\(.)", "$1"); + } + + /** + * Parse an extended value string which can be percent-encoded. Return {@code} null if unable to + * parse the string. + */ + private static String parseExtValueString(String raw) { + String[] parts = raw.split("'", 3); + if (parts.length < 3) { + return null; + } + + String encoding = parts[0]; + // Intentionally ignore parts[1] (language). + String valueChars = parts[2]; + + try { + // The URLDecoder force-decodes + as " " + // so preemptively replace all values with the encoded value to preserve them. + Charset charset = Charset.forName(encoding); + String valueWithEncodedPlus = encodePlusCharacters(valueChars, charset); + return URLDecoder.decode(valueWithEncodedPlus, charset); + } catch (RuntimeException ignored) { + return null; // Ignoring an un-parsable value is within spec. + } + } + + /** + * Replace all instances of {@code "+"} with the percent-encoded equivalent for the given {@code + * charset}. + */ + @NonNull + private static String encodePlusCharacters(@NonNull String valueChars, Charset charset) { + StringBuilder sb = new StringBuilder(); + for (byte b : charset.encode("+").array()) { + // Formatting a byte is not possible with TextUtils.formatSimple + sb.append(String.format("%02x", b)); + } + return valueChars.replaceAll("\\+", sb.toString()); + } } |