summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--core/java/android/webkit/URLUtil.java332
1 files changed, 321 insertions, 11 deletions
diff --git a/core/java/android/webkit/URLUtil.java b/core/java/android/webkit/URLUtil.java
index 828ec265f4c8..c6271d27cb37 100644
--- a/core/java/android/webkit/URLUtil.java
+++ b/core/java/android/webkit/URLUtil.java
@@ -16,20 +16,40 @@
package android.webkit;
+import android.annotation.FlaggedApi;
+import android.annotation.NonNull;
import android.annotation.Nullable;
+import android.compat.Compatibility;
+import android.compat.annotation.ChangeId;
+import android.compat.annotation.EnabledSince;
import android.compat.annotation.UnsupportedAppUsage;
import android.net.ParseException;
import android.net.Uri;
import android.net.WebAddress;
+import android.os.Build;
import android.util.Log;
import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.nio.charset.Charset;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public final class URLUtil {
+ /**
+ * This feature enables parsing of Content-Disposition headers that conform to RFC 6266. In
+ * particular, this enables parsing of {@code filename*} values which can use a different
+ * character encoding.
+ *
+ * @hide
+ */
+ @ChangeId
+ @EnabledSince(targetSdkVersion = Build.VERSION_CODES.VANILLA_ICE_CREAM)
+ @FlaggedApi(android.os.Flags.FLAG_ANDROID_OS_BUILD_VANILLA_ICE_CREAM)
+ public static final long PARSE_CONTENT_DISPOSITION_USING_RFC_6266 = 319400769L;
+
private static final String LOGTAG = "webkit";
private static final boolean TRACE = false;
@@ -293,21 +313,58 @@ public final class URLUtil {
/**
* Guesses canonical filename that a download would have, using the URL and contentDisposition.
- * File extension, if not defined, is added based on the mimetype
+ *
+ * <p>File extension, if not defined, is added based on the mimetype.
+ *
+ * <p>The {@code contentDisposition} argument will be treated differently depending on
+ * targetSdkVersion.
+ *
+ * <ul>
+ * <li>For targetSDK versions &lt; {@code VANILLA_ICE_CREAM} it will be parsed based on RFC
+ * 2616.
+ * <li>For targetSDK versions &gt;= {@code VANILLA_ICE_CREAM} it will be parsed based on RFC
+ * 6266.
+ * </ul>
+ *
+ * In practice, this means that from {@code VANILLA_ICE_CREAM}, this method will be able to
+ * parse {@code filename*} directives in the {@code contentDisposition} string.
+ *
+ * <p>The function also changed in the following ways in {@code VANILLA_ICE_CREAM}:
+ *
+ * <ul>
+ * <li>If the suggested file type extension doesn't match the passed {@code mimeType}, the
+ * method will append the appropriate extension instead of replacing the current
+ * extension.
+ * <li>If the suggested file name contains a path separator ({@code "/"}), the method will
+ * replace this with the underscore character ({@code "_"}) instead of splitting the
+ * result and only using the last part.
+ * </ul>
*
* @param url Url to the content
* @param contentDisposition Content-Disposition HTTP header or {@code null}
* @param mimeType Mime-type of the content or {@code null}
* @return suggested filename
*/
- public static final String guessFileName(
+ public static String guessFileName(
+ String url, @Nullable String contentDisposition, @Nullable String mimeType) {
+ if (android.os.Flags.androidOsBuildVanillaIceCream()) {
+ if (Compatibility.isChangeEnabled(PARSE_CONTENT_DISPOSITION_USING_RFC_6266)) {
+ return guessFileNameRfc6266(url, contentDisposition, mimeType);
+ }
+ }
+
+ return guessFileNameRfc2616(url, contentDisposition, mimeType);
+ }
+
+ /** Legacy implementation of guessFileName, based on RFC 2616. */
+ private static String guessFileNameRfc2616(
String url, @Nullable String contentDisposition, @Nullable String mimeType) {
String filename = null;
String extension = null;
// If we couldn't do anything with the hint, move toward the content disposition
if (contentDisposition != null) {
- filename = parseContentDisposition(contentDisposition);
+ filename = parseContentDispositionRfc2616(contentDisposition);
if (filename != null) {
int index = filename.lastIndexOf('/') + 1;
if (index > 0) {
@@ -384,6 +441,128 @@ public final class URLUtil {
return filename + extension;
}
+ /**
+ * Guesses canonical filename that a download would have, using the URL and contentDisposition.
+ * Uses RFC 6266 for parsing the contentDisposition header value.
+ */
+ @NonNull
+ private static String guessFileNameRfc6266(
+ @NonNull String url, @Nullable String contentDisposition, @Nullable String mimeType) {
+ String filename = getFilenameSuggestion(url, contentDisposition);
+ // Split filename between base and extension
+ // Add an extension if filename does not have one
+ String extensionFromMimeType = suggestExtensionFromMimeType(mimeType);
+
+ if (filename.indexOf('.') < 0) {
+ // Filename does not have an extension, use the suggested one.
+ return filename + extensionFromMimeType;
+ }
+
+ // Filename already contains at least one dot.
+ // Compare the last segment of the extension against the mime type.
+ // If there's a mismatch, add the suggested extension instead.
+ if (mimeType != null && extensionDifferentFromMimeType(filename, mimeType)) {
+ return filename + extensionFromMimeType;
+ }
+ return filename;
+ }
+
+ /**
+ * Get the suggested file name from the {@code contentDisposition} or {@code url}. Will ensure
+ * that the filename contains no path separators by replacing them with the {@code "_"}
+ * character.
+ */
+ @NonNull
+ private static String getFilenameSuggestion(String url, @Nullable String contentDisposition) {
+ // First attempt to parse the Content-Disposition header if available
+ if (contentDisposition != null) {
+ String filename = getFilenameFromContentDispositionRfc6266(contentDisposition);
+ if (filename != null) {
+ return replacePathSeparators(filename);
+ }
+ }
+
+ // Try to generate a filename based on the URL.
+ if (url != null) {
+ Uri parsedUri = Uri.parse(url);
+ String lastPathSegment = parsedUri.getLastPathSegment();
+ if (lastPathSegment != null) {
+ return replacePathSeparators(lastPathSegment);
+ }
+ }
+
+ // Finally, if couldn't get filename from URI, get a generic filename.
+ return "downloadfile";
+ }
+
+ /**
+ * Replace all instances of {@code "/"} with {@code "_"} to avoid filenames that navigate the
+ * path.
+ */
+ @NonNull
+ private static String replacePathSeparators(@NonNull String raw) {
+ return raw.replaceAll("/", "_");
+ }
+
+ /**
+ * Check if the {@code filename} has an extension that is different from the expected one based
+ * on the {@code mimeType}.
+ */
+ private static boolean extensionDifferentFromMimeType(
+ @NonNull String filename, @NonNull String mimeType) {
+ int lastDotIndex = filename.lastIndexOf('.');
+ String typeFromExt =
+ MimeTypeMap.getSingleton()
+ .getMimeTypeFromExtension(filename.substring(lastDotIndex + 1));
+ return typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType);
+ }
+
+ /**
+ * Get a candidate file extension (including the {@code .}) for the given mimeType. will return
+ * {@code ".bin"} if {@code mimeType} is {@code null}
+ *
+ * @param mimeType Reported mimetype
+ * @return A file extension, including the {@code .}
+ */
+ @NonNull
+ private static String suggestExtensionFromMimeType(@Nullable String mimeType) {
+ if (mimeType == null) {
+ return ".bin";
+ }
+ String extensionFromMimeType =
+ MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
+ if (extensionFromMimeType != null) {
+ return "." + extensionFromMimeType;
+ }
+ if (mimeType.equalsIgnoreCase("text/html")) {
+ return ".html";
+ } else if (mimeType.toLowerCase(Locale.ROOT).startsWith("text/")) {
+ return ".txt";
+ } else {
+ return ".bin";
+ }
+ }
+
+ /**
+ * Parse the Content-Disposition HTTP Header.
+ *
+ * <p>Behavior depends on targetSdkVersion.
+ *
+ * <ul>
+ * <li>For targetSDK versions &lt; {@code VANILLA_ICE_CREAM} it will parse based on RFC 2616.
+ * <li>For targetSDK versions &gt;= {@code VANILLA_ICE_CREAM} it will parse based on RFC 6266.
+ * </ul>
+ */
+ @UnsupportedAppUsage
+ static String parseContentDisposition(String contentDisposition) {
+ if (android.os.Flags.androidOsBuildVanillaIceCream()) {
+ if (Compatibility.isChangeEnabled(PARSE_CONTENT_DISPOSITION_USING_RFC_6266)) {
+ return getFilenameFromContentDispositionRfc6266(contentDisposition);
+ }
+ }
+ return parseContentDispositionRfc2616(contentDisposition);
+ }
+
/** Regex used to parse content-disposition headers */
private static final Pattern CONTENT_DISPOSITION_PATTERN =
Pattern.compile(
@@ -391,15 +570,14 @@ public final class URLUtil {
Pattern.CASE_INSENSITIVE);
/**
- * Parse the Content-Disposition HTTP Header. The format of the header is defined here:
- * http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html This header provides a filename for
- * content that is going to be downloaded to the file system. We only support the attachment
- * type. Note that RFC 2616 specifies the filename value must be double-quoted. Unfortunately
- * some servers do not quote the value so to maintain consistent behaviour with other browsers,
- * we allow unquoted values too.
+ * Parse the Content-Disposition HTTP Header. The format of the header is defined here: <a
+ * href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html">rfc2616 Section 19</a>. This
+ * header provides a filename for content that is going to be downloaded to the file system. We
+ * only support the attachment type. Note that RFC 2616 specifies the filename value must be
+ * double-quoted. Unfortunately some servers do not quote the value so to maintain consistent
+ * behaviour with other browsers, we allow unquoted values too.
*/
- @UnsupportedAppUsage
- static String parseContentDisposition(String contentDisposition) {
+ private static String parseContentDispositionRfc2616(String contentDisposition) {
try {
Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
if (m.find()) {
@@ -410,4 +588,136 @@ public final class URLUtil {
}
return null;
}
+
+ /**
+ * Pattern for parsing individual content disposition key-value pairs.
+ *
+ * <p>The pattern will attempt to parse the value as either single-, double-, or unquoted. For
+ * the single- and double-quoted options, the pattern allows escaped quotes as part of the
+ * value, as per <a href="https://datatracker.ietf.org/doc/html/rfc2616#section-2.2">rfc2616
+ * section-2.2</a>
+ */
+ @SuppressWarnings("RegExpRepeatedSpace") // Spaces are only for readability.
+ private static final Pattern DISPOSITION_PATTERN =
+ Pattern.compile(
+ """
+ \\s*(\\S+?) # Group 1: parameter name
+ \\s*=\\s* # Match equals sign
+ (?: # non-capturing group of options
+ '( (?: [^'\\\\] | \\\\. )* )' # Group 2: single-quoted
+ | "( (?: [^"\\\\] | \\\\. )* )" # Group 3: double-quoted
+ | ( [^'"][^;\\s]* ) # Group 4: un-quoted parameter
+ )\\s*;? # Optional end semicolon""",
+ Pattern.COMMENTS);
+
+ /**
+ * Extract filename from a {@code Content-Disposition} header value.
+ *
+ * <p>This method implements the parsing defined in <a
+ * href="https://datatracker.ietf.org/doc/html/rfc6266">RFC 6266</a>, supporting both the {@code
+ * filename} and {@code filename*} disposition parameters. If the passed header value has the
+ * {@code "inline"} disposition type, this method will return {@code null} to indicate that a
+ * download was not intended.
+ *
+ * <p>If both {@code filename*} and {@code filename} is present, the former will be returned, as
+ * per the RFC. Invalid encoded values will be ignored.
+ *
+ * @param contentDisposition Value of {@code Content-Disposition} header.
+ * @return The filename suggested by the header or {@code null} if no filename could be parsed
+ * from the header value.
+ */
+ @Nullable
+ private static String getFilenameFromContentDispositionRfc6266(
+ @NonNull String contentDisposition) {
+ String[] parts = contentDisposition.trim().split(";", 2);
+ if (parts.length < 2) {
+ // Need at least 2 parts, the `disposition-type` and at least one `disposition-parm`.
+ return null;
+ }
+ String dispositionType = parts[0].trim();
+ if ("inline".equalsIgnoreCase(dispositionType)) {
+ // "inline" should not result in a download.
+ // Unknown disposition types should be handles as "attachment"
+ // https://datatracker.ietf.org/doc/html/rfc6266#section-4.2
+ return null;
+ }
+ String dispositionParameters = parts[1];
+ Matcher matcher = DISPOSITION_PATTERN.matcher(dispositionParameters);
+ String filename = null;
+ String filenameExt = null;
+ while (matcher.find()) {
+ String parameter = matcher.group(1);
+ String value;
+ if (matcher.group(2) != null) {
+ value = removeSlashEscapes(matcher.group(2)); // Value was single-quoted
+ } else if (matcher.group(3) != null) {
+ value = removeSlashEscapes(matcher.group(3)); // Value was double-quoted
+ } else {
+ value = matcher.group(4); // Value was un-quoted
+ }
+
+ if (parameter == null || value == null) {
+ continue;
+ }
+
+ if ("filename*".equalsIgnoreCase(parameter)) {
+ filenameExt = parseExtValueString(value);
+ } else if ("filename".equalsIgnoreCase(parameter)) {
+ filename = value;
+ }
+ }
+
+ // RFC 6266 dictates the filenameExt should be preferred if present.
+ if (filenameExt != null) {
+ return filenameExt;
+ }
+ return filename;
+ }
+
+ /** Replace escapes of the \X form with X. */
+ private static String removeSlashEscapes(String raw) {
+ if (raw == null) {
+ return null;
+ }
+ return raw.replaceAll("\\\\(.)", "$1");
+ }
+
+ /**
+ * Parse an extended value string which can be percent-encoded. Return {@code} null if unable to
+ * parse the string.
+ */
+ private static String parseExtValueString(String raw) {
+ String[] parts = raw.split("'", 3);
+ if (parts.length < 3) {
+ return null;
+ }
+
+ String encoding = parts[0];
+ // Intentionally ignore parts[1] (language).
+ String valueChars = parts[2];
+
+ try {
+ // The URLDecoder force-decodes + as " "
+ // so preemptively replace all values with the encoded value to preserve them.
+ Charset charset = Charset.forName(encoding);
+ String valueWithEncodedPlus = encodePlusCharacters(valueChars, charset);
+ return URLDecoder.decode(valueWithEncodedPlus, charset);
+ } catch (RuntimeException ignored) {
+ return null; // Ignoring an un-parsable value is within spec.
+ }
+ }
+
+ /**
+ * Replace all instances of {@code "+"} with the percent-encoded equivalent for the given {@code
+ * charset}.
+ */
+ @NonNull
+ private static String encodePlusCharacters(@NonNull String valueChars, Charset charset) {
+ StringBuilder sb = new StringBuilder();
+ for (byte b : charset.encode("+").array()) {
+ // Formatting a byte is not possible with TextUtils.formatSimple
+ sb.append(String.format("%02x", b));
+ }
+ return valueChars.replaceAll("\\+", sb.toString());
+ }
}