1 files changed, 321 insertions, 11 deletions
diff --git a/core/java/android/webkit/URLUtil.java b/core/java/android/webkit/URLUtil.java
index 828ec265f4c8..c6271d27cb37 100644
--- a/core/java/android/webkit/URLUtil.java
+++ b/core/java/android/webkit/URLUtil.java
@@ -16,20 +16,40 @@
 
 package android.webkit;
 
+import android.annotation.FlaggedApi;
+import android.annotation.NonNull;
 import android.annotation.Nullable;
+import android.compat.Compatibility;
+import android.compat.annotation.ChangeId;
+import android.compat.annotation.EnabledSince;
 import android.compat.annotation.UnsupportedAppUsage;
 import android.net.ParseException;
 import android.net.Uri;
 import android.net.WebAddress;
+import android.os.Build;
 import android.util.Log;
 
 import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.nio.charset.Charset;
 import java.util.Locale;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 public final class URLUtil {
 
+    /**
+     * Compat change gating RFC 6266 parsing of Content-Disposition headers. When enabled,
+     * {@link #guessFileName} and the Content-Disposition parser also understand {@code filename*}
+     * values, which can use a different character encoding.
+     *
+     * @hide
+     */
+    @ChangeId
+    @EnabledSince(targetSdkVersion = Build.VERSION_CODES.VANILLA_ICE_CREAM)
+    @FlaggedApi(android.os.Flags.FLAG_ANDROID_OS_BUILD_VANILLA_ICE_CREAM)
+    public static final long PARSE_CONTENT_DISPOSITION_USING_RFC_6266 = 319400769L;
+
     private static final String LOGTAG = "webkit";
     private static final boolean TRACE = false;
 
@@ -293,21 +313,58 @@ public final class URLUtil {
 
     /**
      * Guesses canonical filename that a download would have, using the URL and contentDisposition.
-     * File extension, if not defined, is added based on the mimetype
+     *
+     * <p>File extension, if not defined, is added based on the {@code mimeType}.
+     *
+     * <p>The {@code contentDisposition} argument will be treated differently depending on
+     * targetSdkVersion.
+     *
+     * <ul>
+     *   <li>For targetSdkVersion &lt; {@code VANILLA_ICE_CREAM} it will be parsed based on RFC
+     *       2616.
+     *   <li>For targetSdkVersion &gt;= {@code VANILLA_ICE_CREAM} it will be parsed based on RFC
+     *       6266.
+     * </ul>
+     *
+     * <p>In practice, this means that from {@code VANILLA_ICE_CREAM}, this method will be able to
+     * parse {@code filename*} directives in the {@code contentDisposition} string.
+     *
+     * <p>The function also changed in the following ways in {@code VANILLA_ICE_CREAM}:
+     *
+     * <ul>
+     *   <li>If the suggested file type extension doesn't match the passed {@code mimeType}, the
+     *       method will append the appropriate extension instead of replacing the current
+     *       extension.
+     *   <li>If the suggested file name contains a path separator ({@code "/"}), the method will
+     *       replace this with the underscore character ({@code "_"}) instead of splitting the
+     *       result and only using the last part.
+     * </ul>
      *
      * @param url Url to the content
      * @param contentDisposition Content-Disposition HTTP header or {@code null}
      * @param mimeType Mime-type of the content or {@code null}
      * @return suggested filename
      */
-    public static final String guessFileName(
+    public static String guessFileName(
+            String url, @Nullable String contentDisposition, @Nullable String mimeType) {
+        if (android.os.Flags.androidOsBuildVanillaIceCream()) {
+            if (Compatibility.isChangeEnabled(PARSE_CONTENT_DISPOSITION_USING_RFC_6266)) {
+                return guessFileNameRfc6266(url, contentDisposition, mimeType);
+            }
+        }
+
+        return guessFileNameRfc2616(url, contentDisposition, mimeType);
+    }
+
+    /** Legacy implementation of guessFileName, based on RFC 2616. */
+    private static String guessFileNameRfc2616(
             String url, @Nullable String contentDisposition, @Nullable String mimeType) {
         String filename = null;
         String extension = null;
 
         // If we couldn't do anything with the hint, move toward the content disposition
         if (contentDisposition != null) {
-            filename = parseContentDisposition(contentDisposition);
+            filename = parseContentDispositionRfc2616(contentDisposition);
             if (filename != null) {
                 int index = filename.lastIndexOf('/') + 1;
                 if (index > 0) {
@@ -384,6 +441,128 @@ public final class URLUtil {
         return filename + extension;
     }
 
+    /**
+     * RFC 6266 flavour of {@code guessFileName}: takes the name suggested by the header or the URL
+     * and appends an extension derived from {@code mimeType} when the name needs one.
+     */
+    @NonNull
+    private static String guessFileNameRfc6266(
+            @NonNull String url, @Nullable String contentDisposition, @Nullable String mimeType) {
+        final String suggestedName = getFilenameSuggestion(url, contentDisposition);
+        // Computed up front; it is only appended when the name lacks a dot or when the
+        // existing extension maps to a different mime type.
+        final String mimeExtension = suggestExtensionFromMimeType(mimeType);
+
+        final boolean hasDot = suggestedName.indexOf('.') >= 0;
+        // A dotted name is left alone when mimeType is null or when its extension is not
+        // reported as a mismatch by extensionDifferentFromMimeType.
+        final boolean appendExtension =
+                !hasDot
+                        || (mimeType != null
+                                && extensionDifferentFromMimeType(suggestedName, mimeType));
+
+        // The extension is appended, never substituted: the suggested name is returned in
+        // full either way, so an existing (mismatching) extension stays part of the result.
+        // Only the suffix differs between the two outcomes.
+        return appendExtension ? suggestedName + mimeExtension : suggestedName;
+    }
+
+    /**
+     * Picks a file name candidate: the Content-Disposition header wins, then the last path segment
+     * of {@code url}, then the fixed fallback {@code "downloadfile"}. Any {@code "/"} in a
+     * candidate taken from the header or URL is replaced with {@code "_"}.
+     */
+    @NonNull
+    private static String getFilenameSuggestion(String url, @Nullable String contentDisposition) {
+        String candidate = null;
+
+        // Source 1: the Content-Disposition header, when one was supplied.
+        if (contentDisposition != null) {
+            candidate = getFilenameFromContentDispositionRfc6266(contentDisposition);
+        }
+
+        // Source 2: the last path segment of the URL, only if the header gave nothing.
+        if (candidate == null && url != null) {
+            candidate = Uri.parse(url).getLastPathSegment();
+        }
+
+        // Source 3: a generic name when neither the header nor the URL produced one.
+        if (candidate == null) {
+            return "downloadfile";
+        }
+
+        // Keep the result a single path component.
+        return replacePathSeparators(candidate);
+    }
+
+    /**
+     * Swap every {@code "/"} for {@code "_"} so the name cannot be read as a path with several
+     * components.
+     */
+    @NonNull
+    private static String replacePathSeparators(@NonNull String raw) {
+        return raw.replace('/', '_');
+    }
+
+    /**
+     * Returns {@code true} only when the text after the last dot of {@code filename} is an
+     * extension with a known mime type and that type is not {@code mimeType} (case-insensitive).
+     */
+    private static boolean extensionDifferentFromMimeType(
+            @NonNull String filename, @NonNull String mimeType) {
+        // lastIndexOf yields -1 for a dot-less name, so the whole name is then looked up.
+        String extension = filename.substring(filename.lastIndexOf('.') + 1);
+        String knownType = MimeTypeMap.getSingleton().getMimeTypeFromExtension(extension);
+        // An extension for which no mime type is returned is never reported as a mismatch.
+        return knownType != null && !knownType.equalsIgnoreCase(mimeType);
+    }
+
+    /**
+     * Choose a file extension (leading {@code .} included) for {@code mimeType}. Returns {@code
+     * ".bin"} if {@code mimeType} is {@code null}.
+     *
+     * @param mimeType Reported mimetype
+     * @return A file extension, including the {@code .}
+     */
+    @NonNull
+    private static String suggestExtensionFromMimeType(@Nullable String mimeType) {
+        if (mimeType == null) {
+            return ".bin";
+        }
+
+        // An extension supplied by MimeTypeMap takes priority over the fallbacks below.
+        String mapped = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
+        if (mapped != null) {
+            return "." + mapped;
+        }
+
+        if (mimeType.equalsIgnoreCase("text/html")) {
+            return ".html";
+        }
+        boolean isText = mimeType.toLowerCase(Locale.ROOT).startsWith("text/");
+        return isText ? ".txt" : ".bin";
+    }
+
+    /**
+     * Parse the Content-Disposition HTTP Header.
+     *
+     * <p>Which parser runs depends on targetSdkVersion.
+     *
+     * <ul>
+     *   <li>targetSdkVersion &lt; {@code VANILLA_ICE_CREAM}: parsing is based on RFC 2616.
+     *   <li>targetSdkVersion &gt;= {@code VANILLA_ICE_CREAM}: parsing is based on RFC 6266.
+     * </ul>
+     */
+    @UnsupportedAppUsage
+    static String parseContentDisposition(String contentDisposition) {
+        boolean useRfc6266 =
+                android.os.Flags.androidOsBuildVanillaIceCream()
+                        && Compatibility.isChangeEnabled(PARSE_CONTENT_DISPOSITION_USING_RFC_6266);
+        return useRfc6266
+                ? getFilenameFromContentDispositionRfc6266(contentDisposition)
+                : parseContentDispositionRfc2616(contentDisposition);
+    }
+
     /** Regex used to parse content-disposition headers */
     private static final Pattern CONTENT_DISPOSITION_PATTERN =
             Pattern.compile(
@@ -391,15 +570,14 @@ public final class URLUtil {
                     Pattern.CASE_INSENSITIVE);
 
     /**
-     * Parse the Content-Disposition HTTP Header. The format of the header is defined here:
-     * http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html This header provides a filename for
-     * content that is going to be downloaded to the file system. We only support the attachment
-     * type. Note that RFC 2616 specifies the filename value must be double-quoted. Unfortunately
-     * some servers do not quote the value so to maintain consistent behaviour with other browsers,
-     * we allow unquoted values too.
+     * Parse the Content-Disposition HTTP Header. The format of the header is defined here: <a
+     * href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html">rfc2616 Section 19</a>. This
+     * header provides a filename for content that is going to be downloaded to the file system. We
+     * only support the attachment type. Note that RFC 2616 specifies the filename value must be
+     * double-quoted. Unfortunately some servers do not quote the value so to maintain consistent
+     * behaviour with other browsers, we allow unquoted values too.
      */
-    @UnsupportedAppUsage
-    static String parseContentDisposition(String contentDisposition) {
+    private static String parseContentDispositionRfc2616(String contentDisposition) {
         try {
             Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
             if (m.find()) {
@@ -410,4 +588,136 @@ public final class URLUtil {
         }
         return null;
     }
+
+    /**
+     * Pattern for one {@code name=value} content disposition parameter, compiled with
+     * {@link Pattern#COMMENTS}. Group 1 is the name; the value lands in group 2 (single-quoted),
+     * group 3 (double-quoted) or group 4 (unquoted). Quoted values may contain backslash-escaped
+     * characters, as per <a
+     * href="https://datatracker.ietf.org/doc/html/rfc2616#section-2.2">rfc2616 section-2.2</a>;
+     * the escapes are left in the captured text. A trailing semicolon is consumed if present.
+     */
+    @SuppressWarnings("RegExpRepeatedSpace") // Spaces are only for readability.
+    private static final Pattern DISPOSITION_PATTERN =
+            Pattern.compile(
+                    """
+                            \\s*(\\S+?) # Group 1: parameter name
+                            \\s*=\\s* # Match equals sign
+                            (?: # non-capturing group of options
+                               '( (?: [^'\\\\] | \\\\. )* )' # Group 2: single-quoted
+                             | "( (?: [^"\\\\] | \\\\. )*  )" # Group 3: double-quoted
+                             | ( [^'"][^;\\s]* ) # Group 4: un-quoted parameter
+                            )\\s*;? # Optional end semicolon""",
+                    Pattern.COMMENTS);
+
+    /**
+     * Extract filename from a {@code Content-Disposition} header value.
+     *
+     * <p>This method implements the parsing defined in <a
+     * href="https://datatracker.ietf.org/doc/html/rfc6266">RFC 6266</a>, supporting both the {@code
+     * filename} and {@code filename*} disposition parameters. If the passed header value has the
+     * {@code "inline"} disposition type, this method will return {@code null} to indicate that a
+     * download was not intended.
+     *
+     * <p>If both {@code filename*} and {@code filename} are present, the former will be returned,
+     * as per the RFC. Invalid encoded values will be ignored.
+     *
+     * @param contentDisposition Value of {@code Content-Disposition} header.
+     * @return The filename suggested by the header or {@code null} if no filename could be parsed
+     *     from the header value.
+     */
+    @Nullable
+    private static String getFilenameFromContentDispositionRfc6266(
+            @NonNull String contentDisposition) {
+        String[] parts = contentDisposition.trim().split(";", 2);
+        if (parts.length < 2) {
+            // Need at least 2 parts, the `disposition-type` and at least one `disposition-parm`.
+            return null;
+        }
+        String dispositionType = parts[0].trim();
+        if ("inline".equalsIgnoreCase(dispositionType)) {
+            // "inline" should not result in a download.
+            // Unknown disposition types should be handled as "attachment"
+            // https://datatracker.ietf.org/doc/html/rfc6266#section-4.2
+            return null;
+        }
+        String dispositionParameters = parts[1];
+        Matcher matcher = DISPOSITION_PATTERN.matcher(dispositionParameters);
+        String filename = null;
+        String filenameExt = null;
+        while (matcher.find()) {
+            String parameter = matcher.group(1);
+            String value;
+            if (matcher.group(2) != null) {
+                value = removeSlashEscapes(matcher.group(2)); // Value was single-quoted
+            } else if (matcher.group(3) != null) {
+                value = removeSlashEscapes(matcher.group(3)); // Value was double-quoted
+            } else {
+                value = matcher.group(4); // Value was un-quoted
+            }
+
+            if (parameter == null || value == null) {
+                continue;
+            }
+
+            if ("filename*".equalsIgnoreCase(parameter)) {
+                filenameExt = parseExtValueString(value);
+            } else if ("filename".equalsIgnoreCase(parameter)) {
+                filename = value;
+            }
+        }
+
+        // RFC 6266 dictates the filenameExt should be preferred if present.
+        if (filenameExt != null) {
+            return filenameExt;
+        }
+        return filename;
+    }
+
+    /** Strip backslash escapes: every {@code \X} pair becomes {@code X}; null stays null. */
+    private static String removeSlashEscapes(String raw) {
+        if (raw != null) {
+            return raw.replaceAll("\\\\(.)", "$1");
+        }
+        return null;
+    }
+
+    /**
+     * Parse an extended value of the form {@code charset'language'value}, where the value part can
+     * be percent-encoded. Return {@code null} if unable to parse the string.
+     */
+    private static String parseExtValueString(String raw) {
+        String[] parts = raw.split("'", 3);
+        if (parts.length < 3) {
+            return null;
+        }
+
+        String encoding = parts[0];
+        // Intentionally ignore parts[1] (language).
+        String valueChars = parts[2];
+
+        try {
+            // The URLDecoder force-decodes + as " "
+            // so preemptively replace every + with its encoded form to preserve it.
+            Charset charset = Charset.forName(encoding);
+            String valueWithEncodedPlus = encodePlusCharacters(valueChars, charset);
+            return URLDecoder.decode(valueWithEncodedPlus, charset);
+        } catch (RuntimeException ignored) {
+            return null; // Ignoring an un-parsable value is within spec.
+        }
+    }
+
+    /**
+     * Replace every {@code "+"} in {@code valueChars} with its percent-encoded form in {@code
+     * charset} (for example {@code "%2b"} in UTF-8), so that URLDecoder does not turn it into " ".
+     */
+    @NonNull
+    private static String encodePlusCharacters(@NonNull String valueChars, Charset charset) {
+        StringBuilder sb = new StringBuilder();
+        for (byte b : "+".getBytes(charset)) { // Exactly the encoded bytes, no buffer padding.
+            // Formatting a byte is not possible with TextUtils.formatSimple
+            sb.append(String.format("%%%02x", b)); // "%%" emits the literal percent sign.
+        }
+        return valueChars.replace("+", sb.toString()); // Literal replace; no regex needed.
+    }
 }