Macrobenchmark tweaks & GPU memory dumping

Add support for glob matching of test names
Ensure glob matches are run in alphabetical order
Add a --report-gpu-memory option to dump GPU memory usage after a test pass
Make the GPU memory dump a bit more compact (skip empty sections)

Test: this
Bug: 187718492
Change-Id: I6dc80b2d3379d8d10001116e1240727d9914bc10
diff --git a/libs/hwui/pipeline/skia/SkiaMemoryTracer.cpp b/libs/hwui/pipeline/skia/SkiaMemoryTracer.cpp
index e48ecf4..0b995bc 100644
--- a/libs/hwui/pipeline/skia/SkiaMemoryTracer.cpp
+++ b/libs/hwui/pipeline/skia/SkiaMemoryTracer.cpp
@@ -126,6 +126,12 @@
     mCurrentValues.insert({valueName, {units, value}});
 }
 
+bool SkiaMemoryTracer::hasOutput() {
+    // process any remaining elements
+    processElement();
+    return mResults.size() > 0;
+}
+
 void SkiaMemoryTracer::logOutput(String8& log) {
     // process any remaining elements
     processElement();
diff --git a/libs/hwui/pipeline/skia/SkiaMemoryTracer.h b/libs/hwui/pipeline/skia/SkiaMemoryTracer.h
index e9a7981..b393b075 100644
--- a/libs/hwui/pipeline/skia/SkiaMemoryTracer.h
+++ b/libs/hwui/pipeline/skia/SkiaMemoryTracer.h
@@ -34,6 +34,7 @@
     SkiaMemoryTracer(const char* categoryKey, bool itemizeType);
     ~SkiaMemoryTracer() override {}
 
+    bool hasOutput();
     void logOutput(String8& log);
     void logTotals(String8& log);
 
diff --git a/libs/hwui/renderthread/CacheManager.cpp b/libs/hwui/renderthread/CacheManager.cpp
index d998e50..5047be9 100644
--- a/libs/hwui/renderthread/CacheManager.cpp
+++ b/libs/hwui/renderthread/CacheManager.cpp
@@ -140,7 +140,6 @@
     log.appendFormat("  Size: %.2f kB \n", SkGraphics::GetFontCacheUsed() / 1024.0f);
     log.appendFormat("  Glyph Count: %d \n", SkGraphics::GetFontCacheCountUsed());
 
-    log.appendFormat("CPU Caches:\n");
     std::vector<skiapipeline::ResourcePair> cpuResourceMap = {
             {"skia/sk_resource_cache/bitmap_", "Bitmaps"},
             {"skia/sk_resource_cache/rrect-blur_", "Masks"},
@@ -149,20 +148,20 @@
     };
     skiapipeline::SkiaMemoryTracer cpuTracer(cpuResourceMap, false);
     SkGraphics::DumpMemoryStatistics(&cpuTracer);
-    cpuTracer.logOutput(log);
+    if (cpuTracer.hasOutput()) {
+        log.appendFormat("CPU Caches:\n");
+        cpuTracer.logOutput(log);
+    }
 
-    log.appendFormat("GPU Caches:\n");
     skiapipeline::SkiaMemoryTracer gpuTracer("category", true);
     mGrContext->dumpMemoryStatistics(&gpuTracer);
-    gpuTracer.logOutput(log);
+    if (gpuTracer.hasOutput()) {
+        log.appendFormat("GPU Caches:\n");
+        gpuTracer.logOutput(log);
+    }
 
-    log.appendFormat("Other Caches:\n");
-    log.appendFormat("                         Current / Maximum\n");
-
-    if (renderState) {
-        if (renderState->mActiveLayers.size() > 0) {
-            log.appendFormat("  Layer Info:\n");
-        }
+    if (renderState && renderState->mActiveLayers.size() > 0) {
+        log.appendFormat("Layer Info:\n");
 
         const char* layerType = Properties::getRenderPipelineType() == RenderPipelineType::SkiaGL
                                         ? "GlLayer"
diff --git a/libs/hwui/renderthread/RenderProxy.cpp b/libs/hwui/renderthread/RenderProxy.cpp
index 1b4b4b9..ad325cf 100644
--- a/libs/hwui/renderthread/RenderProxy.cpp
+++ b/libs/hwui/renderthread/RenderProxy.cpp
@@ -249,10 +249,10 @@
     });
 }
 
-void RenderProxy::dumpGraphicsMemory(int fd) {
+void RenderProxy::dumpGraphicsMemory(int fd, bool includeProfileData) {
     if (RenderThread::hasInstance()) {
         auto& thread = RenderThread::getInstance();
-        thread.queue().runSync([&]() { thread.dumpGraphicsMemory(fd); });
+        thread.queue().runSync([&]() { thread.dumpGraphicsMemory(fd, includeProfileData); });
     }
 }
 
diff --git a/libs/hwui/renderthread/RenderProxy.h b/libs/hwui/renderthread/RenderProxy.h
index 288f555..662b445 100644
--- a/libs/hwui/renderthread/RenderProxy.h
+++ b/libs/hwui/renderthread/RenderProxy.h
@@ -109,7 +109,7 @@
     // Not exported, only used for testing
     void resetProfileInfo();
     uint32_t frameTimePercentile(int p);
-    static void dumpGraphicsMemory(int fd);
+    static void dumpGraphicsMemory(int fd, bool includeProfileData = true);
 
     static void rotateProcessStatsBuffer();
     static void setProcessStatsBuffer(int fd);
diff --git a/libs/hwui/renderthread/RenderThread.cpp b/libs/hwui/renderthread/RenderThread.cpp
index 3421e01..308352d 100644
--- a/libs/hwui/renderthread/RenderThread.cpp
+++ b/libs/hwui/renderthread/RenderThread.cpp
@@ -302,30 +302,25 @@
     return *mVkManager.get();
 }
 
-void RenderThread::dumpGraphicsMemory(int fd) {
-    globalProfileData()->dump(fd);
-
-    String8 cachesOutput;
-    String8 pipeline;
-    auto renderType = Properties::getRenderPipelineType();
-    switch (renderType) {
-        case RenderPipelineType::SkiaGL: {
-            mCacheManager->dumpMemoryUsage(cachesOutput, mRenderState);
-            pipeline.appendFormat("Skia (OpenGL)");
-            break;
-        }
-        case RenderPipelineType::SkiaVulkan: {
-            mCacheManager->dumpMemoryUsage(cachesOutput, mRenderState);
-            pipeline.appendFormat("Skia (Vulkan)");
-            break;
-        }
+static const char* pipelineToString() {
+    switch (auto renderType = Properties::getRenderPipelineType()) {
+        case RenderPipelineType::SkiaGL:
+            return "Skia (OpenGL)";
+        case RenderPipelineType::SkiaVulkan:
+            return "Skia (Vulkan)";
         default:
             LOG_ALWAYS_FATAL("canvas context type %d not supported", (int32_t)renderType);
-            break;
+    }
+}
+
+void RenderThread::dumpGraphicsMemory(int fd, bool includeProfileData) {
+    if (includeProfileData) {
+        globalProfileData()->dump(fd);
     }
 
-    dprintf(fd, "\n%s\n", cachesOutput.string());
-    dprintf(fd, "\nPipeline=%s\n", pipeline.string());
+    String8 cachesOutput;
+    mCacheManager->dumpMemoryUsage(cachesOutput, mRenderState);
+    dprintf(fd, "\nPipeline=%s\n%s\n", pipelineToString(), cachesOutput.string());
 }
 
 Readback& RenderThread::readback() {
diff --git a/libs/hwui/renderthread/RenderThread.h b/libs/hwui/renderthread/RenderThread.h
index cd9b923..afd5750 100644
--- a/libs/hwui/renderthread/RenderThread.h
+++ b/libs/hwui/renderthread/RenderThread.h
@@ -150,7 +150,7 @@
     VulkanManager& vulkanManager();
 
     sk_sp<Bitmap> allocateHardwareBitmap(SkBitmap& skBitmap);
-    void dumpGraphicsMemory(int fd);
+    void dumpGraphicsMemory(int fd, bool includeProfileData);
 
     void requireGlContext();
     void requireVkContext();
diff --git a/libs/hwui/tests/common/TestScene.cpp b/libs/hwui/tests/common/TestScene.cpp
index 02bcd47..2c532b0 100644
--- a/libs/hwui/tests/common/TestScene.cpp
+++ b/libs/hwui/tests/common/TestScene.cpp
@@ -22,8 +22,9 @@
 
 // Not a static global because we need to force the map to be constructed
 // before we try to add things to it.
-std::unordered_map<std::string, TestScene::Info>& TestScene::testMap() {
-    static std::unordered_map<std::string, TestScene::Info> testMap;
+// std::map (not unordered_map) so that iteration is sorted by name, which makes prettier output
+std::map<std::string, TestScene::Info>& TestScene::testMap() {
+    static std::map<std::string, TestScene::Info> testMap;
     return testMap;
 }
 
diff --git a/libs/hwui/tests/common/TestScene.h b/libs/hwui/tests/common/TestScene.h
index eaf5988..781884a 100644
--- a/libs/hwui/tests/common/TestScene.h
+++ b/libs/hwui/tests/common/TestScene.h
@@ -19,8 +19,8 @@
 #include <gui/Surface.h>
 #include <utils/StrongPointer.h>
 
+#include <map>
 #include <string>
-#include <unordered_map>
 
 namespace android {
 
@@ -39,6 +39,7 @@
         int repeatCount = 1;
         int reportFrametimeWeight = 0;
         bool renderOffscreen = true;
+        bool reportGpuMemoryUsage = false;
     };
 
     template <class T>
@@ -68,7 +69,7 @@
     virtual void createContent(int width, int height, Canvas& renderer) = 0;
     virtual void doFrame(int frameNr) = 0;
 
-    static std::unordered_map<std::string, Info>& testMap();
+    static std::map<std::string, Info>& testMap();
     static void registerScene(const Info& info);
 
     sp<Surface> renderTarget;
diff --git a/libs/hwui/tests/macrobench/TestSceneRunner.cpp b/libs/hwui/tests/macrobench/TestSceneRunner.cpp
index cf9b0c5..9d3b732 100644
--- a/libs/hwui/tests/macrobench/TestSceneRunner.cpp
+++ b/libs/hwui/tests/macrobench/TestSceneRunner.cpp
@@ -162,6 +162,11 @@
 
 void run(const TestScene::Info& info, const TestScene::Options& opts,
          benchmark::BenchmarkReporter* reporter) {
+    if (opts.reportGpuMemoryUsage) {
+        // If we're reporting GPU memory usage we need to first start with a clean slate
+        // All repetitions of the same test will share a single memory usage report
+        RenderProxy::trimMemory(100);
+    }
     BenchmarkResults results;
     for (int i = 0; i < opts.repeatCount; i++) {
         doRun(info, opts, i, reporter ? &results : nullptr);
@@ -172,4 +177,7 @@
             // TODO: Report summary
         }
     }
+    if (opts.reportGpuMemoryUsage) {
+        RenderProxy::dumpGraphicsMemory(STDOUT_FILENO, false);
+    }
 }
diff --git a/libs/hwui/tests/macrobench/main.cpp b/libs/hwui/tests/macrobench/main.cpp
index acbbb95..e9e962a 100644
--- a/libs/hwui/tests/macrobench/main.cpp
+++ b/libs/hwui/tests/macrobench/main.cpp
@@ -23,6 +23,7 @@
 #include "renderthread/RenderProxy.h"
 
 #include <benchmark/benchmark.h>
+#include <fnmatch.h>
 #include <getopt.h>
 #include <pthread.h>
 #include <stdio.h>
@@ -70,6 +71,7 @@
   --benchmark_format   Set output format. Possible values are tabular, json, csv
   --renderer=TYPE      Sets the render pipeline to use. May be skiagl or skiavk
   --skip-leak-check    Skips the memory leak check
+  --report-gpu-memory  Dumps the GPU memory usage after each test run
 )");
 }
 
@@ -172,6 +174,7 @@
     Offscreen,
     Renderer,
     SkipLeakCheck,
+    ReportGpuMemory,
 };
 }
 
@@ -188,6 +191,7 @@
         {"offscreen", no_argument, nullptr, LongOpts::Offscreen},
         {"renderer", required_argument, nullptr, LongOpts::Renderer},
         {"skip-leak-check", no_argument, nullptr, LongOpts::SkipLeakCheck},
+        {"report-gpu-memory", no_argument, nullptr, LongOpts::ReportGpuMemory},
         {0, 0, 0, 0}};
 
 static const char* SHORT_OPTIONS = "c:r:h";
@@ -290,6 +294,10 @@
                 gRunLeakCheck = false;
                 break;
 
+            case LongOpts::ReportGpuMemory:
+                gOpts.reportGpuMemoryUsage = true;
+                break;
+
             case 'h':
                 printHelp();
                 exit(EXIT_SUCCESS);
@@ -313,12 +321,21 @@
     if (optind < argc) {
         do {
             const char* test = argv[optind++];
-            auto pos = TestScene::testMap().find(test);
-            if (pos == TestScene::testMap().end()) {
-                fprintf(stderr, "Unknown test '%s'\n", test);
-                exit(EXIT_FAILURE);
+            if (strchr(test, '*')) {
+                // Glob match
+                for (auto& iter : TestScene::testMap()) {
+                    if (!fnmatch(test, iter.first.c_str(), 0)) {
+                        gRunTests.push_back(iter.second);
+                    }
+                }
             } else {
-                gRunTests.push_back(pos->second);
+                auto pos = TestScene::testMap().find(test);
+                if (pos == TestScene::testMap().end()) {
+                    fprintf(stderr, "Unknown test '%s'\n", test);
+                    exit(EXIT_FAILURE);
+                } else {
+                    gRunTests.push_back(pos->second);
+                }
             }
         } while (optind < argc);
     } else {