Add a parallel_run.py tool

Sometimes when investigating a flake it is useful to add logging and
repeatedly run a command until the issue appears with the logging.
This can be tedious if the flake is rare. To make it easier I added a
simple parallel_run.py tool that can run a command in parallel
repeatedly until it fails and save the stdout from the failed run.

Usage:

% # Write a script that exits with 0 on failure and writes all output
% # to stdout.
% cat tst.sh
\#!/bin/bash
! (head -c32 /dev/urandom | hd | grep -E '\|.*a')

% # Run the script in parallel
% ./art/tools/parallel_run.py -j80 --out out.txt ./tst.sh
Temporary files in /tmp/tmp5013_ey7
1 runs
<snip>
31 runs

% # Examine output
% cat out.txt
00000000  6d e3 ac bf 1a 21 93 17  61 00 2a e0 8b f5 41 3b  |m....!..a.*...A;|

Test: ./art/tools/parallel_run.py -j70 --out out.txt ./tst.sh
Bug: 72608560
Change-Id: I83877ae1ccde5e843da20c5bd5a4f24e2e07fb19
diff --git a/tools/parallel_run.py b/tools/parallel_run.py
new file mode 100755
index 0000000..2d276fc
--- /dev/null
+++ b/tools/parallel_run.py
@@ -0,0 +1,81 @@
+#!/usr/bin/python3
+#
+# Copyright 2019, The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Run a command using multiple cores in parallel. Stop when one exits zero and save the log from
+that run.
+"""
+
+import argparse
+import concurrent.futures
+import contextlib
+import itertools
+import os
+import os.path
+import shutil
+import subprocess
+import tempfile
+
+
+def run_one(cmd, tmpfile):
+  """Run the command and log result to tmpfile. Return both the file name and returncode."""
+  with open(tmpfile, "x") as fd:
+    return tmpfile, subprocess.run(cmd, stdout=fd).returncode
+
+@contextlib.contextmanager
+def nowait(ppe):
+  """Run a ProcessPoolExecutor and shutdown without waiting."""
+  try:
+    yield ppe
+  finally:
+    ppe.shutdown(False)
+
+def main():
+  parser = argparse.ArgumentParser(
+      description="Run a command using multiple cores and save non-zero exit log"
+  )
+  parser.add_argument("--jobs", "-j", type=int, help="max number of jobs. default 60", default=60)
+  parser.add_argument("cmd", help="command to run")
+  parser.add_argument("--out", type=str, help="where to put result", default="out_log")
+  args = parser.parse_args()
+  cnt = 0
+  ids = itertools.count(0)
+  with tempfile.TemporaryDirectory() as td:
+    print("Temporary files in {}".format(td))
+    with nowait(concurrent.futures.ProcessPoolExecutor(args.jobs)) as p:
+      fs = set()
+      while True:
+        for _, idx in zip(range(args.jobs - len(fs)), ids):
+          fs.add(p.submit(run_one, args.cmd, os.path.join(td, "run_log." + str(idx))))
+        ws = concurrent.futures.wait(fs, return_when=concurrent.futures.FIRST_COMPLETED)
+        fs = ws.not_done
+        done = list(map(lambda a: a.result(), ws.done))
+        cnt += len(done)
+        print("{} runs".format(cnt))
+        failed = [d for d,r in done if r != 0]
+        succ = [d for d,r in done if r == 0]
+        for f in succ:
+          os.remove(f)
+        if len(failed) != 0:
+          if len(failed) != 1:
+            for f,i in zip(failed, range(len(failed))):
+              shutil.copyfile(f, args.out+"."+str(i))
+          else:
+            shutil.copyfile(failed[0], args.out)
+          break
+
+if __name__ == '__main__':
+  main()