Add a parallel_run.py tool
Sometimes when investigating a flake it is useful to add logging and
repeatedly run a command until the issue appears with the logging.
This can be tedious if the flake is rare. To make it easier I added a
simple parallel_run.py tool that can run a command in parallel
repeatedly until it fails and save the stdout from the failed run.
Usage:
% # Write a script that exits non-zero on failure and writes all output
% # to stdout.
% cat tst.sh
\#!/bin/bash
! (head -c32 /dev/urandom | hd | grep -E '\|.*a')
% # Run the script in parallel
% ./art/tools/parallel_run.py -j80 --out out.txt ./tst.sh
Temporary files in /tmp/tmp5013_ey7
1 runs
<snip>
31 runs
% # Examine output
% cat out.txt
00000000 6d e3 ac bf 1a 21 93 17 61 00 2a e0 8b f5 41 3b |m....!..a.*...A;|
Test: ./art/tools/parallel_run.py -j70 --out out.txt ./tst.sh
Bug: 72608560
Change-Id: I83877ae1ccde5e843da20c5bd5a4f24e2e07fb19
diff --git a/tools/parallel_run.py b/tools/parallel_run.py
new file mode 100755
index 0000000..2d276fc
--- /dev/null
+++ b/tools/parallel_run.py
@@ -0,0 +1,81 @@
+#!/usr/bin/python3
+#
+# Copyright 2019, The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Run a command using multiple cores in parallel. Stop when one exits zero and save the log from
+that run.
+"""
+
+import argparse
+import concurrent.futures
+import contextlib
+import itertools
+import os
+import os.path
+import shutil
+import subprocess
+import tempfile
+
+
def run_one(cmd, tmpfile):
  """Execute cmd once, capturing its stdout in tmpfile.

  Returns a (tmpfile, returncode) pair so the caller can pair each log
  file with the exit status of the run that produced it.
  """
  # Mode "x" creates the file exclusively, failing loudly if two runs
  # were ever handed the same log path.
  with open(tmpfile, "x") as log:
    proc = subprocess.run(cmd, stdout=log)
  return tmpfile, proc.returncode
+
@contextlib.contextmanager
def nowait(ppe):
  """Context-manage a ProcessPoolExecutor without blocking on exit.

  Yields the executor unchanged; on exit the pool is shut down with
  wait=False so still-running jobs are abandoned rather than joined.
  """
  try:
    yield ppe
  finally:
    # Do not wait for outstanding runs -- the caller has already seen
    # the result it cares about.
    ppe.shutdown(wait=False)
+
def main():
  """Parse arguments and run the command repeatedly until one run fails.

  Keeps up to --jobs copies of the command in flight at once.  Logs from
  runs that exit zero are deleted; as soon as one or more runs exit
  non-zero, each failing log is copied to --out (suffixed with an index
  when several fail in the same batch) and the loop stops.
  """
  parser = argparse.ArgumentParser(
      description="Run a command using multiple cores and save non-zero exit log"
  )
  parser.add_argument("--jobs", "-j", type=int, help="max number of jobs. default 60", default=60)
  parser.add_argument("cmd", help="command to run")
  parser.add_argument("--out", type=str, help="where to put result", default="out_log")
  args = parser.parse_args()
  cnt = 0
  ids = itertools.count(0)
  with tempfile.TemporaryDirectory() as td:
    print("Temporary files in {}".format(td))
    with nowait(concurrent.futures.ProcessPoolExecutor(args.jobs)) as p:
      fs = set()
      while True:
        # Top up the pool so there are always args.jobs runs in flight,
        # each writing to a uniquely numbered log file.
        for _, idx in zip(range(args.jobs - len(fs)), ids):
          fs.add(p.submit(run_one, args.cmd, os.path.join(td, "run_log." + str(idx))))
        ws = concurrent.futures.wait(fs, return_when=concurrent.futures.FIRST_COMPLETED)
        fs = ws.not_done
        done = [f.result() for f in ws.done]
        cnt += len(done)
        print("{} runs".format(cnt))
        failed = [d for d, r in done if r != 0]
        # Logs from successful runs are uninteresting; delete them so
        # the temporary directory does not grow without bound.
        for d, r in done:
          if r == 0:
            os.remove(d)
        if failed:
          if len(failed) == 1:
            shutil.copyfile(failed[0], args.out)
          else:
            # Several runs failed in the same batch; keep every log,
            # numbered out.0, out.1, ...
            for i, f in enumerate(failed):
              shutil.copyfile(f, args.out + "." + str(i))
          break


if __name__ == '__main__':
  main()