diff options
author | 2019-04-16 11:18:45 -0700 | |
---|---|---|
committer | 2019-04-16 23:46:19 +0000 | |
commit | 4d8d83f9686b3cf84fc7055516adb22645ab9f6a (patch) | |
tree | 2302c1ca3372f3e94328ab084919901d535506f5 | |
parent | ef04ac6c05fa344428008ffa1eac7316c64a3467 (diff) |
Add a parallel_run.py tool
Sometimes when investigating a flake it is useful to add logging and
repeatedly run a command until the issue appears with the logging.
This can be tedious if the flake is rare. To make it easier I added a
simple parallel_run.py tool that can run a command in parallel
repeatedly until it fails and save the stdout from the failed run.
Usage:
% # Write a script that exits with a non-zero status on failure and writes
% # all output to stdout.
% cat tst.sh
#!/bin/bash
! (head -c32 /dev/urandom | hd | grep -E '\|.*a')
% # Run the script in parallel
% ./art/tools/parallel_run.py -j80 --out out.txt ./tst.sh
Temporary files in /tmp/tmp5013_ey7
1 runs
<snip>
31 runs
% # Examine output
% cat out.txt
00000000 6d e3 ac bf 1a 21 93 17 61 00 2a e0 8b f5 41 3b |m....!..a.*...A;|
Test: ./art/tools/parallel_run.py -j70 --out out.txt ./tst.sh
Bug: 72608560
Change-Id: I83877ae1ccde5e843da20c5bd5a4f24e2e07fb19
-rwxr-xr-x | tools/parallel_run.py | 81 |
1 files changed, 81 insertions, 0 deletions
#!/usr/bin/python3
#
# Provenance: tools/parallel_run.py (new file, mode 100755, blob 2d276fc443).
#
# Copyright 2019, The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Run a command using multiple cores in parallel. Stop when one run exits non-zero and save the
log (stdout) from that run.
"""

import argparse
import concurrent.futures
import contextlib
import itertools
import os
import os.path
import shutil
import subprocess
import tempfile


def run_one(cmd, tmpfile):
    """Run the command once, sending its stdout to tmpfile.

    Args:
        cmd: the command, passed unchanged to subprocess.run (no shell is used).
        tmpfile: path of the log file. It is opened with mode "x", so it must not exist yet.

    Returns:
        A (tmpfile, returncode) tuple so the caller can match the exit status to its log.
    """
    with open(tmpfile, "x") as fd:
        return tmpfile, subprocess.run(cmd, stdout=fd).returncode


@contextlib.contextmanager
def nowait(ppe):
    """Yield the executor and, on exit, shut it down without waiting for pending work."""
    try:
        yield ppe
    finally:
        ppe.shutdown(wait=False)


def _save_failed_logs(failed, out):
    """Copy the logs of the failed runs to their final destination.

    A single failure is copied to `out` itself; several failures that completed in the same
    batch are copied to `out.0`, `out.1`, ...
    """
    if len(failed) == 1:
        shutil.copyfile(failed[0], out)
        return
    for i, log in enumerate(failed):
        shutil.copyfile(log, out + "." + str(i))


def main(argv=None):
    """Repeatedly run the command in parallel until at least one run exits non-zero.

    Args:
        argv: optional list of command line arguments. When None, argparse falls back to
            sys.argv[1:], which is the original behaviour.
    """
    parser = argparse.ArgumentParser(
        description="Run a command using multiple cores and save non-zero exit log"
    )
    parser.add_argument("--jobs", "-j", type=int, help="max number of jobs. default 60",
                        default=60)
    parser.add_argument("cmd", help="command to run")
    parser.add_argument("--out", type=str, help="where to put result", default="out_log")
    args = parser.parse_args(argv)

    cnt = 0
    ids = itertools.count(0)
    with tempfile.TemporaryDirectory() as td:
        print("Temporary files in {}".format(td))
        with nowait(concurrent.futures.ProcessPoolExecutor(args.jobs)) as pool:
            pending = set()
            while True:
                # Top the pool back up to `jobs` in-flight runs, each with a unique log name.
                for idx in itertools.islice(ids, args.jobs - len(pending)):
                    log = os.path.join(td, "run_log." + str(idx))
                    pending.add(pool.submit(run_one, args.cmd, log))
                finished = concurrent.futures.wait(
                    pending, return_when=concurrent.futures.FIRST_COMPLETED)
                pending = finished.not_done
                done = [future.result() for future in finished.done]
                cnt += len(done)
                print("{} runs".format(cnt))
                failed = [log for log, rc in done if rc != 0]
                # Logs of successful runs are of no interest; drop them right away.
                for log, rc in done:
                    if rc == 0:
                        os.remove(log)
                if failed:
                    _save_failed_logs(failed, args.out)
                    break


if __name__ == '__main__':
    main()