summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Zhuoyao Zhang <zhuoyao@google.com> 2024-12-06 19:17:46 +0000
committer Zhuoyao Zhang <zhuoyao@google.com> 2024-12-06 22:53:35 +0000
commitec657cedbb84e26540a0e062915bc53b6cb0caaf (patch)
treede7e0fabf03c7c1f8b95d56c272a458148d258e0
parent96c15d81dd4b4f59e4e982733801ee95abc9d224 (diff)
Synchronize the setup process of edit monitor
This cl ensures only one edit monitor instance is in the setup proecess by requesting exclusive file lock on a setup lock file. If there is already an edit monitor in setup process, other instances trying to setup will fail. This prevents race conditions when multiple edit monitor instance are starting. Test: atest daemon_manager_test bug: 382135550 Change-Id: Ia8bd2f35318ae81fc002ccce62e2fde3b8cae26b
-rw-r--r--tools/edit_monitor/daemon_manager.py56
-rw-r--r--tools/edit_monitor/daemon_manager_test.py35
2 files changed, 67 insertions, 24 deletions
diff --git a/tools/edit_monitor/daemon_manager.py b/tools/edit_monitor/daemon_manager.py
index 36a7593e3a..7d666fed55 100644
--- a/tools/edit_monitor/daemon_manager.py
+++ b/tools/edit_monitor/daemon_manager.py
@@ -13,6 +13,8 @@
# limitations under the License.
+import errno
+import fcntl
import getpass
import hashlib
import logging
@@ -100,16 +102,32 @@ class DaemonManager:
logging.warning("Edit monitor for cog is not supported, exiting...")
return
- try:
- self._stop_any_existing_instance()
- self._write_pid_to_pidfile()
- self._start_daemon_process()
- except Exception as e:
- logging.exception("Failed to start daemon manager with error %s", e)
- self._send_error_event_to_clearcut(
- edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
- )
- raise e
+ setup_lock_file = pathlib.Path(tempfile.gettempdir()).joinpath(
+ self.pid_file_path.name + ".setup"
+ )
+ logging.info("setup lock file: %s", setup_lock_file)
+ with open(setup_lock_file, "w") as f:
+ try:
+ # Acquire an exclusive lock
+ fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ self._stop_any_existing_instance()
+ self._write_pid_to_pidfile()
+ self._start_daemon_process()
+ except Exception as e:
+ if (
+ isinstance(e, IOError) and e.errno == errno.EAGAIN
+ ): # Failed to acquire the file lock.
+ logging.warning("Another edit monitor is starting, exitinng...")
+ return
+ else:
+ logging.exception("Failed to start daemon manager with error %s", e)
+ self._send_error_event_to_clearcut(
+ edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
+ )
+ raise e
+ finally:
+ # Release the lock
+ fcntl.flock(f, fcntl.LOCK_UN)
def monitor_daemon(
self,
@@ -414,18 +432,18 @@ class DaemonManager:
pids = []
try:
- output = subprocess.check_output(
- ["ps", "-ef", "--no-headers"], text=True)
+ output = subprocess.check_output(["ps", "-ef", "--no-headers"], text=True)
for line in output.splitlines():
- parts = line.split()
- process_path = parts[7]
- if pathlib.Path(process_path).name == 'edit_monitor':
- pid = int(parts[1])
- if pid != self.pid: # exclude the current process
- pids.append(pid)
+ parts = line.split()
+ process_path = parts[7]
+ if pathlib.Path(process_path).name == "edit_monitor":
+ pid = int(parts[1])
+ if pid != self.pid: # exclude the current process
+ pids.append(pid)
except Exception:
logging.exception(
- "Failed to get pids of existing edit monitors from ps command.")
+ "Failed to get pids of existing edit monitors from ps command."
+ )
return pids
diff --git a/tools/edit_monitor/daemon_manager_test.py b/tools/edit_monitor/daemon_manager_test.py
index e412ab03e4..be28965c9e 100644
--- a/tools/edit_monitor/daemon_manager_test.py
+++ b/tools/edit_monitor/daemon_manager_test.py
@@ -14,6 +14,7 @@
"""Unittests for DaemonManager."""
+import fcntl
import logging
import multiprocessing
import os
@@ -82,7 +83,8 @@ class DaemonManagerTest(unittest.TestCase):
# tests will be cleaned.
tempfile.tempdir = self.working_dir.name
self.patch = mock.patch.dict(
- os.environ, {'ENABLE_ANDROID_EDIT_MONITOR': 'true'})
+ os.environ, {'ENABLE_ANDROID_EDIT_MONITOR': 'true'}
+ )
self.patch.start()
def tearDown(self):
@@ -102,6 +104,7 @@ class DaemonManagerTest(unittest.TestCase):
p = self._create_fake_deamon_process()
self.assert_run_simple_daemon_success()
+ self.assert_no_subprocess_running()
def test_start_success_with_existing_instance_already_dead(self):
# Create a pidfile with pid that does not exist.
@@ -137,7 +140,9 @@ class DaemonManagerTest(unittest.TestCase):
# Verify no daemon process is started.
self.assertIsNone(dm.daemon_process)
- @mock.patch.dict(os.environ, {'ENABLE_ANDROID_EDIT_MONITOR': 'false'}, clear=True)
+ @mock.patch.dict(
+ os.environ, {'ENABLE_ANDROID_EDIT_MONITOR': 'false'}, clear=True
+ )
def test_start_return_directly_if_disabled(self):
dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
dm.start()
@@ -154,6 +159,25 @@ class DaemonManagerTest(unittest.TestCase):
# Verify no daemon process is started.
self.assertIsNone(dm.daemon_process)
+ def test_start_failed_other_instance_is_starting(self):
+ f = open(
+ pathlib.Path(self.working_dir.name).joinpath(
+ TEST_PID_FILE_PATH + '.setup'
+ ),
+ 'w',
+ )
+ # Acquire an exclusive lock
+ fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
+
+ dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
+ dm.start()
+
+ # Release the lock
+ fcntl.flock(f, fcntl.LOCK_UN)
+ f.close()
+ # Verify no daemon process is started.
+ self.assertIsNone(dm.daemon_process)
+
@mock.patch('os.kill')
def test_start_failed_to_kill_existing_instance(self, mock_kill):
mock_kill.side_effect = OSError('Unknown OSError')
@@ -177,6 +201,7 @@ class DaemonManagerTest(unittest.TestCase):
'edit_monitor'
)
pid_file_path_dir.mkdir(parents=True, exist_ok=True)
+
# Makes the directory read-only so write pidfile will fail.
os.chmod(pid_file_path_dir, 0o555)
@@ -216,7 +241,7 @@ class DaemonManagerTest(unittest.TestCase):
cclient=fake_cclient,
)
# set the fake total_memory_size
- dm.total_memory_size = 100 * 1024 *1024
+ dm.total_memory_size = 100 * 1024 * 1024
dm.start()
dm.monitor_daemon(interval=1)
@@ -452,7 +477,7 @@ class DaemonManagerTest(unittest.TestCase):
pass
def _create_fake_deamon_process(
- self, name: str = ''
+ self, name: str = TEST_PID_FILE_PATH
) -> multiprocessing.Process:
# Create a long running subprocess
p = multiprocessing.Process(target=long_running_daemon)
@@ -463,7 +488,7 @@ class DaemonManagerTest(unittest.TestCase):
'edit_monitor'
)
pid_file_path_dir.mkdir(parents=True, exist_ok=True)
- with open(pid_file_path_dir.joinpath(name + 'pid.lock'), 'w') as f:
+ with open(pid_file_path_dir.joinpath(name), 'w') as f:
f.write(str(p.pid))
return p