From dbc40b34617b5ee5d2f82272c8863c602265063c Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Wed, 14 Feb 2024 10:05:22 -0800 Subject: [PATCH] [lldb] Fix the flakey Concurrent tests on macOS (#81710) The concurrent tests all do a pthread_join at the end, and concurrent_base.py stops after that pthread_join and sanity checks that only 1 thread is running. On macOS, after pthread_join() has completed, there can be an extra thread still running which is completing the details of that task asynchronously; this causes testsuite failures. When this happens, we see the second thread is in: ``` frame #0: 0x0000000180ce7700 libsystem_kernel.dylib`__ulock_wake + 8 frame #1: 0x0000000180d25ad4 libsystem_pthread.dylib`_pthread_joiner_wake + 52 frame #2: 0x0000000180d23c18 libsystem_pthread.dylib`_pthread_terminate + 384 frame #3: 0x0000000180d23a98 libsystem_pthread.dylib`_pthread_terminate_invoke + 92 frame #4: 0x0000000180d26740 libsystem_pthread.dylib`_pthread_exit + 112 frame #5: 0x0000000180d26040 libsystem_pthread.dylib`_pthread_start + 148 ``` None of the functions from the test file are present on this thread. In this patch, instead of counting the number of threads, I iterate over the threads looking for functions from our test file (by name) and only count threads that have at least one of them. It's a lower-frequency failure than the Darwin kernel bug causing an extra step instruction mach exception when hardware breakpoints/watchpoints are used, but once I fixed that, this came up as the next most common failure for these tests. 
rdar://110555062 --- .../Python/lldbsuite/test/concurrent_base.py | 34 +++++++++++++++++-- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/concurrent_base.py b/lldb/packages/Python/lldbsuite/test/concurrent_base.py index 39eb27fd997471..46d71666d06977 100644 --- a/lldb/packages/Python/lldbsuite/test/concurrent_base.py +++ b/lldb/packages/Python/lldbsuite/test/concurrent_base.py @@ -264,12 +264,40 @@ def do_thread_actions( "Expected main thread (finish) breakpoint to be hit once", ) - num_threads = self.inferior_process.GetNumThreads() + # There should be a single active thread (the main one) which hit + # the breakpoint after joining. Depending on the pthread + # implementation we may have a worker thread finishing the pthread_join() + # after it has returned. Filter the threads to only count those + # with user functions on them from our test case file, + # lldb/test/API/functionalities/thread/concurrent_events/main.cpp + user_code_funcnames = [ + "breakpoint_func", + "crash_func", + "do_action_args", + "dotest", + "main", + "register_signal_handler", + "signal_func", + "sigusr1_handler", + "start_threads", + "watchpoint_func", + ] + num_threads_with_usercode = 0 + for t in self.inferior_process.threads: + thread_has_user_code = False + for f in t.frames: + for funcname in user_code_funcnames: + if funcname in f.GetDisplayFunctionName(): + thread_has_user_code = True + break + if thread_has_user_code: + num_threads_with_usercode += 1 + self.assertEqual( 1, - num_threads, + num_threads_with_usercode, "Expecting 1 thread but seeing %d. Details:%s" - % (num_threads, "\n\t".join(self.describe_threads())), + % (num_threads_with_usercode, "\n\t".join(self.describe_threads())), ) self.runCmd("continue")