Skip to content

Commit 403d51f

Browse files
Fix block allocation with two or more workers hanging on failed function (#532)
* errors with local backend
* The errors are not raised with multiple processes
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* fix parameter

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 95c9480 commit 403d51f

2 files changed

Lines changed: 35 additions & 1 deletion

File tree

executorlib/interactive/shared.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ def __init__(
139139
super().__init__(max_cores=executor_kwargs.get("max_cores", None))
140140
executor_kwargs["future_queue"] = self._future_queue
141141
executor_kwargs["spawner"] = spawner
142+
executor_kwargs["queue_join_on_shutdown"] = False
142143
self._set_process(
143144
process=[
144145
RaisingThread(
@@ -209,6 +210,7 @@ def execute_parallel_tasks(
209210
hostname_localhost: Optional[bool] = None,
210211
init_function: Optional[Callable] = None,
211212
cache_directory: Optional[str] = None,
213+
queue_join_on_shutdown: bool = True,
212214
**kwargs,
213215
) -> None:
214216
"""
@@ -227,6 +229,7 @@ def execute_parallel_tasks(
227229
option to true
228230
init_function (Callable): optional function to preset arguments for functions which are submitted later
229231
cache_directory (str, optional): The directory to store cache files. Defaults to "cache".
232+
queue_join_on_shutdown (bool): Join communication queue when thread is closed. Defaults to True.
230233
"""
231234
interface = interface_bootup(
232235
command_lst=_get_backend_path(
@@ -244,7 +247,8 @@ def execute_parallel_tasks(
244247
if "shutdown" in task_dict.keys() and task_dict["shutdown"]:
245248
interface.shutdown(wait=task_dict["wait"])
246249
future_queue.task_done()
247-
future_queue.join()
250+
if queue_join_on_shutdown:
251+
future_queue.join()
248252
break
249253
elif "fn" in task_dict.keys() and "future" in task_dict.keys():
250254
if cache_directory is None:

tests/test_dependencies_executor.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ def merge(lst):
4040
return sum(lst)
4141

4242

43+
def raise_error():
44+
raise RuntimeError
45+
46+
4347
class TestExecutorWithDependencies(unittest.TestCase):
4448
def test_executor(self):
4549
with Executor(max_cores=1, backend="local") as exe:
@@ -227,3 +231,29 @@ def test_many_to_one_plot(self):
227231
)
228232
self.assertEqual(len(nodes), 18)
229233
self.assertEqual(len(edges), 21)
234+
235+
236+
class TestExecutorErrors(unittest.TestCase):
237+
def test_block_allocation_false_one_worker(self):
238+
with self.assertRaises(RuntimeError):
239+
with Executor(max_cores=1, backend="local", block_allocation=False) as exe:
240+
cloudpickle_register(ind=1)
241+
_ = exe.submit(raise_error)
242+
243+
def test_block_allocation_true_one_worker(self):
244+
with self.assertRaises(RuntimeError):
245+
with Executor(max_cores=1, backend="local", block_allocation=True) as exe:
246+
cloudpickle_register(ind=1)
247+
_ = exe.submit(raise_error)
248+
249+
def test_block_allocation_false_two_workers(self):
250+
with self.assertRaises(RuntimeError):
251+
with Executor(max_cores=2, backend="local", block_allocation=False) as exe:
252+
cloudpickle_register(ind=1)
253+
_ = exe.submit(raise_error)
254+
255+
def test_block_allocation_true_two_workers(self):
256+
with self.assertRaises(RuntimeError):
257+
with Executor(max_cores=2, backend="local", block_allocation=True) as exe:
258+
cloudpickle_register(ind=1)
259+
_ = exe.submit(raise_error)

0 commit comments

Comments
 (0)