test_parallel.py 59 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717
  1. """
  2. Test the parallel module.
  3. """
  4. # Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
  5. # Copyright (c) 2010-2011 Gael Varoquaux
  6. # License: BSD Style, 3 clauses.
  7. import os
  8. import sys
  9. import time
  10. import mmap
  11. import threading
  12. from traceback import format_exception
  13. from math import sqrt
  14. from time import sleep
  15. from pickle import PicklingError
  16. from multiprocessing import TimeoutError
  17. import pickle
  18. import pytest
  19. from importlib import reload
  20. import joblib
  21. from joblib import parallel
  22. from joblib import dump, load
  23. from joblib.externals.loky import get_reusable_executor
  24. from joblib.test.common import np, with_numpy
  25. from joblib.test.common import with_multiprocessing
  26. from joblib.testing import (parametrize, raises, check_subprocess_call,
  27. skipif, SkipTest, warns)
  28. from joblib.externals.loky.process_executor import TerminatedWorkerError
  29. from queue import Queue
  30. try:
  31. import posix
  32. except ImportError:
  33. posix = None
  34. try:
  35. from ._openmp_test_helper.parallel_sum import parallel_sum
  36. except ImportError:
  37. parallel_sum = None
  38. try:
  39. import distributed
  40. except ImportError:
  41. distributed = None
  42. from joblib._parallel_backends import SequentialBackend
  43. from joblib._parallel_backends import ThreadingBackend
  44. from joblib._parallel_backends import MultiprocessingBackend
  45. from joblib._parallel_backends import ParallelBackendBase
  46. from joblib._parallel_backends import LokyBackend
  47. from joblib._parallel_backends import SafeFunction
  48. from joblib.parallel import Parallel, delayed
  49. from joblib.parallel import register_parallel_backend, parallel_backend
  50. from joblib.parallel import effective_n_jobs, cpu_count
  51. from joblib.parallel import mp, BACKENDS, DEFAULT_BACKEND, EXTERNAL_BACKENDS
  52. from joblib.my_exceptions import JoblibException
  53. from joblib.my_exceptions import WorkerInterrupt
  54. ALL_VALID_BACKENDS = [None] + sorted(BACKENDS.keys())
  55. # Add instances of backend classes deriving from ParallelBackendBase
  56. ALL_VALID_BACKENDS += [BACKENDS[backend_str]() for backend_str in BACKENDS]
  57. PROCESS_BACKENDS = ['multiprocessing', 'loky']
  58. PARALLEL_BACKENDS = PROCESS_BACKENDS + ['threading']
  59. if hasattr(mp, 'get_context'):
  60. # Custom multiprocessing context in Python 3.4+
  61. ALL_VALID_BACKENDS.append(mp.get_context('spawn'))
  62. DefaultBackend = BACKENDS[DEFAULT_BACKEND]
  63. def get_workers(backend):
  64. return getattr(backend, '_pool', getattr(backend, '_workers', None))
  65. def division(x, y):
  66. return x / y
  67. def square(x):
  68. return x ** 2
  69. class MyExceptionWithFinickyInit(Exception):
  70. """An exception class with non trivial __init__
  71. """
  72. def __init__(self, a, b, c, d):
  73. pass
  74. def exception_raiser(x, custom_exception=False):
  75. if x == 7:
  76. raise (MyExceptionWithFinickyInit('a', 'b', 'c', 'd')
  77. if custom_exception else ValueError)
  78. return x
  79. def interrupt_raiser(x):
  80. time.sleep(.05)
  81. raise KeyboardInterrupt
  82. def f(x, y=0, z=0):
  83. """ A module-level function so that it can be spawn with
  84. multiprocessing.
  85. """
  86. return x ** 2 + y + z
  87. def _active_backend_type():
  88. return type(parallel.get_active_backend()[0])
  89. def parallel_func(inner_n_jobs, backend):
  90. return Parallel(n_jobs=inner_n_jobs, backend=backend)(
  91. delayed(square)(i) for i in range(3))
  92. ###############################################################################
  93. def test_cpu_count():
  94. assert cpu_count() > 0
  95. def test_effective_n_jobs():
  96. assert effective_n_jobs() > 0
  97. @pytest.mark.parametrize(
  98. "backend_n_jobs, expected_n_jobs",
  99. [(3, 3), (-1, effective_n_jobs(n_jobs=-1)), (None, 1)],
  100. ids=["positive-int", "negative-int", "None"]
  101. )
  102. @with_multiprocessing
  103. def test_effective_n_jobs_None(backend_n_jobs, expected_n_jobs):
  104. # check the number of effective jobs when `n_jobs=None`
  105. # non-regression test for https://github.com/joblib/joblib/issues/984
  106. with parallel_backend("threading", n_jobs=backend_n_jobs):
  107. # when using a backend, the default of number jobs will be the one set
  108. # in the backend
  109. assert effective_n_jobs(n_jobs=None) == expected_n_jobs
  110. # without any backend, None will default to a single job
  111. assert effective_n_jobs(n_jobs=None) == 1
  112. ###############################################################################
  113. # Test parallel
  114. @parametrize('backend', ALL_VALID_BACKENDS)
  115. @parametrize('n_jobs', [1, 2, -1, -2])
  116. @parametrize('verbose', [2, 11, 100])
  117. def test_simple_parallel(backend, n_jobs, verbose):
  118. assert ([square(x) for x in range(5)] ==
  119. Parallel(n_jobs=n_jobs, backend=backend,
  120. verbose=verbose)(
  121. delayed(square)(x) for x in range(5)))
  122. @parametrize('backend', ALL_VALID_BACKENDS)
  123. def test_main_thread_renamed_no_warning(backend, monkeypatch):
  124. # Check that no default backend relies on the name of the main thread:
  125. # https://github.com/joblib/joblib/issues/180#issuecomment-253266247
  126. # Some programs use a different name for the main thread. This is the case
  127. # for uWSGI apps for instance.
  128. monkeypatch.setattr(target=threading.current_thread(), name='name',
  129. value='some_new_name_for_the_main_thread')
  130. with warns(None) as warninfo:
  131. results = Parallel(n_jobs=2, backend=backend)(
  132. delayed(square)(x) for x in range(3))
  133. assert results == [0, 1, 4]
  134. # Due to the default parameters of LokyBackend, there is a chance that
  135. # warninfo catches Warnings from worker timeouts. We remove it if it exists
  136. warninfo = [w for w in warninfo if "worker timeout" not in str(w.message)]
  137. # The multiprocessing backend will raise a warning when detecting that is
  138. # started from the non-main thread. Let's check that there is no false
  139. # positive because of the name change.
  140. assert len(warninfo) == 0
  141. def _assert_warning_nested(backend, inner_n_jobs, expected):
  142. with warns(None) as records:
  143. parallel_func(backend=backend, inner_n_jobs=inner_n_jobs)
  144. if expected:
  145. # with threading, we might see more that one records
  146. if len(records) > 0:
  147. return 'backed parallel loops cannot' in records[0].message.args[0]
  148. return False
  149. else:
  150. assert len(records) == 0
  151. return True
  152. @with_multiprocessing
  153. @parametrize('parent_backend,child_backend,expected', [
  154. ('loky', 'multiprocessing', True), ('loky', 'loky', False),
  155. ('multiprocessing', 'multiprocessing', True),
  156. ('multiprocessing', 'loky', True),
  157. ('threading', 'multiprocessing', True),
  158. ('threading', 'loky', True),
  159. ])
  160. def test_nested_parallel_warnings(parent_backend, child_backend, expected):
  161. # no warnings if inner_n_jobs=1
  162. Parallel(n_jobs=2, backend=parent_backend)(
  163. delayed(_assert_warning_nested)(
  164. backend=child_backend, inner_n_jobs=1,
  165. expected=False)
  166. for _ in range(5))
  167. # warnings if inner_n_jobs != 1 and expected
  168. res = Parallel(n_jobs=2, backend=parent_backend)(
  169. delayed(_assert_warning_nested)(
  170. backend=child_backend, inner_n_jobs=2,
  171. expected=expected)
  172. for _ in range(5))
  173. # warning handling is not thread safe. One thread might see multiple
  174. # warning or no warning at all.
  175. if parent_backend == "threading":
  176. assert any(res)
  177. else:
  178. assert all(res)
  179. @with_multiprocessing
  180. @parametrize('backend', ['loky', 'multiprocessing', 'threading'])
  181. def test_background_thread_parallelism(backend):
  182. is_run_parallel = [False]
  183. def background_thread(is_run_parallel):
  184. with warns(None) as records:
  185. Parallel(n_jobs=2)(
  186. delayed(sleep)(.1) for _ in range(4))
  187. print(len(records))
  188. is_run_parallel[0] = len(records) == 0
  189. t = threading.Thread(target=background_thread, args=(is_run_parallel,))
  190. t.start()
  191. t.join()
  192. assert is_run_parallel[0]
  193. def nested_loop(backend):
  194. Parallel(n_jobs=2, backend=backend)(
  195. delayed(square)(.01) for _ in range(2))
  196. @parametrize('child_backend', BACKENDS)
  197. @parametrize('parent_backend', BACKENDS)
  198. def test_nested_loop(parent_backend, child_backend):
  199. Parallel(n_jobs=2, backend=parent_backend)(
  200. delayed(nested_loop)(child_backend) for _ in range(2))
  201. def raise_exception(backend):
  202. raise ValueError
  203. def test_nested_loop_with_exception_with_loky():
  204. with raises(ValueError):
  205. with Parallel(n_jobs=2, backend="loky") as parallel:
  206. parallel([delayed(nested_loop)("loky"),
  207. delayed(raise_exception)("loky")])
  208. def test_mutate_input_with_threads():
  209. """Input is mutable when using the threading backend"""
  210. q = Queue(maxsize=5)
  211. Parallel(n_jobs=2, backend="threading")(
  212. delayed(q.put)(1) for _ in range(5))
  213. assert q.full()
  214. @parametrize('n_jobs', [1, 2, 3])
  215. def test_parallel_kwargs(n_jobs):
  216. """Check the keyword argument processing of pmap."""
  217. lst = range(10)
  218. assert ([f(x, y=1) for x in lst] ==
  219. Parallel(n_jobs=n_jobs)(delayed(f)(x, y=1) for x in lst))
  220. @parametrize('backend', PARALLEL_BACKENDS)
  221. def test_parallel_as_context_manager(backend):
  222. lst = range(10)
  223. expected = [f(x, y=1) for x in lst]
  224. with Parallel(n_jobs=4, backend=backend) as p:
  225. # Internally a pool instance has been eagerly created and is managed
  226. # via the context manager protocol
  227. managed_backend = p._backend
  228. # We make call with the managed parallel object several times inside
  229. # the managed block:
  230. assert expected == p(delayed(f)(x, y=1) for x in lst)
  231. assert expected == p(delayed(f)(x, y=1) for x in lst)
  232. # Those calls have all used the same pool instance:
  233. if mp is not None:
  234. assert get_workers(managed_backend) is get_workers(p._backend)
  235. # As soon as we exit the context manager block, the pool is terminated and
  236. # no longer referenced from the parallel object:
  237. if mp is not None:
  238. assert get_workers(p._backend) is None
  239. # It's still possible to use the parallel instance in non-managed mode:
  240. assert expected == p(delayed(f)(x, y=1) for x in lst)
  241. if mp is not None:
  242. assert get_workers(p._backend) is None
  243. @with_multiprocessing
  244. def test_parallel_pickling():
  245. """ Check that pmap captures the errors when it is passed an object
  246. that cannot be pickled.
  247. """
  248. class UnpicklableObject(object):
  249. def __reduce__(self):
  250. raise RuntimeError('123')
  251. with raises(PicklingError, match=r"the task to send"):
  252. Parallel(n_jobs=2)(delayed(id)(UnpicklableObject()) for _ in range(10))
  253. @parametrize('backend', PARALLEL_BACKENDS)
  254. def test_parallel_timeout_success(backend):
  255. # Check that timeout isn't thrown when function is fast enough
  256. assert len(Parallel(n_jobs=2, backend=backend, timeout=10)(
  257. delayed(sleep)(0.001) for x in range(10))) == 10
  258. @with_multiprocessing
  259. @parametrize('backend', PARALLEL_BACKENDS)
  260. def test_parallel_timeout_fail(backend):
  261. # Check that timeout properly fails when function is too slow
  262. with raises(TimeoutError):
  263. Parallel(n_jobs=2, backend=backend, timeout=0.01)(
  264. delayed(sleep)(10) for x in range(10))
  265. @with_multiprocessing
  266. @parametrize('backend', PROCESS_BACKENDS)
  267. def test_error_capture(backend):
  268. # Check that error are captured, and that correct exceptions
  269. # are raised.
  270. if mp is not None:
  271. with raises(ZeroDivisionError):
  272. Parallel(n_jobs=2, backend=backend)(
  273. [delayed(division)(x, y)
  274. for x, y in zip((0, 1), (1, 0))])
  275. with raises(WorkerInterrupt):
  276. Parallel(n_jobs=2, backend=backend)(
  277. [delayed(interrupt_raiser)(x) for x in (1, 0)])
  278. # Try again with the context manager API
  279. with Parallel(n_jobs=2, backend=backend) as parallel:
  280. assert get_workers(parallel._backend) is not None
  281. original_workers = get_workers(parallel._backend)
  282. with raises(ZeroDivisionError):
  283. parallel([delayed(division)(x, y)
  284. for x, y in zip((0, 1), (1, 0))])
  285. # The managed pool should still be available and be in a working
  286. # state despite the previously raised (and caught) exception
  287. assert get_workers(parallel._backend) is not None
  288. # The pool should have been interrupted and restarted:
  289. assert get_workers(parallel._backend) is not original_workers
  290. assert ([f(x, y=1) for x in range(10)] ==
  291. parallel(delayed(f)(x, y=1) for x in range(10)))
  292. original_workers = get_workers(parallel._backend)
  293. with raises(WorkerInterrupt):
  294. parallel([delayed(interrupt_raiser)(x) for x in (1, 0)])
  295. # The pool should still be available despite the exception
  296. assert get_workers(parallel._backend) is not None
  297. # The pool should have been interrupted and restarted:
  298. assert get_workers(parallel._backend) is not original_workers
  299. assert ([f(x, y=1) for x in range(10)] ==
  300. parallel(delayed(f)(x, y=1) for x in range(10)))
  301. # Check that the inner pool has been terminated when exiting the
  302. # context manager
  303. assert get_workers(parallel._backend) is None
  304. else:
  305. with raises(KeyboardInterrupt):
  306. Parallel(n_jobs=2)(
  307. [delayed(interrupt_raiser)(x) for x in (1, 0)])
  308. # wrapped exceptions should inherit from the class of the original
  309. # exception to make it easy to catch them
  310. with raises(ZeroDivisionError):
  311. Parallel(n_jobs=2)(
  312. [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))])
  313. with raises(MyExceptionWithFinickyInit):
  314. Parallel(n_jobs=2, verbose=0)(
  315. (delayed(exception_raiser)(i, custom_exception=True)
  316. for i in range(30)))
  317. try:
  318. # JoblibException wrapping is disabled in sequential mode:
  319. Parallel(n_jobs=1)(
  320. delayed(division)(x, y) for x, y in zip((0, 1), (1, 0)))
  321. except Exception as ex:
  322. assert not isinstance(ex, JoblibException)
  323. else:
  324. raise ValueError("The excepted error has not been raised.")
  325. def consumer(queue, item):
  326. queue.append('Consumed %s' % item)
  327. @parametrize('backend', BACKENDS)
  328. @parametrize('batch_size, expected_queue',
  329. [(1, ['Produced 0', 'Consumed 0',
  330. 'Produced 1', 'Consumed 1',
  331. 'Produced 2', 'Consumed 2',
  332. 'Produced 3', 'Consumed 3',
  333. 'Produced 4', 'Consumed 4',
  334. 'Produced 5', 'Consumed 5']),
  335. (4, [ # First Batch
  336. 'Produced 0', 'Produced 1', 'Produced 2', 'Produced 3',
  337. 'Consumed 0', 'Consumed 1', 'Consumed 2', 'Consumed 3',
  338. # Second batch
  339. 'Produced 4', 'Produced 5', 'Consumed 4', 'Consumed 5'])])
  340. def test_dispatch_one_job(backend, batch_size, expected_queue):
  341. """ Test that with only one job, Parallel does act as a iterator.
  342. """
  343. queue = list()
  344. def producer():
  345. for i in range(6):
  346. queue.append('Produced %i' % i)
  347. yield i
  348. Parallel(n_jobs=1, batch_size=batch_size, backend=backend)(
  349. delayed(consumer)(queue, x) for x in producer())
  350. assert queue == expected_queue
  351. assert len(queue) == 12
  352. @with_multiprocessing
  353. @parametrize('backend', PARALLEL_BACKENDS)
  354. def test_dispatch_multiprocessing(backend):
  355. """ Check that using pre_dispatch Parallel does indeed dispatch items
  356. lazily.
  357. """
  358. manager = mp.Manager()
  359. queue = manager.list()
  360. def producer():
  361. for i in range(6):
  362. queue.append('Produced %i' % i)
  363. yield i
  364. Parallel(n_jobs=2, batch_size=1, pre_dispatch=3, backend=backend)(
  365. delayed(consumer)(queue, 'any') for _ in producer())
  366. queue_contents = list(queue)
  367. assert queue_contents[0] == 'Produced 0'
  368. # Only 3 tasks are pre-dispatched out of 6. The 4th task is dispatched only
  369. # after any of the first 3 jobs have completed.
  370. first_consumption_index = queue_contents[:4].index('Consumed any')
  371. assert first_consumption_index > -1
  372. produced_3_index = queue_contents.index('Produced 3') # 4th task produced
  373. assert produced_3_index > first_consumption_index
  374. assert len(queue) == 12
  375. def test_batching_auto_threading():
  376. # batching='auto' with the threading backend leaves the effective batch
  377. # size to 1 (no batching) as it has been found to never be beneficial with
  378. # this low-overhead backend.
  379. with Parallel(n_jobs=2, batch_size='auto', backend='threading') as p:
  380. p(delayed(id)(i) for i in range(5000)) # many very fast tasks
  381. assert p._backend.compute_batch_size() == 1
  382. @with_multiprocessing
  383. @parametrize('backend', PROCESS_BACKENDS)
  384. def test_batching_auto_subprocesses(backend):
  385. with Parallel(n_jobs=2, batch_size='auto', backend=backend) as p:
  386. p(delayed(id)(i) for i in range(5000)) # many very fast tasks
  387. # It should be strictly larger than 1 but as we don't want heisen
  388. # failures on clogged CI worker environment be safe and only check that
  389. # it's a strictly positive number.
  390. assert p._backend.compute_batch_size() > 0
  391. def test_exception_dispatch():
  392. """Make sure that exception raised during dispatch are indeed captured"""
  393. with raises(ValueError):
  394. Parallel(n_jobs=2, pre_dispatch=16, verbose=0)(
  395. delayed(exception_raiser)(i) for i in range(30))
  396. def nested_function_inner(i):
  397. Parallel(n_jobs=2)(
  398. delayed(exception_raiser)(j) for j in range(30))
  399. def nested_function_outer(i):
  400. Parallel(n_jobs=2)(
  401. delayed(nested_function_inner)(j) for j in range(30))
  402. @with_multiprocessing
  403. @parametrize('backend', PARALLEL_BACKENDS)
  404. @pytest.mark.xfail(reason="https://github.com/joblib/loky/pull/255")
  405. def test_nested_exception_dispatch(backend):
  406. """Ensure errors for nested joblib cases gets propagated
  407. We rely on the Python 3 built-in __cause__ system that already
  408. report this kind of information to the user.
  409. """
  410. with raises(ValueError) as excinfo:
  411. Parallel(n_jobs=2, backend=backend)(
  412. delayed(nested_function_outer)(i) for i in range(30))
  413. # Check that important information such as function names are visible
  414. # in the final error message reported to the user
  415. report_lines = format_exception(excinfo.type, excinfo.value, excinfo.tb)
  416. report = "".join(report_lines)
  417. assert 'nested_function_outer' in report
  418. assert 'nested_function_inner' in report
  419. assert 'exception_raiser' in report
  420. assert type(excinfo.value) is ValueError
  421. class FakeParallelBackend(SequentialBackend):
  422. """Pretends to run concurrently while running sequentially."""
  423. def configure(self, n_jobs=1, parallel=None, **backend_args):
  424. self.n_jobs = self.effective_n_jobs(n_jobs)
  425. self.parallel = parallel
  426. return n_jobs
  427. def effective_n_jobs(self, n_jobs=1):
  428. if n_jobs < 0:
  429. n_jobs = max(mp.cpu_count() + 1 + n_jobs, 1)
  430. return n_jobs
  431. def test_invalid_backend():
  432. with raises(ValueError):
  433. Parallel(backend='unit-testing')
  434. @parametrize('backend', ALL_VALID_BACKENDS)
  435. def test_invalid_njobs(backend):
  436. with raises(ValueError) as excinfo:
  437. Parallel(n_jobs=0, backend=backend)._initialize_backend()
  438. assert "n_jobs == 0 in Parallel has no meaning" in str(excinfo.value)
  439. def test_register_parallel_backend():
  440. try:
  441. register_parallel_backend("test_backend", FakeParallelBackend)
  442. assert "test_backend" in BACKENDS
  443. assert BACKENDS["test_backend"] == FakeParallelBackend
  444. finally:
  445. del BACKENDS["test_backend"]
  446. def test_overwrite_default_backend():
  447. assert _active_backend_type() == DefaultBackend
  448. try:
  449. register_parallel_backend("threading", BACKENDS["threading"],
  450. make_default=True)
  451. assert _active_backend_type() == ThreadingBackend
  452. finally:
  453. # Restore the global default manually
  454. parallel.DEFAULT_BACKEND = DEFAULT_BACKEND
  455. assert _active_backend_type() == DefaultBackend
  456. def check_backend_context_manager(backend_name):
  457. with parallel_backend(backend_name, n_jobs=3):
  458. active_backend, active_n_jobs = parallel.get_active_backend()
  459. assert active_n_jobs == 3
  460. assert effective_n_jobs(3) == 3
  461. p = Parallel()
  462. assert p.n_jobs == 3
  463. if backend_name == 'multiprocessing':
  464. assert type(active_backend) == MultiprocessingBackend
  465. assert type(p._backend) == MultiprocessingBackend
  466. elif backend_name == 'loky':
  467. assert type(active_backend) == LokyBackend
  468. assert type(p._backend) == LokyBackend
  469. elif backend_name == 'threading':
  470. assert type(active_backend) == ThreadingBackend
  471. assert type(p._backend) == ThreadingBackend
  472. elif backend_name.startswith('test_'):
  473. assert type(active_backend) == FakeParallelBackend
  474. assert type(p._backend) == FakeParallelBackend
  475. all_backends_for_context_manager = PARALLEL_BACKENDS[:]
  476. all_backends_for_context_manager.extend(
  477. ['test_backend_%d' % i for i in range(3)]
  478. )
  479. @with_multiprocessing
  480. @parametrize('backend', all_backends_for_context_manager)
  481. def test_backend_context_manager(monkeypatch, backend):
  482. if backend not in BACKENDS:
  483. monkeypatch.setitem(BACKENDS, backend, FakeParallelBackend)
  484. assert _active_backend_type() == DefaultBackend
  485. # check that this possible to switch parallel backends sequentially
  486. check_backend_context_manager(backend)
  487. # The default backend is restored
  488. assert _active_backend_type() == DefaultBackend
  489. # Check that context manager switching is thread safe:
  490. Parallel(n_jobs=2, backend='threading')(
  491. delayed(check_backend_context_manager)(b)
  492. for b in all_backends_for_context_manager if not b)
  493. # The default backend is again restored
  494. assert _active_backend_type() == DefaultBackend
  495. class ParameterizedParallelBackend(SequentialBackend):
  496. """Pretends to run conncurrently while running sequentially."""
  497. def __init__(self, param=None):
  498. if param is None:
  499. raise ValueError('param should not be None')
  500. self.param = param
  501. def test_parameterized_backend_context_manager(monkeypatch):
  502. monkeypatch.setitem(BACKENDS, 'param_backend',
  503. ParameterizedParallelBackend)
  504. assert _active_backend_type() == DefaultBackend
  505. with parallel_backend('param_backend', param=42, n_jobs=3):
  506. active_backend, active_n_jobs = parallel.get_active_backend()
  507. assert type(active_backend) == ParameterizedParallelBackend
  508. assert active_backend.param == 42
  509. assert active_n_jobs == 3
  510. p = Parallel()
  511. assert p.n_jobs == 3
  512. assert p._backend is active_backend
  513. results = p(delayed(sqrt)(i) for i in range(5))
  514. assert results == [sqrt(i) for i in range(5)]
  515. # The default backend is again restored
  516. assert _active_backend_type() == DefaultBackend
  517. def test_directly_parameterized_backend_context_manager():
  518. assert _active_backend_type() == DefaultBackend
  519. # Check that it's possible to pass a backend instance directly,
  520. # without registration
  521. with parallel_backend(ParameterizedParallelBackend(param=43), n_jobs=5):
  522. active_backend, active_n_jobs = parallel.get_active_backend()
  523. assert type(active_backend) == ParameterizedParallelBackend
  524. assert active_backend.param == 43
  525. assert active_n_jobs == 5
  526. p = Parallel()
  527. assert p.n_jobs == 5
  528. assert p._backend is active_backend
  529. results = p(delayed(sqrt)(i) for i in range(5))
  530. assert results == [sqrt(i) for i in range(5)]
  531. # The default backend is again restored
  532. assert _active_backend_type() == DefaultBackend
  533. def sleep_and_return_pid():
  534. sleep(.1)
  535. return os.getpid()
  536. def get_nested_pids():
  537. assert _active_backend_type() == ThreadingBackend
  538. # Assert that the nested backend does not change the default number of
  539. # jobs used in Parallel
  540. assert Parallel()._effective_n_jobs() == 1
  541. # Assert that the tasks are running only on one process
  542. return Parallel(n_jobs=2)(delayed(sleep_and_return_pid)()
  543. for _ in range(2))
  544. class MyBackend(joblib._parallel_backends.LokyBackend):
  545. """Backend to test backward compatibility with older backends"""
  546. def get_nested_backend(self, ):
  547. # Older backends only return a backend, without n_jobs indications.
  548. return super(MyBackend, self).get_nested_backend()[0]
  549. register_parallel_backend('back_compat_backend', MyBackend)
  550. @with_multiprocessing
  551. @parametrize('backend', ['threading', 'loky', 'multiprocessing',
  552. 'back_compat_backend'])
  553. def test_nested_backend_context_manager(backend):
  554. # Check that by default, nested parallel calls will always use the
  555. # ThreadingBackend
  556. with parallel_backend(backend):
  557. pid_groups = Parallel(n_jobs=2)(
  558. delayed(get_nested_pids)()
  559. for _ in range(10)
  560. )
  561. for pid_group in pid_groups:
  562. assert len(set(pid_group)) == 1
  563. @with_multiprocessing
  564. @parametrize('n_jobs', [2, -1, None])
  565. @parametrize('backend', PARALLEL_BACKENDS)
  566. def test_nested_backend_in_sequential(backend, n_jobs):
  567. # Check that by default, nested parallel calls will always use the
  568. # ThreadingBackend
  569. def check_nested_backend(expected_backend_type, expected_n_job):
  570. # Assert that the sequential backend at top level, does not change the
  571. # backend for nested calls.
  572. assert _active_backend_type() == BACKENDS[expected_backend_type]
  573. # Assert that the nested backend in SequentialBackend does not change
  574. # the default number of jobs used in Parallel
  575. expected_n_job = effective_n_jobs(expected_n_job)
  576. assert Parallel()._effective_n_jobs() == expected_n_job
  577. Parallel(n_jobs=1)(
  578. delayed(check_nested_backend)('loky', 1)
  579. for _ in range(10)
  580. )
  581. with parallel_backend(backend, n_jobs=n_jobs):
  582. Parallel(n_jobs=1)(
  583. delayed(check_nested_backend)(backend, n_jobs)
  584. for _ in range(10)
  585. )
  586. def check_nesting_level(inner_backend, expected_level):
  587. with parallel_backend(inner_backend) as (backend, n_jobs):
  588. assert backend.nesting_level == expected_level
  589. @with_multiprocessing
  590. @parametrize('outer_backend', PARALLEL_BACKENDS)
  591. @parametrize('inner_backend', PARALLEL_BACKENDS)
  592. def test_backend_nesting_level(outer_backend, inner_backend):
  593. # Check that the nesting level for the backend is correctly set
  594. check_nesting_level(outer_backend, 0)
  595. Parallel(n_jobs=2, backend=outer_backend)(
  596. delayed(check_nesting_level)(inner_backend, 1)
  597. for _ in range(10)
  598. )
  599. with parallel_backend(inner_backend, n_jobs=2):
  600. Parallel()(delayed(check_nesting_level)(inner_backend, 1)
  601. for _ in range(10))
  602. @with_multiprocessing
  603. def test_retrieval_context():
  604. import contextlib
  605. class MyBackend(ThreadingBackend):
  606. i = 0
  607. @contextlib.contextmanager
  608. def retrieval_context(self):
  609. self.i += 1
  610. yield
  611. register_parallel_backend("retrieval", MyBackend)
  612. def nested_call(n):
  613. return Parallel(n_jobs=2)(delayed(id)(i) for i in range(n))
  614. with parallel_backend("retrieval") as (ba, _):
  615. Parallel(n_jobs=2)(
  616. delayed(nested_call, check_pickle=False)(i)
  617. for i in range(5)
  618. )
  619. assert ba.i == 1
  620. ###############################################################################
  621. # Test helpers
  622. def test_joblib_exception():
  623. # Smoke-test the custom exception
  624. e = JoblibException('foobar')
  625. # Test the repr
  626. repr(e)
  627. # Test the pickle
  628. pickle.dumps(e)
  629. def test_safe_function():
  630. safe_division = SafeFunction(division)
  631. with raises(ZeroDivisionError):
  632. safe_division(1, 0)
  633. safe_interrupt = SafeFunction(interrupt_raiser)
  634. with raises(WorkerInterrupt):
  635. safe_interrupt('x')
  636. @parametrize('batch_size', [0, -1, 1.42])
  637. def test_invalid_batch_size(batch_size):
  638. with raises(ValueError):
  639. Parallel(batch_size=batch_size)
  640. @parametrize('n_tasks, n_jobs, pre_dispatch, batch_size',
  641. [(2, 2, 'all', 'auto'),
  642. (2, 2, 'n_jobs', 'auto'),
  643. (10, 2, 'n_jobs', 'auto'),
  644. (517, 2, 'n_jobs', 'auto'),
  645. (10, 2, 'n_jobs', 'auto'),
  646. (10, 4, 'n_jobs', 'auto'),
  647. (200, 12, 'n_jobs', 'auto'),
  648. (25, 12, '2 * n_jobs', 1),
  649. (250, 12, 'all', 1),
  650. (250, 12, '2 * n_jobs', 7),
  651. (200, 12, '2 * n_jobs', 'auto')])
  652. def test_dispatch_race_condition(n_tasks, n_jobs, pre_dispatch, batch_size):
  653. # Check that using (async-)dispatch does not yield a race condition on the
  654. # iterable generator that is not thread-safe natively.
  655. # This is a non-regression test for the "Pool seems closed" class of error
  656. params = {'n_jobs': n_jobs, 'pre_dispatch': pre_dispatch,
  657. 'batch_size': batch_size}
  658. expected = [square(i) for i in range(n_tasks)]
  659. results = Parallel(**params)(delayed(square)(i) for i in range(n_tasks))
  660. assert results == expected
  661. @with_multiprocessing
  662. def test_default_mp_context():
  663. mp_start_method = mp.get_start_method()
  664. p = Parallel(n_jobs=2, backend='multiprocessing')
  665. context = p._backend_args.get('context')
  666. start_method = context.get_start_method()
  667. assert start_method == mp_start_method
  668. @with_numpy
  669. @with_multiprocessing
  670. @parametrize('backend', PROCESS_BACKENDS)
  671. def test_no_blas_crash_or_freeze_with_subprocesses(backend):
  672. if backend == 'multiprocessing':
  673. # Use the spawn backend that is both robust and available on all
  674. # platforms
  675. backend = mp.get_context('spawn')
  676. # Check that on recent Python version, the 'spawn' start method can make
  677. # it possible to use multiprocessing in conjunction of any BLAS
  678. # implementation that happens to be used by numpy with causing a freeze or
  679. # a crash
  680. rng = np.random.RandomState(42)
  681. # call BLAS DGEMM to force the initialization of the internal thread-pool
  682. # in the main process
  683. a = rng.randn(1000, 1000)
  684. np.dot(a, a.T)
  685. # check that the internal BLAS thread-pool is not in an inconsistent state
  686. # in the worker processes managed by multiprocessing
  687. Parallel(n_jobs=2, backend=backend)(
  688. delayed(np.dot)(a, a.T) for i in range(2))
# Source of a small script, run by the test below through
# ``sys.executable -c``. The single positional ``{}`` placeholder receives the
# backend name; "spawn" is turned into a multiprocessing context by the
# script itself. The script squares range(5) in parallel and prints the list.
UNPICKLABLE_CALLABLE_SCRIPT_TEMPLATE_NO_MAIN = """\
from joblib import Parallel, delayed
def square(x):
    return x ** 2
backend = "{}"
if backend == "spawn":
    from multiprocessing import get_context
    backend = get_context(backend)
print(Parallel(n_jobs=2, backend=backend)(
      delayed(square)(i) for i in range(5)))
"""
  700. @with_multiprocessing
  701. @parametrize('backend', PROCESS_BACKENDS)
  702. def test_parallel_with_interactively_defined_functions(backend):
  703. # When using the "-c" flag, interactive functions defined in __main__
  704. # should work with any backend.
  705. if backend == "multiprocessing" and mp.get_start_method() != "fork":
  706. pytest.skip("Require fork start method to use interactively defined "
  707. "functions with multiprocessing.")
  708. code = UNPICKLABLE_CALLABLE_SCRIPT_TEMPLATE_NO_MAIN.format(backend)
  709. check_subprocess_call(
  710. [sys.executable, '-c', code], timeout=10,
  711. stdout_regex=r'\[0, 1, 4, 9, 16\]')
# Source template of a script written to disk and executed by
# test_parallel_with_unpicklable_functions_in_args. Placeholders:
#   {joblib_root_folder!r}: folder inserted first in sys.path,
#   {define_func}: source code defining the name ``square``,
#   {backend}: backend name ("spawn" is turned into a multiprocessing context),
#   {callable_position}: "delayed", "args" or anything else (keyword args);
#       selects how ``square`` is handed over to the parallel call.
UNPICKLABLE_CALLABLE_SCRIPT_TEMPLATE_MAIN = """\
import sys
# Make sure that joblib is importable in the subprocess launching this
# script. This is needed in case we run the tests from the joblib root
# folder without having installed joblib
sys.path.insert(0, {joblib_root_folder!r})
from joblib import Parallel, delayed
def run(f, x):
    return f(x)
{define_func}
if __name__ == "__main__":
    backend = "{backend}"
    if backend == "spawn":
        from multiprocessing import get_context
        backend = get_context(backend)
    callable_position = "{callable_position}"
    if callable_position == "delayed":
        print(Parallel(n_jobs=2, backend=backend)(
                delayed(square)(i) for i in range(5)))
    elif callable_position == "args":
        print(Parallel(n_jobs=2, backend=backend)(
                delayed(run)(square, i) for i in range(5)))
    else:
        print(Parallel(n_jobs=2, backend=backend)(
                delayed(run)(f=square, x=i) for i in range(5)))
"""
# Three alternative source snippets defining a callable named ``square``;
# each one is substituted for ``{define_func}`` in the script template.

# ``square`` as a plain module-level function.
SQUARE_MAIN = """\
def square(x):
    return x ** 2
"""
# ``square`` as a function defined inside another function.
SQUARE_LOCAL = """\
def gen_square():
    def square(x):
        return x ** 2
    return square
square = gen_square()
"""
# ``square`` as a lambda expression.
SQUARE_LAMBDA = """\
square = lambda x: x ** 2
"""
  752. @with_multiprocessing
  753. @parametrize('backend', PROCESS_BACKENDS + ([] if mp is None else ['spawn']))
  754. @parametrize('define_func', [SQUARE_MAIN, SQUARE_LOCAL, SQUARE_LAMBDA])
  755. @parametrize('callable_position', ['delayed', 'args', 'kwargs'])
  756. def test_parallel_with_unpicklable_functions_in_args(
  757. backend, define_func, callable_position, tmpdir):
  758. if backend in ['multiprocessing', 'spawn'] and (
  759. define_func != SQUARE_MAIN or sys.platform == "win32"):
  760. pytest.skip("Not picklable with pickle")
  761. code = UNPICKLABLE_CALLABLE_SCRIPT_TEMPLATE_MAIN.format(
  762. define_func=define_func, backend=backend,
  763. callable_position=callable_position,
  764. joblib_root_folder=os.path.dirname(os.path.dirname(joblib.__file__)))
  765. code_file = tmpdir.join("unpicklable_func_script.py")
  766. code_file.write(code)
  767. check_subprocess_call(
  768. [sys.executable, code_file.strpath], timeout=10,
  769. stdout_regex=r'\[0, 1, 4, 9, 16\]')
# Source of a script defining a class, a function and a functools.partial in
# its __main__ namespace, then using them in a Parallel call without any
# ``if __name__ == "__main__":`` guard. The ``{joblib_root_folder!r}``
# placeholder is filled right here with the parent folder of the joblib
# package.
INTERACTIVE_DEFINED_FUNCTION_AND_CLASS_SCRIPT_CONTENT = """\
import sys
# Make sure that joblib is importable in the subprocess launching this
# script. This is needed in case we run the tests from the joblib root
# folder without having installed joblib
sys.path.insert(0, {joblib_root_folder!r})
from joblib import Parallel, delayed
from functools import partial
class MyClass:
    '''Class defined in the __main__ namespace'''
    def __init__(self, value):
        self.value = value
def square(x, ignored=None, ignored2=None):
    '''Function defined in the __main__ namespace'''
    return x.value ** 2
square2 = partial(square, ignored2='something')
# Here, we do not need the `if __name__ == "__main__":` safeguard when
# using the default `loky` backend (even on Windows).
# The following baroque function call is meant to check that joblib
# introspection rightfully uses cloudpickle instead of the (faster) pickle
# module of the standard library when necessary. In particular cloudpickle is
# necessary for functions and instances of classes interactively defined in the
# __main__ module.
print(Parallel(n_jobs=2)(
    delayed(square2)(MyClass(i), ignored=[dict(a=MyClass(1))])
    for i in range(5)
))
""".format(joblib_root_folder=os.path.dirname(
    os.path.dirname(joblib.__file__)))
  799. @with_multiprocessing
  800. def test_parallel_with_interactively_defined_functions_default_backend(tmpdir):
  801. # The default backend (loky) accepts interactive functions defined in
  802. # __main__ and does not require if __name__ == '__main__' even when
  803. # the __main__ module is defined by the result of the execution of a
  804. # filesystem script.
  805. script = tmpdir.join('joblib_interactively_defined_function.py')
  806. script.write(INTERACTIVE_DEFINED_FUNCTION_AND_CLASS_SCRIPT_CONTENT)
  807. check_subprocess_call([sys.executable, script.strpath],
  808. stdout_regex=r'\[0, 1, 4, 9, 16\]',
  809. timeout=5)
# Source of a script that defines a ``list`` subclass in its __main__
# namespace and dispatches the bound ``append`` method of an instance through
# Parallel. It also turns on multiprocessing logging to stderr. The
# ``{joblib_root_folder!r}`` placeholder is filled right here with the parent
# folder of the joblib package.
INTERACTIVELY_DEFINED_SUBCLASS_WITH_METHOD_SCRIPT_CONTENT = """\
import sys
# Make sure that joblib is importable in the subprocess launching this
# script. This is needed in case we run the tests from the joblib root
# folder without having installed joblib
sys.path.insert(0, {joblib_root_folder!r})
from joblib import Parallel, delayed, hash
import multiprocessing as mp
mp.util.log_to_stderr(5)
class MyList(list):
    '''MyList is interactively defined by MyList.append is a built-in'''
    def __hash__(self):
        # XXX: workaround limitation in cloudpickle
        return hash(self).__hash__()
l = MyList()
print(Parallel(n_jobs=2)(
    delayed(l.append)(i) for i in range(3)
))
""".format(joblib_root_folder=os.path.dirname(
    os.path.dirname(joblib.__file__)))
  830. @with_multiprocessing
  831. def test_parallel_with_interactively_defined_bound_method(tmpdir):
  832. script = tmpdir.join('joblib_interactive_bound_method_script.py')
  833. script.write(INTERACTIVELY_DEFINED_SUBCLASS_WITH_METHOD_SCRIPT_CONTENT)
  834. check_subprocess_call([sys.executable, script.strpath],
  835. stdout_regex=r'\[None, None, None\]',
  836. stderr_regex=r'LokyProcess',
  837. timeout=15)
  838. def test_parallel_with_exhausted_iterator():
  839. exhausted_iterator = iter([])
  840. assert Parallel(n_jobs=2)(exhausted_iterator) == []
  841. def check_memmap(a):
  842. if not isinstance(a, np.memmap):
  843. raise TypeError('Expected np.memmap instance, got %r',
  844. type(a))
  845. return a.copy() # return a regular array instead of a memmap
  846. @with_numpy
  847. @with_multiprocessing
  848. @parametrize('backend', PROCESS_BACKENDS)
  849. def test_auto_memmap_on_arrays_from_generator(backend):
  850. # Non-regression test for a problem with a bad interaction between the
  851. # GC collecting arrays recently created during iteration inside the
  852. # parallel dispatch loop and the auto-memmap feature of Parallel.
  853. # See: https://github.com/joblib/joblib/pull/294
  854. def generate_arrays(n):
  855. for i in range(n):
  856. yield np.ones(10, dtype=np.float32) * i
  857. # Use max_nbytes=1 to force the use of memory-mapping even for small
  858. # arrays
  859. results = Parallel(n_jobs=2, max_nbytes=1, backend=backend)(
  860. delayed(check_memmap)(a) for a in generate_arrays(100))
  861. for result, expected in zip(results, generate_arrays(len(results))):
  862. np.testing.assert_array_equal(expected, result)
  863. # Second call to force loky to adapt the executor by growing the number
  864. # of worker processes. This is a non-regression test for:
  865. # https://github.com/joblib/joblib/issues/629.
  866. results = Parallel(n_jobs=4, max_nbytes=1, backend=backend)(
  867. delayed(check_memmap)(a) for a in generate_arrays(100))
  868. for result, expected in zip(results, generate_arrays(len(results))):
  869. np.testing.assert_array_equal(expected, result)
  870. def identity(arg):
  871. return arg
  872. @with_numpy
  873. @with_multiprocessing
  874. def test_memmap_with_big_offset(tmpdir):
  875. fname = tmpdir.join('test.mmap').strpath
  876. size = mmap.ALLOCATIONGRANULARITY
  877. obj = [np.zeros(size, dtype='uint8'), np.ones(size, dtype='uint8')]
  878. dump(obj, fname)
  879. memmap = load(fname, mmap_mode='r')
  880. result, = Parallel(n_jobs=2)(delayed(identity)(memmap) for _ in [0])
  881. assert isinstance(memmap[1], np.memmap)
  882. assert memmap[1].offset > size
  883. np.testing.assert_array_equal(obj, result)
  884. def test_warning_about_timeout_not_supported_by_backend():
  885. with warns(None) as warninfo:
  886. Parallel(timeout=1)(delayed(square)(i) for i in range(50))
  887. assert len(warninfo) == 1
  888. w = warninfo[0]
  889. assert isinstance(w.message, UserWarning)
  890. assert str(w.message) == (
  891. "The backend class 'SequentialBackend' does not support timeout. "
  892. "You have set 'timeout=1' in Parallel but the 'timeout' parameter "
  893. "will not be used.")
  894. @parametrize('backend', ALL_VALID_BACKENDS)
  895. @parametrize('n_jobs', [1, 2, -2, -1])
  896. def test_abort_backend(n_jobs, backend):
  897. delays = ["a"] + [10] * 100
  898. with raises(TypeError):
  899. t_start = time.time()
  900. Parallel(n_jobs=n_jobs, backend=backend)(
  901. delayed(time.sleep)(i) for i in delays)
  902. dt = time.time() - t_start
  903. assert dt < 20
  904. @with_numpy
  905. @with_multiprocessing
  906. @parametrize('backend', PROCESS_BACKENDS)
  907. def test_memmapping_leaks(backend, tmpdir):
  908. # Non-regression test for memmapping backends. Ensure that the data
  909. # does not stay too long in memory
  910. tmpdir = tmpdir.strpath
  911. # Use max_nbytes=1 to force the use of memory-mapping even for small
  912. # arrays
  913. with Parallel(n_jobs=2, max_nbytes=1, backend=backend,
  914. temp_folder=tmpdir) as p:
  915. p(delayed(check_memmap)(a) for a in [np.random.random(10)] * 2)
  916. # The memmap folder should not be clean in the context scope
  917. assert len(os.listdir(tmpdir)) > 0
  918. # Make sure that the shared memory is cleaned at the end when we exit
  919. # the context
  920. for _ in range(100):
  921. if not os.listdir(tmpdir):
  922. break
  923. sleep(.1)
  924. else:
  925. raise AssertionError('temporary directory of Parallel was not removed')
  926. # Make sure that the shared memory is cleaned at the end of a call
  927. p = Parallel(n_jobs=2, max_nbytes=1, backend=backend)
  928. p(delayed(check_memmap)(a) for a in [np.random.random(10)] * 2)
  929. for _ in range(100):
  930. if not os.listdir(tmpdir):
  931. break
  932. sleep(.1)
  933. else:
  934. raise AssertionError('temporary directory of Parallel was not removed')
  935. @parametrize('backend', [None, 'loky', 'threading'])
  936. def test_lambda_expression(backend):
  937. # cloudpickle is used to pickle delayed callables
  938. results = Parallel(n_jobs=2, backend=backend)(
  939. delayed(lambda x: x ** 2)(i) for i in range(10))
  940. assert results == [i ** 2 for i in range(10)]
  941. def test_delayed_check_pickle_deprecated():
  942. class UnpicklableCallable(object):
  943. def __call__(self, *args, **kwargs):
  944. return 42
  945. def __reduce__(self):
  946. raise ValueError()
  947. with warns(DeprecationWarning):
  948. f, args, kwargs = delayed(lambda x: 42, check_pickle=False)('a')
  949. assert f('a') == 42
  950. assert args == ('a',)
  951. assert kwargs == dict()
  952. with warns(DeprecationWarning):
  953. f, args, kwargs = delayed(UnpicklableCallable(),
  954. check_pickle=False)('a', option='b')
  955. assert f('a', option='b') == 42
  956. assert args == ('a',)
  957. assert kwargs == dict(option='b')
  958. with warns(DeprecationWarning):
  959. with raises(ValueError):
  960. delayed(UnpicklableCallable(), check_pickle=True)
  961. @with_multiprocessing
  962. @parametrize('backend', PROCESS_BACKENDS)
  963. def test_backend_batch_statistics_reset(backend):
  964. """Test that a parallel backend correctly resets its batch statistics."""
  965. n_jobs = 2
  966. n_inputs = 500
  967. task_time = 2. / n_inputs
  968. p = Parallel(verbose=10, n_jobs=n_jobs, backend=backend)
  969. p(delayed(time.sleep)(task_time) for i in range(n_inputs))
  970. assert (p._backend._effective_batch_size ==
  971. p._backend._DEFAULT_EFFECTIVE_BATCH_SIZE)
  972. assert (p._backend._smoothed_batch_duration ==
  973. p._backend._DEFAULT_SMOOTHED_BATCH_DURATION)
  974. p(delayed(time.sleep)(task_time) for i in range(n_inputs))
  975. assert (p._backend._effective_batch_size ==
  976. p._backend._DEFAULT_EFFECTIVE_BATCH_SIZE)
  977. assert (p._backend._smoothed_batch_duration ==
  978. p._backend._DEFAULT_SMOOTHED_BATCH_DURATION)
  979. def test_backend_hinting_and_constraints():
  980. for n_jobs in [1, 2, -1]:
  981. assert type(Parallel(n_jobs=n_jobs)._backend) == LokyBackend
  982. p = Parallel(n_jobs=n_jobs, prefer='threads')
  983. assert type(p._backend) == ThreadingBackend
  984. p = Parallel(n_jobs=n_jobs, prefer='processes')
  985. assert type(p._backend) == LokyBackend
  986. p = Parallel(n_jobs=n_jobs, require='sharedmem')
  987. assert type(p._backend) == ThreadingBackend
  988. # Explicit backend selection can override backend hinting although it
  989. # is useless to pass a hint when selecting a backend.
  990. p = Parallel(n_jobs=2, backend='loky', prefer='threads')
  991. assert type(p._backend) == LokyBackend
  992. with parallel_backend('loky', n_jobs=2):
  993. # Explicit backend selection by the user with the context manager
  994. # should be respected when combined with backend hints only.
  995. p = Parallel(prefer='threads')
  996. assert type(p._backend) == LokyBackend
  997. assert p.n_jobs == 2
  998. with parallel_backend('loky', n_jobs=2):
  999. # Locally hard-coded n_jobs value is respected.
  1000. p = Parallel(n_jobs=3, prefer='threads')
  1001. assert type(p._backend) == LokyBackend
  1002. assert p.n_jobs == 3
  1003. with parallel_backend('loky', n_jobs=2):
  1004. # Explicit backend selection by the user with the context manager
  1005. # should be ignored when the Parallel call has hard constraints.
  1006. # In this case, the default backend that supports shared mem is
  1007. # used an the default number of processes is used.
  1008. p = Parallel(require='sharedmem')
  1009. assert type(p._backend) == ThreadingBackend
  1010. assert p.n_jobs == 1
  1011. with parallel_backend('loky', n_jobs=2):
  1012. p = Parallel(n_jobs=3, require='sharedmem')
  1013. assert type(p._backend) == ThreadingBackend
  1014. assert p.n_jobs == 3
  1015. def test_backend_hinting_and_constraints_with_custom_backends(capsys):
  1016. # Custom backends can declare that they use threads and have shared memory
  1017. # semantics:
  1018. class MyCustomThreadingBackend(ParallelBackendBase):
  1019. supports_sharedmem = True
  1020. use_threads = True
  1021. def apply_async(self):
  1022. pass
  1023. def effective_n_jobs(self, n_jobs):
  1024. return n_jobs
  1025. with parallel_backend(MyCustomThreadingBackend()):
  1026. p = Parallel(n_jobs=2, prefer='processes') # ignored
  1027. assert type(p._backend) == MyCustomThreadingBackend
  1028. p = Parallel(n_jobs=2, require='sharedmem')
  1029. assert type(p._backend) == MyCustomThreadingBackend
  1030. class MyCustomProcessingBackend(ParallelBackendBase):
  1031. supports_sharedmem = False
  1032. use_threads = False
  1033. def apply_async(self):
  1034. pass
  1035. def effective_n_jobs(self, n_jobs):
  1036. return n_jobs
  1037. with parallel_backend(MyCustomProcessingBackend()):
  1038. p = Parallel(n_jobs=2, prefer='processes')
  1039. assert type(p._backend) == MyCustomProcessingBackend
  1040. out, err = capsys.readouterr()
  1041. assert out == ""
  1042. assert err == ""
  1043. p = Parallel(n_jobs=2, require='sharedmem', verbose=10)
  1044. assert type(p._backend) == ThreadingBackend
  1045. out, err = capsys.readouterr()
  1046. expected = ("Using ThreadingBackend as joblib.Parallel backend "
  1047. "instead of MyCustomProcessingBackend as the latter "
  1048. "does not provide shared memory semantics.")
  1049. assert out.strip() == expected
  1050. assert err == ""
  1051. with raises(ValueError):
  1052. Parallel(backend=MyCustomProcessingBackend(), require='sharedmem')
  1053. def test_invalid_backend_hinting_and_constraints():
  1054. with raises(ValueError):
  1055. Parallel(prefer='invalid')
  1056. with raises(ValueError):
  1057. Parallel(require='invalid')
  1058. with raises(ValueError):
  1059. # It is inconsistent to prefer process-based parallelism while
  1060. # requiring shared memory semantics.
  1061. Parallel(prefer='processes', require='sharedmem')
  1062. # It is inconsistent to ask explictly for a process-based parallelism
  1063. # while requiring shared memory semantics.
  1064. with raises(ValueError):
  1065. Parallel(backend='loky', require='sharedmem')
  1066. with raises(ValueError):
  1067. Parallel(backend='multiprocessing', require='sharedmem')
  1068. def test_global_parallel_backend():
  1069. default = Parallel()._backend
  1070. pb = parallel_backend('threading')
  1071. assert isinstance(Parallel()._backend, ThreadingBackend)
  1072. pb.unregister()
  1073. assert type(Parallel()._backend) is type(default)
  1074. def test_external_backends():
  1075. def register_foo():
  1076. BACKENDS['foo'] = ThreadingBackend
  1077. EXTERNAL_BACKENDS['foo'] = register_foo
  1078. with parallel_backend('foo'):
  1079. assert isinstance(Parallel()._backend, ThreadingBackend)
  1080. def _recursive_backend_info(limit=3, **kwargs):
  1081. """Perform nested parallel calls and introspect the backend on the way"""
  1082. with Parallel(n_jobs=2) as p:
  1083. this_level = [(type(p._backend).__name__, p._backend.nesting_level)]
  1084. if limit == 0:
  1085. return this_level
  1086. results = p(delayed(_recursive_backend_info)(limit=limit - 1, **kwargs)
  1087. for i in range(1))
  1088. return this_level + results[0]
  1089. @with_multiprocessing
  1090. @parametrize('backend', ['loky', 'threading'])
  1091. def test_nested_parallelism_limit(backend):
  1092. with parallel_backend(backend, n_jobs=2):
  1093. backend_types_and_levels = _recursive_backend_info()
  1094. if cpu_count() == 1:
  1095. second_level_backend_type = 'SequentialBackend'
  1096. max_level = 1
  1097. else:
  1098. second_level_backend_type = 'ThreadingBackend'
  1099. max_level = 2
  1100. top_level_backend_type = backend.title() + 'Backend'
  1101. expected_types_and_levels = [
  1102. (top_level_backend_type, 0),
  1103. (second_level_backend_type, 1),
  1104. ('SequentialBackend', max_level),
  1105. ('SequentialBackend', max_level)
  1106. ]
  1107. assert backend_types_and_levels == expected_types_and_levels
  1108. @with_numpy
  1109. @skipif(distributed is None, reason='This test requires dask')
  1110. def test_nested_parallelism_with_dask():
  1111. client = distributed.Client(n_workers=2, threads_per_worker=2) # noqa
  1112. # 10 MB of data as argument to trigger implicit scattering
  1113. data = np.ones(int(1e7), dtype=np.uint8)
  1114. for i in range(2):
  1115. with parallel_backend('dask'):
  1116. backend_types_and_levels = _recursive_backend_info(data=data)
  1117. assert len(backend_types_and_levels) == 4
  1118. assert all(name == 'DaskDistributedBackend'
  1119. for name, _ in backend_types_and_levels)
  1120. # No argument
  1121. with parallel_backend('dask'):
  1122. backend_types_and_levels = _recursive_backend_info()
  1123. assert len(backend_types_and_levels) == 4
  1124. assert all(name == 'DaskDistributedBackend'
  1125. for name, _ in backend_types_and_levels)
  1126. def _recursive_parallel(nesting_limit=None):
  1127. """A horrible function that does recursive parallel calls"""
  1128. return Parallel()(delayed(_recursive_parallel)() for i in range(2))
  1129. @parametrize('backend', ['loky', 'threading'])
  1130. def test_thread_bomb_mitigation(backend):
  1131. # Test that recursive parallelism raises a recursion rather than
  1132. # saturating the operating system resources by creating a unbounded number
  1133. # of threads.
  1134. with parallel_backend(backend, n_jobs=2):
  1135. with raises(BaseException) as excinfo:
  1136. _recursive_parallel()
  1137. exc = excinfo.value
  1138. if backend == "loky" and isinstance(exc, TerminatedWorkerError):
  1139. # The recursion exception can itself cause an error when pickling it to
  1140. # be send back to the parent process. In this case the worker crashes
  1141. # but the original traceback is still printed on stderr. This could be
  1142. # improved but does not seem simple to do and this is is not critical
  1143. # for users (as long as there is no process or thread bomb happening).
  1144. pytest.xfail("Loky worker crash when serializing RecursionError")
  1145. else:
  1146. assert isinstance(exc, RecursionError)
  1147. def _run_parallel_sum():
  1148. env_vars = {}
  1149. for var in ['OMP_NUM_THREADS', 'OPENBLAS_NUM_THREADS', 'MKL_NUM_THREADS',
  1150. 'VECLIB_MAXIMUM_THREADS', 'NUMEXPR_NUM_THREADS',
  1151. 'NUMBA_NUM_THREADS', 'ENABLE_IPC']:
  1152. env_vars[var] = os.environ.get(var)
  1153. return env_vars, parallel_sum(100)
  1154. @parametrize("backend", [None, 'loky'])
  1155. @skipif(parallel_sum is None, reason="Need OpenMP helper compiled")
  1156. def test_parallel_thread_limit(backend):
  1157. results = Parallel(n_jobs=2, backend=backend)(
  1158. delayed(_run_parallel_sum)() for _ in range(2)
  1159. )
  1160. expected_num_threads = max(cpu_count() // 2, 1)
  1161. for worker_env_vars, omp_num_threads in results:
  1162. assert omp_num_threads == expected_num_threads
  1163. for name, value in worker_env_vars.items():
  1164. if name.endswith("_THREADS"):
  1165. assert value == str(expected_num_threads)
  1166. else:
  1167. assert name == "ENABLE_IPC"
  1168. assert value == "1"
  1169. @skipif(distributed is not None,
  1170. reason='This test requires dask NOT installed')
  1171. def test_dask_backend_when_dask_not_installed():
  1172. with raises(ValueError, match='Please install dask'):
  1173. parallel_backend('dask')
  1174. def test_zero_worker_backend():
  1175. # joblib.Parallel should reject with an explicit error message parallel
  1176. # backends that have no worker.
  1177. class ZeroWorkerBackend(ThreadingBackend):
  1178. def configure(self, *args, **kwargs):
  1179. return 0
  1180. def apply_async(self, func, callback=None): # pragma: no cover
  1181. raise TimeoutError("No worker available")
  1182. def effective_n_jobs(self, n_jobs): # pragma: no cover
  1183. return 0
  1184. expected_msg = "ZeroWorkerBackend has no active worker"
  1185. with parallel_backend(ZeroWorkerBackend()):
  1186. with pytest.raises(RuntimeError, match=expected_msg):
  1187. Parallel(n_jobs=2)(delayed(id)(i) for i in range(2))
def test_globals_update_at_each_parallel_call():
    # This is a non-regression test related to joblib issues #836 and #833.
    # Cloudpickle versions between 0.5.4 and 0.7 introduced a bug where global
    # variables changes in a parent process between two calls to
    # joblib.Parallel would not be propagated into the workers.
    global MY_GLOBAL_VARIABLE
    MY_GLOBAL_VARIABLE = "original value"

    # Locally defined function that only reads the module-level global; it is
    # the callable sent to the workers below.
    def check_globals():
        global MY_GLOBAL_VARIABLE
        return MY_GLOBAL_VARIABLE

    # Sanity check in the current process before involving any worker.
    assert check_globals() == "original value"

    # First parallel call: both tasks must report the initial value.
    workers_global_variable = Parallel(n_jobs=2)(
        delayed(check_globals)() for i in range(2))
    assert set(workers_global_variable) == {"original value"}

    # Change the value of MY_GLOBAL_VARIABLE, and make sure this change gets
    # propagated into the workers environment
    MY_GLOBAL_VARIABLE = "changed value"
    assert check_globals() == "changed value"

    # Second parallel call: both tasks must report the updated value.
    workers_global_variable = Parallel(n_jobs=2)(
        delayed(check_globals)() for i in range(2))
    assert set(workers_global_variable) == {"changed value"}
  1209. ##############################################################################
  1210. # Test environment variable in child env, in particular for limiting
  1211. # the maximal number of threads in C-library threadpools.
  1212. #
  1213. def _check_numpy_threadpool_limits():
  1214. import numpy as np
  1215. # Let's call BLAS on a Matrix Matrix multiplication with dimensions large
  1216. # enough to ensure that the threadpool managed by the underlying BLAS
  1217. # implementation is actually used so as to force its initialization.
  1218. a = np.random.randn(100, 100)
  1219. np.dot(a, a)
  1220. from threadpoolctl import threadpool_info
  1221. return threadpool_info()
  1222. def _parent_max_num_threads_for(child_module, parent_info):
  1223. for parent_module in parent_info:
  1224. if parent_module['filepath'] == child_module['filepath']:
  1225. return parent_module['num_threads']
  1226. raise ValueError("An unexpected module was loaded in child:\n{}"
  1227. .format(child_module))
  1228. def check_child_num_threads(workers_info, parent_info, num_threads):
  1229. # Check that the number of threads reported in workers_info is consistent
  1230. # with the expectation. We need to be carefull to handle the cases where
  1231. # the requested number of threads is below max_num_thread for the library.
  1232. for child_threadpool_info in workers_info:
  1233. for child_module in child_threadpool_info:
  1234. parent_max_num_threads = _parent_max_num_threads_for(
  1235. child_module, parent_info)
  1236. expected = {min(num_threads, parent_max_num_threads), num_threads}
  1237. assert child_module['num_threads'] in expected
  1238. @with_numpy
  1239. @with_multiprocessing
  1240. @parametrize('n_jobs', [2, 4, -2, -1])
  1241. def test_threadpool_limitation_in_child(n_jobs):
  1242. # Check that the protection against oversubscription in workers is working
  1243. # using threadpoolctl functionalities.
  1244. # Skip this test if numpy is not linked to a BLAS library
  1245. parent_info = _check_numpy_threadpool_limits()
  1246. if len(parent_info) == 0:
  1247. pytest.skip(msg="Need a version of numpy linked to BLAS")
  1248. workers_threadpool_infos = Parallel(n_jobs=n_jobs)(
  1249. delayed(_check_numpy_threadpool_limits)() for i in range(2))
  1250. n_jobs = effective_n_jobs(n_jobs)
  1251. expected_child_num_threads = max(cpu_count() // n_jobs, 1)
  1252. check_child_num_threads(workers_threadpool_infos, parent_info,
  1253. expected_child_num_threads)
  1254. @with_numpy
  1255. @with_multiprocessing
  1256. @parametrize('inner_max_num_threads', [1, 2, 4, None])
  1257. @parametrize('n_jobs', [2, -1])
  1258. def test_threadpool_limitation_in_child_context(n_jobs, inner_max_num_threads):
  1259. # Check that the protection against oversubscription in workers is working
  1260. # using threadpoolctl functionalities.
  1261. # Skip this test if numpy is not linked to a BLAS library
  1262. parent_info = _check_numpy_threadpool_limits()
  1263. if len(parent_info) == 0:
  1264. pytest.skip(msg="Need a version of numpy linked to BLAS")
  1265. with parallel_backend('loky', inner_max_num_threads=inner_max_num_threads):
  1266. workers_threadpool_infos = Parallel(n_jobs=n_jobs)(
  1267. delayed(_check_numpy_threadpool_limits)() for i in range(2))
  1268. n_jobs = effective_n_jobs(n_jobs)
  1269. if inner_max_num_threads is None:
  1270. expected_child_num_threads = max(cpu_count() // n_jobs, 1)
  1271. else:
  1272. expected_child_num_threads = inner_max_num_threads
  1273. check_child_num_threads(workers_threadpool_infos, parent_info,
  1274. expected_child_num_threads)
  1275. @with_multiprocessing
  1276. @parametrize('n_jobs', [2, -1])
  1277. @parametrize('var_name', ["OPENBLAS_NUM_THREADS",
  1278. "MKL_NUM_THREADS",
  1279. "OMP_NUM_THREADS"])
  1280. def test_threadpool_limitation_in_child_override(n_jobs, var_name):
  1281. # Check that environment variables set by the user on the main process
  1282. # always have the priority.
  1283. # Clean up the existing executor because we change the environment of the
  1284. # parent at runtime and it is not detected in loky intentionally.
  1285. get_reusable_executor(reuse=True).shutdown()
  1286. def _get_env(var_name):
  1287. return os.environ.get(var_name)
  1288. original_var_value = os.environ.get(var_name)
  1289. try:
  1290. os.environ[var_name] = "4"
  1291. # Skip this test if numpy is not linked to a BLAS library
  1292. results = Parallel(n_jobs=n_jobs)(
  1293. delayed(_get_env)(var_name) for i in range(2))
  1294. assert results == ["4", "4"]
  1295. with parallel_backend('loky', inner_max_num_threads=1):
  1296. results = Parallel(n_jobs=n_jobs)(
  1297. delayed(_get_env)(var_name) for i in range(2))
  1298. assert results == ["1", "1"]
  1299. finally:
  1300. if original_var_value is None:
  1301. del os.environ[var_name]
  1302. else:
  1303. os.environ[var_name] = original_var_value
  1304. @with_numpy
  1305. @with_multiprocessing
  1306. @parametrize('backend', ['multiprocessing', 'threading',
  1307. MultiprocessingBackend(), ThreadingBackend()])
  1308. def test_threadpool_limitation_in_child_context_error(backend):
  1309. with raises(AssertionError, match=r"does not acc.*inner_max_num_threads"):
  1310. parallel_backend(backend, inner_max_num_threads=1)
  1311. @with_multiprocessing
  1312. @parametrize('n_jobs', [2, 4, -1])
  1313. def test_loky_reuse_workers(n_jobs):
  1314. # Non-regression test for issue #967 where the workers are not reused when
  1315. # calling multiple Parallel loops.
  1316. def parallel_call(n_jobs):
  1317. x = range(10)
  1318. Parallel(n_jobs=n_jobs)(delayed(sum)(x) for i in range(10))
  1319. # Run a parallel loop and get the workers used for computations
  1320. parallel_call(n_jobs)
  1321. first_executor = get_reusable_executor(reuse=True)
  1322. # Ensure that the workers are reused for the next calls, as the executor is
  1323. # not restarted.
  1324. for _ in range(10):
  1325. parallel_call(n_jobs)
  1326. executor = get_reusable_executor(reuse=True)
  1327. assert executor == first_executor