  1. """
  2. New, fast version of the CloudPickler.
  3. This new CloudPickler class can now extend the fast C Pickler instead of the
  4. previous Python implementation of the Pickler class. Because this functionality
  5. is only available for Python versions 3.8+, a lot of backward-compatibility
  6. code is also removed.
  7. Note that the C Pickler sublassing API is CPython-specific. Therefore, some
  8. guards present in cloudpickle.py that were written to handle PyPy specificities
  9. are not present in cloudpickle_fast.py
  10. """
import abc
import copyreg
import io
import itertools
import logging
import _pickle
import pickle
import sys
import types
import weakref
import typing

from _pickle import Pickler

from .cloudpickle import (
    _is_dynamic, _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL,
    _find_imported_submodules, _get_cell_contents, _is_importable_by_name,
    _builtin_type, Enum, _get_or_create_tracker_id, _make_skeleton_class,
    _make_skeleton_enum, _extract_class_dict, dynamic_subimport, subimport,
    _typevar_reduce, _get_bases,
)

load, loads = _pickle.load, _pickle.loads

# Shorthands similar to pickle.dump/pickle.dumps


def dump(obj, file, protocol=None, buffer_callback=None):
    """Serialize obj as bytes streamed into file.

    protocol defaults to cloudpickle.DEFAULT_PROTOCOL, which is an alias to
    pickle.HIGHEST_PROTOCOL. This setting favors maximum communication speed
    between processes running the same Python version.

    Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
    compatibility with older versions of Python.
    """
    CloudPickler(
        file, protocol=protocol, buffer_callback=buffer_callback
    ).dump(obj)


def dumps(obj, protocol=None, buffer_callback=None):
    """Serialize obj as a bytes object allocated in memory.

    protocol defaults to cloudpickle.DEFAULT_PROTOCOL, which is an alias to
    pickle.HIGHEST_PROTOCOL. This setting favors maximum communication speed
    between processes running the same Python version.

    Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
    compatibility with older versions of Python.
    """
    with io.BytesIO() as file:
        cp = CloudPickler(
            file, protocol=protocol, buffer_callback=buffer_callback)
        cp.dump(obj)
        return file.getvalue()
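
# Illustrative round-trip (a sketch, not part of the module API): dumps can
# serialize dynamically defined functions that the stdlib pickle rejects, and
# the resulting payload is readable by plain pickle.loads:
#
#   >>> import pickle
#   >>> payload = dumps(lambda x: x ** 2)
#   >>> pickle.loads(payload)(3)
#   9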


# COLLECTION OF OBJECTS __getnewargs__-LIKE METHODS
# -------------------------------------------------

def _class_getnewargs(obj):
    type_kwargs = {}
    if "__slots__" in obj.__dict__:
        type_kwargs["__slots__"] = obj.__slots__

    __dict__ = obj.__dict__.get('__dict__', None)
    if isinstance(__dict__, property):
        type_kwargs['__dict__'] = __dict__

    return (type(obj), obj.__name__, _get_bases(obj), type_kwargs,
            _get_or_create_tracker_id(obj), None)


def _enum_getnewargs(obj):
    members = dict((e.name, e.value) for e in obj)
    return (obj.__bases__, obj.__name__, obj.__qualname__, members,
            obj.__module__, _get_or_create_tracker_id(obj), None)


# COLLECTION OF OBJECTS RECONSTRUCTORS
# ------------------------------------

def _file_reconstructor(retval):
    return retval


# COLLECTION OF OBJECTS STATE GETTERS
# -----------------------------------

def _function_getstate(func):
    # - Put func's dynamic attributes (stored in func.__dict__) in state.
    #   These attributes will be restored at unpickling time using
    #   f.__dict__.update(state)
    # - Put func's members into slotstate. Such attributes will be restored
    #   at unpickling time by iterating over slotstate and calling
    #   setattr(func, slotname, slotvalue)
    slotstate = {
        "__name__": func.__name__,
        "__qualname__": func.__qualname__,
        "__annotations__": func.__annotations__,
        "__kwdefaults__": func.__kwdefaults__,
        "__defaults__": func.__defaults__,
        "__module__": func.__module__,
        "__doc__": func.__doc__,
        "__closure__": func.__closure__,
    }

    f_globals_ref = _extract_code_globals(func.__code__)
    f_globals = {k: func.__globals__[k] for k in f_globals_ref if k in
                 func.__globals__}

    closure_values = (
        list(map(_get_cell_contents, func.__closure__))
        if func.__closure__ is not None else ()
    )

    # Extract currently-imported submodules used by func. Storing these
    # modules in a dummy _cloudpickle_submodules attribute of the object's
    # state will trigger the side effect of importing these modules at
    # unpickling time (which is necessary for func to work correctly once
    # unpickled).
    slotstate["_cloudpickle_submodules"] = _find_imported_submodules(
        func.__code__, itertools.chain(f_globals.values(), closure_values))
    slotstate["__globals__"] = f_globals

    state = func.__dict__
    return state, slotstate
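
# Example of the state/slotstate split (a sketch; assumes an interactive
# session where ``g = 1`` and ``def f(): return g`` were defined):
#
#   >>> state, slotstate = _function_getstate(f)
#   >>> slotstate["__globals__"]
#   {'g': 1}
#
# _extract_code_globals keeps only the globals actually referenced by f's
# code object, so the captured namespace stays small.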


def _class_getstate(obj):
    clsdict = _extract_class_dict(obj)
    clsdict.pop('__weakref__', None)

    if issubclass(type(obj), abc.ABCMeta):
        # If obj is an instance of an ABCMeta subclass, don't pickle the
        # cache/negative caches populated during isinstance/issubclass
        # checks, but pickle the list of registered subclasses of obj.
        clsdict.pop('_abc_impl', None)
        (registry, _, _, _) = abc._get_dump(obj)
        clsdict["_abc_impl"] = [subclass_weakref()
                                for subclass_weakref in registry]

    if "__slots__" in clsdict:
        # pickle string length optimization: member descriptors of obj are
        # created automatically from obj's __slots__ attribute; there is no
        # need to save them in obj's state.
        if isinstance(obj.__slots__, str):
            clsdict.pop(obj.__slots__)
        else:
            for k in obj.__slots__:
                clsdict.pop(k, None)

    clsdict.pop('__dict__', None)  # unpicklable property object

    return (clsdict, {})
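
# Example of the __slots__ optimization above (a sketch): given
#
#   class Point:
#       __slots__ = ("x", "y")
#
# the member descriptors Point.x and Point.y are dropped from clsdict, since
# type() recreates them automatically from the pickled __slots__ value.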


def _enum_getstate(obj):
    clsdict, slotstate = _class_getstate(obj)

    members = dict((e.name, e.value) for e in obj)
    # Cleanup the clsdict that will be passed to _rehydrate_skeleton_class:
    # Those attributes are already handled by the metaclass.
    for attrname in ["_generate_next_value_", "_member_names_",
                     "_member_map_", "_member_type_",
                     "_value2member_map_"]:
        clsdict.pop(attrname, None)
    # Special handling of Enum subclasses: the members themselves are also
    # stripped from clsdict, as the Enum metaclass re-creates them from
    # `members` at unpickling time.
    for member in members:
        clsdict.pop(member)
    return clsdict, slotstate


# COLLECTIONS OF OBJECTS REDUCERS
# -------------------------------
# A reducer is a function that takes a single argument (obj) and returns a
# tuple with all the necessary data to re-construct obj. Apart from a few
# exceptions (list, dict, bytes, int, etc.), a reducer is necessary to
# correctly pickle an object.
# While many built-in objects (Exception objects, instances of the "object"
# class, etc.) are shipped with their own built-in reducer (invoked using
# obj.__reduce__), some do not. The following methods were created to "fill
# these holes".

def _code_reduce(obj):
    """codeobject reducer"""
    args = (
        obj.co_argcount, obj.co_posonlyargcount,
        obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize,
        obj.co_flags, obj.co_code, obj.co_consts, obj.co_names,
        obj.co_varnames, obj.co_filename, obj.co_name,
        obj.co_firstlineno, obj.co_lnotab, obj.co_freevars,
        obj.co_cellvars
    )
    return types.CodeType, args


def _cell_reduce(obj):
    """Cell (containing values of a function's free variables) reducer"""
    try:
        obj.cell_contents
    except ValueError:  # cell is empty
        return types.CellType, ()
    else:
        return types.CellType, (obj.cell_contents,)
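
# types.CellType (Python 3.8+) can be instantiated empty or filled, which is
# what makes the two-branch reducer above possible. For illustration:
#
#   >>> types.CellType(42).cell_contents
#   42
#   >>> types.CellType().cell_contents
#   Traceback (most recent call last):
#   ...
#   ValueError: Cell is empty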


def _classmethod_reduce(obj):
    orig_func = obj.__func__
    return type(obj), (orig_func,)


def _file_reduce(obj):
    """Save a file"""
    if not hasattr(obj, "name") or not hasattr(obj, "mode"):
        raise pickle.PicklingError(
            "Cannot pickle files that do not map to an actual file"
        )
    if obj is sys.stdout:
        return getattr, (sys, "stdout")
    if obj is sys.stderr:
        return getattr, (sys, "stderr")
    if obj is sys.stdin:
        raise pickle.PicklingError("Cannot pickle standard input")
    if obj.closed:
        raise pickle.PicklingError("Cannot pickle closed files")
    if hasattr(obj, "isatty") and obj.isatty():
        raise pickle.PicklingError(
            "Cannot pickle files that map to tty objects"
        )
    if "r" not in obj.mode and "+" not in obj.mode:
        raise pickle.PicklingError(
            "Cannot pickle files that are not opened for reading: %s"
            % obj.mode
        )

    name = obj.name

    retval = io.StringIO()

    try:
        # Read the whole file
        curloc = obj.tell()
        obj.seek(0)
        contents = obj.read()
        obj.seek(curloc)
    except IOError:
        raise pickle.PicklingError(
            "Cannot pickle file %s as it cannot be read" % name
        )
    retval.write(contents)
    retval.seek(curloc)

    retval.name = name
    return _file_reconstructor, (retval,)


def _getset_descriptor_reduce(obj):
    return getattr, (obj.__objclass__, obj.__name__)


def _mappingproxy_reduce(obj):
    return types.MappingProxyType, (dict(obj),)


def _memoryview_reduce(obj):
    return bytes, (obj.tobytes(),)


def _module_reduce(obj):
    if _is_dynamic(obj):
        obj.__dict__.pop('__builtins__', None)
        return dynamic_subimport, (obj.__name__, vars(obj))
    else:
        return subimport, (obj.__name__,)
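
# In other words, a module backed by an importable source file is pickled by
# reference (re-imported by name at unpickling time), whereas a module built
# in memory is pickled by value. Illustrative sketch:
#
#   >>> mod = types.ModuleType("dynamic_mod")  # hypothetical in-memory module
#   >>> _module_reduce(mod)[0] is dynamic_subimport
#   True
#   >>> _module_reduce(logging)[0] is subimport
#   True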


def _method_reduce(obj):
    return (types.MethodType, (obj.__func__, obj.__self__))


def _logger_reduce(obj):
    return logging.getLogger, (obj.name,)


def _root_logger_reduce(obj):
    return logging.getLogger, ()


def _property_reduce(obj):
    return property, (obj.fget, obj.fset, obj.fdel, obj.__doc__)


def _weakset_reduce(obj):
    return weakref.WeakSet, (list(obj),)


def _dynamic_class_reduce(obj):
    """
    Save a class that can't be stored as module global.

    This method is used to serialize classes that are defined inside
    functions, or that otherwise can't be serialized as attribute lookups
    from global modules.
    """
    if Enum is not None and issubclass(obj, Enum):
        return (
            _make_skeleton_enum, _enum_getnewargs(obj), _enum_getstate(obj),
            None, None, _class_setstate
        )
    else:
        return (
            _make_skeleton_class, _class_getnewargs(obj), _class_getstate(obj),
            None, None, _class_setstate
        )


def _class_reduce(obj):
    """Select the reducer depending on the dynamic nature of the class obj"""
    if obj is type(None):  # noqa
        return type, (None,)
    elif obj is type(Ellipsis):
        return type, (Ellipsis,)
    elif obj is type(NotImplemented):
        return type, (NotImplemented,)
    elif obj in _BUILTIN_TYPE_NAMES:
        return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],)
    elif not _is_importable_by_name(obj):
        return _dynamic_class_reduce(obj)
    return NotImplemented
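
# Concretely (illustration only): a class importable by name, such as
# logging.Logger, falls through to NotImplemented and is pickled by reference
# via save_global, while a class defined inside a function body is serialized
# by value through _dynamic_class_reduce:
#
#   >>> _class_reduce(logging.Logger)
#   NotImplemented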


# COLLECTIONS OF OBJECTS STATE SETTERS
# ------------------------------------
# state setters are called at unpickling time, once the object is created
# and it has to be updated to how it was at pickling time.


def _function_setstate(obj, state):
    """Update the state of a dynamic function.

    As __closure__ and __globals__ are readonly attributes of a function, we
    cannot rely on the native setstate routine of pickle.load_build, which
    calls setattr on items of the slotstate. Instead, we have to modify them
    inplace.
    """
    state, slotstate = state
    obj.__dict__.update(state)

    obj_globals = slotstate.pop("__globals__")
    obj_closure = slotstate.pop("__closure__")
    # _cloudpickle_submodules is a set of submodules that must be loaded for
    # the pickled function to work correctly at unpickling time. Now that
    # these submodules are unpickled (hence imported), they can be removed
    # from the object's state (the object state only served as a reference
    # holder to these submodules).
    slotstate.pop("_cloudpickle_submodules")

    obj.__globals__.update(obj_globals)
    obj.__globals__["__builtins__"] = __builtins__

    if obj_closure is not None:
        for i, cell in enumerate(obj_closure):
            try:
                value = cell.cell_contents
            except ValueError:  # cell is empty
                continue
            obj.__closure__[i].cell_contents = value

    for k, v in slotstate.items():
        setattr(obj, k, v)


def _class_setstate(obj, state):
    state, slotstate = state
    registry = None
    for attrname, attr in state.items():
        if attrname == "_abc_impl":
            registry = attr
        else:
            setattr(obj, attrname, attr)
    if registry is not None:
        for subclass in registry:
            obj.register(subclass)

    return obj


class CloudPickler(Pickler):
    """Fast C Pickler extension with additional reducing routines.

    CloudPickler's extensions take two forms:

    * its dispatch_table, containing reducers that are called only if ALL
      built-in saving functions were previously discarded.
    * a special callback named "reducer_override", invoked before the
      standard function/class builtin-saving method (save_global), to
      serialize dynamic functions.
    """

    # cloudpickle's own dispatch_table, containing the additional set of
    # objects (compared to the standard library pickle) that cloudpickle can
    # serialize.
    dispatch = {}
    dispatch[classmethod] = _classmethod_reduce
    dispatch[io.TextIOWrapper] = _file_reduce
    dispatch[logging.Logger] = _logger_reduce
    dispatch[logging.RootLogger] = _root_logger_reduce
    dispatch[memoryview] = _memoryview_reduce
    dispatch[property] = _property_reduce
    dispatch[staticmethod] = _classmethod_reduce
    dispatch[types.CellType] = _cell_reduce
    dispatch[types.CodeType] = _code_reduce
    dispatch[types.GetSetDescriptorType] = _getset_descriptor_reduce
    dispatch[types.ModuleType] = _module_reduce
    dispatch[types.MethodType] = _method_reduce
    dispatch[types.MappingProxyType] = _mappingproxy_reduce
    dispatch[weakref.WeakSet] = _weakset_reduce
    dispatch[typing.TypeVar] = _typevar_reduce

    def __init__(self, file, protocol=None, buffer_callback=None):
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        Pickler.__init__(
            self, file, protocol=protocol, buffer_callback=buffer_callback
        )
        # Map the ids of functions' __globals__ dicts to the namespaces being
        # pickled, to ensure that functions sharing the same global namespace
        # at pickling time also share it at unpickling time.
        self.globals_ref = {}
        # Take into account potential custom reducers registered by external
        # modules.
        self.dispatch_table = copyreg.dispatch_table.copy()
        self.dispatch_table.update(self.dispatch)
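        # For instance (hypothetical MyType, for illustration only), an
        # external library can make one of its types picklable for every
        # subsequently created CloudPickler by registering a reducer through
        # copyreg before pickling:
        #
        #   import copyreg
        #   copyreg.pickle(MyType, lambda obj: (MyType, (obj.value,)))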
        self.proto = int(protocol)

    def reducer_override(self, obj):
        """Type-agnostic reducing callback for functions and classes.

        For performance reasons, subclasses of the C _pickle.Pickler class
        cannot register custom reducers for functions and classes in the
        dispatch_table. Reducers for such types must instead be implemented
        in the special reducer_override method.

        Note that this method will be called for any object except a few
        builtin-types (int, lists, dicts etc.), which differs from reducers
        in the Pickler's dispatch_table, each of them being invoked for
        objects of a specific type only.

        This property comes in handy for classes: although most classes are
        instances of the ``type`` metaclass, some of them can be instances
        of other custom metaclasses (such as enum.EnumMeta for example). In
        particular, the metaclass will likely not be known in advance, and
        thus cannot be special-cased using an entry in the dispatch_table.

        reducer_override, among other things, allows us to register a
        reducer that will be called for any class, independently of its
        type.

        Notes:

        * reducer_override takes priority over dispatch_table-registered
          reducers.
        * reducer_override can be used to fix other limitations of
          cloudpickle for other types that suffered from type-specific
          reducers, such as Exceptions. See
          https://github.com/cloudpipe/cloudpickle/issues/248
        """
        t = type(obj)
        try:
            is_anyclass = issubclass(t, type)
        except TypeError:  # t is not a class (old Boost; see SF #502085)
            is_anyclass = False

        if is_anyclass:
            return _class_reduce(obj)
        elif isinstance(obj, types.FunctionType):
            return self._function_reduce(obj)
        else:
            # fallback to save_global, including the Pickler's
            # dispatch_table
            return NotImplemented

    # function reducers are defined as instance methods of CloudPickler
    # objects, as they rely on a CloudPickler attribute (globals_ref)
    def _dynamic_function_reduce(self, func):
        """Reduce a function that is not pickleable via attribute lookup."""
        newargs = self._function_getnewargs(func)
        state = _function_getstate(func)
        return (types.FunctionType, newargs, state, None, None,
                _function_setstate)

    def _function_reduce(self, obj):
        """Reducer for function objects.

        If obj is a top-level attribute of a file-backed module, this
        reducer returns NotImplemented, making the CloudPickler fall back to
        traditional _pickle.Pickler routines to save obj. Otherwise, it
        reduces obj using a custom cloudpickle reducer designed specifically
        to handle dynamic functions.

        As opposed to cloudpickle.py, there is no special handling for
        builtin PyPy functions because cloudpickle_fast is CPython-specific.
        """
        if _is_importable_by_name(obj):
            return NotImplemented
        else:
            return self._dynamic_function_reduce(obj)

    def _function_getnewargs(self, func):
        code = func.__code__

        # base_globals represents the future global namespace of func at
        # unpickling time. Looking it up and storing it in
        # CloudPickler.globals_ref allows functions sharing the same globals
        # at pickling time to also share them once unpickled, on one
        # condition: since globals_ref is an attribute of a CloudPickler
        # instance, and a new CloudPickler is created each time
        # cloudpickle.dump or cloudpickle.dumps is called, the functions
        # also need to be saved within the same invocation of
        # cloudpickle.dump/cloudpickle.dumps (for example:
        # cloudpickle.dumps([f1, f2])). There is no such limitation when
        # using CloudPickler.dump, as long as the multiple invocations are
        # bound to the same CloudPickler instance.
        base_globals = self.globals_ref.setdefault(id(func.__globals__), {})

        if base_globals == {}:
            # Add module attributes used to resolve relative import
            # instructions inside func.
            for k in ["__package__", "__name__", "__path__", "__file__"]:
                if k in func.__globals__:
                    base_globals[k] = func.__globals__[k]

        # Do not bind the free variables before the function is created to
        # avoid infinite recursion.
        if func.__closure__ is None:
            closure = None
        else:
            closure = tuple(
                types.CellType() for _ in range(len(code.co_freevars)))

        return code, base_globals, None, None, closure
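
    # Illustration of the sharing condition above (a sketch): if f1 and f2
    # are defined in the same interactive session, then
    #
    #   >>> g1, g2 = pickle.loads(dumps([f1, f2]))
    #   >>> g1.__globals__ is g2.__globals__
    #   True
    #
    # whereas pickling f1 and f2 through two separate dumps() calls would
    # give them distinct global namespaces once unpickled.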

    def dump(self, obj):
        try:
            return Pickler.dump(self, obj)
        except RuntimeError as e:
            if "recursion" in e.args[0]:
                msg = (
                    "Could not pickle object as excessively deep recursion "
                    "required."
                )
                raise pickle.PicklingError(msg)
            else:
                raise