|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
""" |
|
Pickle and restore the interpreter session.
|
""" |
|
|
|
# Public names exported by a star-import of this module.
__all__ = [
    'dump_module', 'load_module', 'load_module_asdict',
    'dump_session', 'load_session'
]
|
|
|
import re |
|
import os |
|
import sys |
|
import warnings |
|
|
|
from dill import _dill, Pickler, Unpickler |
|
from ._dill import ( |
|
BuiltinMethodType, FunctionType, MethodType, ModuleType, TypeType, |
|
_import_module, _is_builtin_module, _is_imported_module, _main_module, |
|
_reverse_typemap, __builtin__, |
|
) |
|
|
|
|
|
from typing import Optional, Union |
|
|
|
import pathlib |
|
import tempfile |
|
|
|
# System temporary directory; the dump/load functions below place the default
# 'session.pkl' file here when no filename is given.
TEMPDIR = pathlib.PurePath(tempfile.gettempdir())
|
|
|
def _module_map(): |
|
"""get map of imported modules""" |
|
from collections import defaultdict |
|
from types import SimpleNamespace |
|
modmap = SimpleNamespace( |
|
by_name=defaultdict(list), |
|
by_id=defaultdict(list), |
|
top_level={}, |
|
) |
|
for modname, module in sys.modules.items(): |
|
if modname in ('__main__', '__mp_main__') or not isinstance(module, ModuleType): |
|
continue |
|
if '.' not in modname: |
|
modmap.top_level[id(module)] = modname |
|
for objname, modobj in module.__dict__.items(): |
|
modmap.by_name[objname].append((modobj, modname)) |
|
modmap.by_id[id(modobj)].append((modobj, objname, modname)) |
|
return modmap |
|
|
|
# Types whose instances _lookup_module() may match by identity even when they
# are bound to a different name in the module they come from.
IMPORTED_AS_TYPES = (
    ModuleType,
    TypeType,
    FunctionType,
    MethodType,
    BuiltinMethodType,
)
if 'PyCapsuleType' in _reverse_typemap:
    IMPORTED_AS_TYPES += (_reverse_typemap['PyCapsuleType'],)

# Compiled patterns, matched in full against an object's ``__module__``;
# objects from these modules are likewise looked up by identity.
IMPORTED_AS_MODULES = tuple(
    re.compile(pattern)
    for pattern in (
        'ctypes', 'typing', 'subprocess', 'threading',
        r'concurrent\.futures(\.\w+)?', r'multiprocessing(\.\w+)?',
    )
)
|
|
|
def _lookup_module(modmap, name, obj, main_module): |
|
"""lookup name or id of obj if module is imported""" |
|
for modobj, modname in modmap.by_name[name]: |
|
if modobj is obj and sys.modules[modname] is not main_module: |
|
return modname, name |
|
__module__ = getattr(obj, '__module__', None) |
|
if isinstance(obj, IMPORTED_AS_TYPES) or (__module__ is not None |
|
and any(regex.fullmatch(__module__) for regex in IMPORTED_AS_MODULES)): |
|
for modobj, objname, modname in modmap.by_id[id(obj)]: |
|
if sys.modules[modname] is not main_module: |
|
return modname, objname |
|
return None, None |
|
|
|
def _stash_modules(main_module):
    """Build a copy of ``main_module`` with imported objects set aside.

    Every entry of the module's namespace is classified as one of:

    - kept as is (references to the module itself or to its ``__dict__``
      are redirected to the copy; ``None``/``False``/``True`` and builtin
      modules are kept untouched);
    - imported under the same name from another module;
    - imported under a different name from another module;
    - a top-level module bound to some name.

    The three "imported" groups are stored on the copy as the attributes
    ``__dill_imported``, ``__dill_imported_as`` and
    ``__dill_imported_top_level``.

    Returns:
        The new module, or ``main_module`` itself if nothing was set aside.
    """
    modmap = _module_map()
    newmod = ModuleType(main_module.__name__)
    namespace = main_module.__dict__

    same_name = []
    other_name = []
    top_level = []
    kept = {}
    for name, obj in namespace.items():
        if obj is main_module:
            kept[name] = newmod
            continue
        if obj is namespace:
            kept[name] = newmod.__dict__
            continue
        # Singletons are tested first so that they are never attributed to
        # some unrelated module that happens to hold the same value.
        if obj is None or obj is False or obj is True \
                or (isinstance(obj, ModuleType) and _is_builtin_module(obj)):
            kept[name] = obj
            continue

        source_module, objname = _lookup_module(modmap, name, obj, main_module)
        if source_module is None:
            if id(obj) in modmap.top_level:
                top_level.append((modmap.top_level[id(obj)], name))
            else:
                kept[name] = obj
        elif objname == name:
            same_name.append((source_module, name))
        else:
            other_name.append((source_module, objname, name))

    if len(kept) >= len(namespace):
        return main_module

    newmod.__dict__.update(kept)
    newmod.__dill_imported = same_name
    newmod.__dill_imported_as = other_name
    newmod.__dill_imported_top_level = top_level
    if getattr(newmod, '__loader__', None) is None and _is_imported_module(main_module):
        # Placeholder value so that the copy has a non-None __loader__.
        newmod.__loader__ = True
    return newmod
|
|
|
def _restore_modules(unpickler, main_module): |
|
try: |
|
for modname, name in main_module.__dict__.pop('__dill_imported'): |
|
main_module.__dict__[name] = unpickler.find_class(modname, name) |
|
for modname, objname, name in main_module.__dict__.pop('__dill_imported_as'): |
|
main_module.__dict__[name] = unpickler.find_class(modname, objname) |
|
for modname, name in main_module.__dict__.pop('__dill_imported_top_level'): |
|
main_module.__dict__[name] = __import__(modname) |
|
except KeyError: |
|
pass |
|
|
|
|
|
def dump_module(
    filename: Union[str, os.PathLike] = None,
    module: Optional[Union[ModuleType, str]] = None,
    refimported: bool = False,
    **kwds
) -> None:
    """Pickle the current state of :py:mod:`__main__` or another module to a file.

    Save the contents of :py:mod:`__main__` (e.g. from an interactive
    interpreter session), an imported module, or a module-type object (e.g.
    built with :py:class:`~types.ModuleType`), to a file. The pickled
    module can then be restored with the function :py:func:`load_module`.

    Args:
        filename: a path-like object or a writable stream. If `None`
            (the default), write to a named file in a temporary directory.
        module: a module object or the name of an importable module. If `None`
            (the default), :py:mod:`__main__` is saved.
        refimported: if `True`, all objects identified as having been imported
            into the module's namespace are saved by reference. *Note:* this is
            similar but independent from ``dill.settings['byref']``, as
            ``refimported`` refers to virtually all imported objects, while
            ``byref`` only affects select objects.
        **kwds: extra keyword arguments passed to :py:class:`Pickler()`. The
            deprecated names ``main`` and ``byref`` are still accepted here
            as aliases of ``module`` and ``refimported``.

    Raises:
        :py:exc:`PicklingError`: if pickling fails.
        :py:exc:`TypeError`: if ``module`` does not resolve to a module, or
            if both the old and the new name of a renamed argument are used.

    Examples:

        - Save current interpreter session state:

          >>> import dill
          >>> squared = lambda x: x*x
          >>> dill.dump_module() # save state of __main__ to /tmp/session.pkl

        - Save the state of an imported/importable module:

          >>> import dill
          >>> import pox
          >>> pox.plus_one = lambda x: x+1
          >>> dill.dump_module('pox_session.pkl', module=pox)

        - Save the state of a non-importable, module-type object:

          >>> import dill
          >>> from types import ModuleType
          >>> foo = ModuleType('foo')
          >>> foo.values = [1,2,3]
          >>> import math
          >>> foo.sin = math.sin
          >>> dill.dump_module('foo_session.pkl', module=foo, refimported=True)

        - Restore the state of the saved modules:

          >>> import dill
          >>> dill.load_module()
          >>> squared(2)
          4
          >>> pox = dill.load_module('pox_session.pkl')
          >>> pox.plus_one(1)
          2
          >>> foo = dill.load_module('foo_session.pkl')
          >>> [foo.sin(x) for x in foo.values]
          [0.8414709848078965, 0.9092974268256817, 0.1411200080598672]

        - Use `refimported` to save imported objects by reference:

          >>> import dill
          >>> from html.entities import html5
          >>> type(html5), len(html5)
          (dict, 2231)
          >>> import io
          >>> buf = io.BytesIO()
          >>> dill.dump_module(buf) # saves __main__, with html5 saved by value
          >>> len(buf.getvalue()) # pickle size in bytes
          71665
          >>> buf = io.BytesIO()
          >>> dill.dump_module(buf, refimported=True) # html5 saved by reference
          >>> len(buf.getvalue())
          438

    *Changed in version 0.3.6:* Function ``dump_session()`` was renamed to
    ``dump_module()``. Parameters ``main`` and ``byref`` were renamed to
    ``module`` and ``refimported``, respectively.

    Note:
        Currently, ``dill.settings['byref']`` and ``dill.settings['recurse']``
        don't apply to this function.
    """
    # Deprecated spellings: warn, and refuse a call that uses both spellings.
    given = {'module': module, 'refimported': refimported}
    for old_par, par in (('main', 'module'), ('byref', 'refimported')):
        if old_par not in kwds:
            continue
        message = "The argument %r has been renamed %r" % (old_par, par)
        if old_par == 'byref':
            message += " to distinguish it from dill.settings['byref']"
        warnings.warn(message + ".", PendingDeprecationWarning)
        if given[par]:  # the defaults (None and False) are both falsy
            raise TypeError("both %r and %r arguments were used" % (par, old_par))
    refimported = kwds.pop('byref', refimported)
    module = kwds.pop('main', module)

    from .settings import settings
    protocol = settings['protocol']

    # Resolve the module to be saved.
    if module is None:
        main = _main_module
    elif isinstance(module, str):
        main = _import_module(module)
    else:
        main = module
    if not isinstance(main, ModuleType):
        raise TypeError("%r is not a module" % main)

    # A stream supplied by the caller is used as is and left open; a file
    # opened here is closed once pickling is over.
    opened_here = not hasattr(filename, 'write')
    if opened_here:
        if filename is None:
            filename = str(TEMPDIR/'session.pkl')
        file = open(filename, 'wb')
    else:
        file = filename

    try:
        pickler = Pickler(file, protocol, **kwds)
        pickler._original_main = main
        if refimported:
            main = _stash_modules(main)
        pickler._main = main
        pickler._byref = False
        pickler._recurse = False
        pickler._session = True
        pickler._first_pass = True
        pickler._main_modified = main is not pickler._original_main
        pickler.dump(main)
    finally:
        if opened_here:
            file.close()
|
|
|
|
|
def dump_session(filename=None, main=None, byref=False, **kwds):
    # Deprecated alias of dump_module(); ``main`` and ``byref`` are forwarded
    # as ``module`` and ``refimported``.
    notice = "dump_session() has been renamed dump_module()"
    warnings.warn(notice, PendingDeprecationWarning)
    dump_module(filename, module=main, refimported=byref, **kwds)

# The alias shares the documentation of the function it forwards to.
dump_session.__doc__ = dump_module.__doc__
|
|
|
class _PeekableReader: |
|
"""lightweight stream wrapper that implements peek()""" |
|
def __init__(self, stream): |
|
self.stream = stream |
|
def read(self, n): |
|
return self.stream.read(n) |
|
def readline(self): |
|
return self.stream.readline() |
|
def tell(self): |
|
return self.stream.tell() |
|
def close(self): |
|
return self.stream.close() |
|
def peek(self, n): |
|
stream = self.stream |
|
try: |
|
if hasattr(stream, 'flush'): stream.flush() |
|
position = stream.tell() |
|
stream.seek(position) |
|
chunk = stream.read(n) |
|
stream.seek(position) |
|
return chunk |
|
except (AttributeError, OSError): |
|
raise NotImplementedError("stream is not peekable: %r", stream) from None |
|
|
|
def _make_peekable(stream): |
|
"""return stream as an object with a peek() method""" |
|
import io |
|
if hasattr(stream, 'peek'): |
|
return stream |
|
if not (hasattr(stream, 'tell') and hasattr(stream, 'seek')): |
|
try: |
|
return io.BufferedReader(stream) |
|
except Exception: |
|
pass |
|
return _PeekableReader(stream) |
|
|
|
def _identify_module(file, main=None): |
|
"""identify the name of the module stored in the given file-type object""" |
|
from pickletools import genops |
|
UNICODE = {'UNICODE', 'BINUNICODE', 'SHORT_BINUNICODE'} |
|
found_import = False |
|
try: |
|
for opcode, arg, pos in genops(file.peek(256)): |
|
if not found_import: |
|
if opcode.name in ('GLOBAL', 'SHORT_BINUNICODE') and \ |
|
arg.endswith('_import_module'): |
|
found_import = True |
|
else: |
|
if opcode.name in UNICODE: |
|
return arg |
|
else: |
|
raise UnpicklingError("reached STOP without finding main module") |
|
except (NotImplementedError, ValueError) as error: |
|
|
|
if isinstance(error, NotImplementedError) and main is not None: |
|
|
|
return None |
|
raise UnpicklingError("unable to identify main module") from error |
|
|
|
def load_module(
    filename: Union[str, os.PathLike] = None,
    module: Optional[Union[ModuleType, str]] = None,
    **kwds
) -> Optional[ModuleType]:
    """Update the selected module (default is :py:mod:`__main__`) with
    the state saved at ``filename``.

    Restore a module to the state saved with :py:func:`dump_module`. The
    saved module can be :py:mod:`__main__` (e.g. an interpreter session),
    an imported module, or a module-type object (e.g. created with
    :py:class:`~types.ModuleType`).

    When restoring the state of a non-importable module-type object, the
    current instance of this module may be passed as the argument ``module``.
    Otherwise, a new instance is created with :py:class:`~types.ModuleType`
    and returned.

    Args:
        filename: a path-like object or a readable stream. If `None`
            (the default), read from a named file in a temporary directory.
        module: a module object or the name of an importable module;
            the module name and kind (i.e. imported or non-imported) must
            match the name and kind of the module stored at ``filename``.
        **kwds: extra keyword arguments passed to :py:class:`Unpickler()`.
            The deprecated name ``main`` is still accepted here as an alias
            of ``module``.

    Raises:
        :py:exc:`UnpicklingError`: if unpickling fails.
        :py:exc:`ValueError`: if the argument ``module`` and module saved
            at ``filename`` are incompatible.
        :py:exc:`TypeError`: if ``module`` does not resolve to a module, or
            if both ``module`` and ``main`` are used.

    Returns:
        A module object, if the saved module is not :py:mod:`__main__` or
        a module instance wasn't provided with the argument ``module``.

    Examples:

        - Save the state of some modules:

          >>> import dill
          >>> squared = lambda x: x*x
          >>> dill.dump_module() # save state of __main__ to /tmp/session.pkl
          >>>
          >>> import pox # an imported module
          >>> pox.plus_one = lambda x: x+1
          >>> dill.dump_module('pox_session.pkl', module=pox)
          >>>
          >>> from types import ModuleType
          >>> foo = ModuleType('foo') # a module-type object
          >>> foo.values = [1,2,3]
          >>> import math
          >>> foo.sin = math.sin
          >>> dill.dump_module('foo_session.pkl', module=foo, refimported=True)

        - Restore the state of the interpreter:

          >>> import dill
          >>> dill.load_module() # updates __main__ from /tmp/session.pkl
          >>> squared(2)
          4

        - Load the saved state of an importable module:

          >>> import dill
          >>> pox = dill.load_module('pox_session.pkl')
          >>> pox.plus_one(1)
          2
          >>> import sys
          >>> pox in sys.modules.values()
          True

        - Load the saved state of a non-importable module-type object:

          >>> import dill
          >>> foo = dill.load_module('foo_session.pkl')
          >>> [foo.sin(x) for x in foo.values]
          [0.8414709848078965, 0.9092974268256817, 0.1411200080598672]
          >>> import math
          >>> foo.sin is math.sin # foo.sin was saved by reference
          True
          >>> import sys
          >>> foo in sys.modules.values()
          False

        - Update the state of a non-importable module-type object:

          >>> import dill
          >>> from types import ModuleType
          >>> foo = ModuleType('foo')
          >>> foo.values = ['a','b']
          >>> foo.sin = lambda x: x*x
          >>> dill.load_module('foo_session.pkl', module=foo)
          >>> [foo.sin(x) for x in foo.values]
          [0.8414709848078965, 0.9092974268256817, 0.1411200080598672]

    *Changed in version 0.3.6:* Function ``load_session()`` was renamed to
    ``load_module()``. Parameter ``main`` was renamed to ``module``.

    See also:
        :py:func:`load_module_asdict` to load the contents of module saved
        with :py:func:`dump_module` into a dictionary.
    """
    if 'main' in kwds:
        warnings.warn(
            "The argument 'main' has been renamed 'module'.",
            PendingDeprecationWarning
        )
        if module is not None:
            raise TypeError("both 'module' and 'main' arguments were used")
        module = kwds.pop('main')
    main = module

    # A stream supplied by the caller is left open; a file opened here is
    # closed in the `finally` clause below.
    opened_here = not hasattr(filename, 'read')
    if opened_here:
        if filename is None:
            filename = str(TEMPDIR/'session.pkl')
        file = open(filename, 'rb')
    else:
        file = filename

    # Name of the temporary sys.modules entry, if one gets registered.
    runtime_main = None
    try:
        file = _make_peekable(file)
        unpickler = Unpickler(file, **kwds)
        unpickler._session = True

        # Resolve the module to load into: the one given by the caller, or
        # else the one named in the pickle.
        pickle_main = _identify_module(file, main)
        if main is None and pickle_main is not None:
            main = pickle_main
        if isinstance(main, str):
            if main.startswith('__runtime__.'):
                # The pickle holds a non-importable module: create a fresh
                # module object to receive its state.
                main = ModuleType(main.partition('.')[-1])
            else:
                main = _import_module(main)
        if main is not None:
            if not isinstance(main, ModuleType):
                raise TypeError("%r is not a module" % main)
            unpickler._main = main
        else:
            main = unpickler._main

        # Check the target module against the module stored in the pickle.
        is_main_imported = _is_imported_module(main)
        if pickle_main is not None:
            is_runtime_mod = pickle_main.startswith('__runtime__.')
            if is_runtime_mod:
                pickle_main = pickle_main.partition('.')[-1]
            error_msg = "can't update{} module{} %r with the saved state of{} module{} %r"
            if is_runtime_mod and is_main_imported:
                raise ValueError(
                    error_msg.format(" imported", "", "", "-type object")
                    % (main.__name__, pickle_main)
                )
            if not is_runtime_mod and not is_main_imported:
                # The first placeholder names the module being updated and
                # the second the module saved in the pickle (the two names
                # used to be interpolated the other way around).
                raise ValueError(
                    error_msg.format("", "-type object", " imported", "")
                    % (main.__name__, pickle_main)
                )
            if main.__name__ != pickle_main:
                raise ValueError(error_msg.format("", "", "", "") % (main.__name__, pickle_main))

        # Register a non-imported target under a '__runtime__.' name in
        # sys.modules for the duration of the load.
        if not is_main_imported:
            runtime_main = '__runtime__.%s' % main.__name__
            sys.modules[runtime_main] = main

        loaded = unpickler.load()
    finally:
        if opened_here:
            file.close()
        if runtime_main is not None:
            sys.modules.pop(runtime_main, None)

    assert loaded is main
    _restore_modules(unpickler, main)
    if main is _main_module or main is module:
        return None
    else:
        return main
|
|
|
|
|
def load_session(filename=None, main=None, **kwds):
    # Deprecated alias of load_module(); ``main`` is forwarded as ``module``.
    # The value returned by load_module() is not passed on.
    notice = "load_session() has been renamed load_module()."
    warnings.warn(notice, PendingDeprecationWarning)
    load_module(filename, module=main, **kwds)

# The alias shares the documentation of the function it forwards to.
load_session.__doc__ = load_module.__doc__
|
|
|
def load_module_asdict(
    filename: Union[str, os.PathLike] = None,
    update: bool = False,
    **kwds
) -> dict:
    """
    Load the contents of a saved module into a dictionary.

    ``load_module_asdict()`` is the near-equivalent of::

        lambda filename: vars(dill.load_module(filename)).copy()

    however, does not alter the original module. Also, the path of
    the loaded module is stored in the ``__session__`` attribute.

    Args:
        filename: a path-like object or a readable stream. If `None`
            (the default), read from a named file in a temporary directory.
        update: if `True`, initialize the dictionary with the current state
            of the module prior to loading the state stored at filename.
        **kwds: extra keyword arguments passed to :py:class:`Unpickler()`

    Raises:
        :py:exc:`UnpicklingError`: if unpickling fails
        :py:exc:`TypeError`: if the keyword argument ``module`` is given

    Returns:
        The namespace dictionary of a temporary module object that received
        the restored state.

    Note:
        If ``update`` is True, the corresponding module may first be imported
        into the current namespace before the saved state is loaded from
        filename to the dictionary. Note that any module that is imported into
        the current namespace as a side-effect of using ``update`` will not be
        modified by loading the saved module in filename to a dictionary.

    Example:
        >>> import dill
        >>> alist = [1, 2, 3]
        >>> anum = 42
        >>> dill.dump_module()
        >>> anum = 0
        >>> new_var = 'spam'
        >>> main = dill.load_module_asdict()
        >>> main['__name__'], main['__session__']
        ('__main__', '/tmp/session.pkl')
        >>> main is globals() # loaded objects don't reference globals
        False
        >>> main['alist'] == alist
        True
        >>> main['alist'] is alist # was saved by value
        False
        >>> main['anum'] == anum # changed after the session was saved
        False
        >>> new_var in main # would be True if the option 'update' was set
        False
    """
    if 'module' in kwds:
        raise TypeError("'module' is an invalid keyword argument for load_module_asdict()")

    # A stream supplied by the caller is left open; a file opened here is
    # closed in the `finally` clause below.
    opened_here = not hasattr(filename, 'read')
    if opened_here:
        if filename is None:
            filename = str(TEMPDIR/'session.pkl')
        file = open(filename, 'rb')
    else:
        file = filename

    # Pre-set so that the cleanup can tell how far the `try` body got.
    main_name = None
    old_main = None
    try:
        file = _make_peekable(file)
        main_name = _identify_module(file)
        old_main = sys.modules.get(main_name)
        main = ModuleType(main_name)
        if update:
            if old_main is None:
                old_main = _import_module(main_name)
            main.__dict__.update(old_main.__dict__)
        else:
            main.__builtins__ = __builtin__
        # Temporarily put the stand-in module in place of the real one, so
        # that load_module() updates the stand-in.
        sys.modules[main_name] = main
        load_module(file, **kwds)
    finally:
        if opened_here:
            file.close()
        if main_name is not None:
            if old_main is None:
                # The stand-in may never have been registered (e.g. the
                # import above failed): removing it must not raise and hide
                # the original exception.
                sys.modules.pop(main_name, None)
            else:
                sys.modules[main_name] = old_main
    main.__session__ = str(filename)
    return main.__dict__
|
|
|
|
|
|
|
|
|
# Make the functions listed below available as attributes of the `_dill`
# module as well (presumably for backward compatibility with code that looks
# them up there -- TODO confirm).
for name in (
    '_lookup_module', '_module_map', '_restore_modules', '_stash_modules',
    'dump_session', 'load_session'
):
    setattr(_dill, name, globals()[name])
del name  # don't leave the loop variable in the module namespace
|
|