Skip to content

gh-103000: Optimise dataclasses asdict/astuple for common types #103005

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Apr 10, 2023
Merged
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 73 additions & 17 deletions Lib/dataclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,29 @@ def __repr__(self):
# https://bugs.python.org/issue33453 for details.
_MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)')

# Types for which deepcopy(obj) is known to return obj unmodified
# Used to skip deepcopy in asdict and astuple for performance
_ATOMIC_TYPES = {
# Common JSON Serializable types
types.NoneType,
bool,
int,
float,
complex,
bytes,
str,
# Other types that are also unaffected by deepcopy
types.EllipsisType,
types.NotImplementedType,
types.CodeType,
types.BuiltinFunctionType,
types.FunctionType,
type,
range,
property,
# weakref.ref, # weakref is not currently imported by dataclasses directly
}

# This function's logic is copied from "recursive_repr" function in
# reprlib module to avoid dependency.
def _recursive_repr(user_function):
Expand Down Expand Up @@ -1293,11 +1316,22 @@ class C:

def _asdict_inner(obj, dict_factory):
if _is_dataclass_instance(obj):
result = []
for f in fields(obj):
value = _asdict_inner(getattr(obj, f.name), dict_factory)
result.append((f.name, value))
return dict_factory(result)
if dict_factory is dict:
result = {}
for f in fields(obj):
value = getattr(obj, f.name)
if type(value) not in _ATOMIC_TYPES:
value = _asdict_inner(value, dict_factory)
result[f.name] = value
return result
else:
result = []
for f in fields(obj):
value = getattr(obj, f.name)
if type(value) not in _ATOMIC_TYPES:
value = _asdict_inner(value, dict_factory)
result.append((f.name, value))
return dict_factory(result)
elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
# obj is a namedtuple. Recurse into it, but the returned
# object is another namedtuple of the same type. This is
Expand All @@ -1318,23 +1352,33 @@ def _asdict_inner(obj, dict_factory):
# namedtuples, we could no longer call asdict() on a data
# structure where a namedtuple was used as a dict key.

return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj])
return type(obj)(*[
v if type(v) in _ATOMIC_TYPES else _asdict_inner(v, dict_factory)
for v in obj
])
elif isinstance(obj, (list, tuple)):
# Assume we can create an object of this type by passing in a
# generator (which is not true for namedtuples, handled
# above).
return type(obj)(_asdict_inner(v, dict_factory) for v in obj)
return type(obj)(
v if type(v) in _ATOMIC_TYPES else _asdict_inner(v, dict_factory)
for v in obj
)
elif isinstance(obj, dict):
if hasattr(type(obj), 'default_factory'):
# obj is a defaultdict, which has a different constructor from
# dict as it requires the default_factory as its first arg.
result = type(obj)(getattr(obj, 'default_factory'))
for k, v in obj.items():
result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory)
k = k if type(k) in _ATOMIC_TYPES else _asdict_inner(k, dict_factory)
v = v if type(v) in _ATOMIC_TYPES else _asdict_inner(v, dict_factory)
result[k] = v
return result
return type(obj)((_asdict_inner(k, dict_factory),
_asdict_inner(v, dict_factory))
for k, v in obj.items())

return type(obj)(
(k if type(k) in _ATOMIC_TYPES else _asdict_inner(k, dict_factory),
v if type(v) in _ATOMIC_TYPES else _asdict_inner(v, dict_factory))
for k, v in obj.items())
else:
return copy.deepcopy(obj)

Expand Down Expand Up @@ -1367,7 +1411,9 @@ def _astuple_inner(obj, tuple_factory):
if _is_dataclass_instance(obj):
result = []
for f in fields(obj):
value = _astuple_inner(getattr(obj, f.name), tuple_factory)
value = getattr(obj, f.name)
if type(value) not in _ATOMIC_TYPES:
value = _astuple_inner(value, tuple_factory)
result.append(value)
return tuple_factory(result)
elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
Expand All @@ -1377,23 +1423,33 @@ def _astuple_inner(obj, tuple_factory):
# treated (see below), but we just need to create them
# differently because a namedtuple's __init__ needs to be
# called differently (see bpo-34363).
return type(obj)(*[_astuple_inner(v, tuple_factory) for v in obj])
return type(obj)(*[
v if type(v) in _ATOMIC_TYPES else _astuple_inner(v, tuple_factory)
for v in obj
])
elif isinstance(obj, (list, tuple)):
# Assume we can create an object of this type by passing in a
# generator (which is not true for namedtuples, handled
# above).
return type(obj)(_astuple_inner(v, tuple_factory) for v in obj)
return type(obj)(
v if type(v) in _ATOMIC_TYPES else _astuple_inner(v, tuple_factory)
for v in obj
)
elif isinstance(obj, dict):
obj_type = type(obj)
if hasattr(obj_type, 'default_factory'):
# obj is a defaultdict, which has a different constructor from
# dict as it requires the default_factory as its first arg.
result = obj_type(getattr(obj, 'default_factory'))
for k, v in obj.items():
result[_astuple_inner(k, tuple_factory)] = _astuple_inner(v, tuple_factory)
k = k if type(k) in _ATOMIC_TYPES else _astuple_inner(k, tuple_factory)
v = v if type(v) in _ATOMIC_TYPES else _astuple_inner(v, tuple_factory)
result[k] = v
return result
return obj_type((_astuple_inner(k, tuple_factory), _astuple_inner(v, tuple_factory))
for k, v in obj.items())
return obj_type(
(k if type(k) in _ATOMIC_TYPES else _astuple_inner(k, tuple_factory),
v if type(v) in _ATOMIC_TYPES else _astuple_inner(v, tuple_factory))
for k, v in obj.items())
else:
return copy.deepcopy(obj)

Expand Down