diff --git a/pyproject.toml b/pyproject.toml index fb00a36a5..fe76ee5e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ classifiers = [ 'Typing :: Typed', ] dependencies = [ - 'typing-extensions>=4.13.0', + 'typing-extensions@git+https://github.com/HexDecimal/typing_extensions@conforming-sentinel', ] dynamic = ['license', 'readme', 'version'] diff --git a/python/pydantic_core/__init__.py b/python/pydantic_core/__init__.py index a146499d0..aed27b1ad 100644 --- a/python/pydantic_core/__init__.py +++ b/python/pydantic_core/__init__.py @@ -3,6 +3,8 @@ import sys as _sys from typing import Any as _Any +from typing_extensions import Sentinel + from ._pydantic_core import ( ArgsKwargs, MultiHostUrl, @@ -40,6 +42,7 @@ __all__ = [ '__version__', + 'UNSET', 'CoreConfig', 'CoreSchema', 'CoreSchemaType', @@ -140,3 +143,28 @@ class MultiHostHost(_TypedDict): """The host part of this host, or `None`.""" port: int | None """The port part of this host, or `None`.""" + + +UNSET = Sentinel('UNSET', module_name='pydantic_core') +"""A singleton indicating a field value was not set during validation. + +This singleton can be used as a default value, as an alternative to `None` when it has +an explicit meaning. During serialization, any field with `UNSET` as a value is excluded +from the output. 
+ +Example: + ```python + from pydantic import BaseModel + from pydantic.experimental.unset import UNSET + + + class Configuration(BaseModel): + timeout: int | None | UNSET = UNSET + + + # configuration defaults, stored somewhere else: + defaults = {'timeout': 200} + + conf = Configuration.model_validate({...}) + timeout = conf.timeout if conf.timeout is not UNSET else defaults['timeout'] +""" diff --git a/python/pydantic_core/core_schema.py b/python/pydantic_core/core_schema.py index 803569244..e3cfd02b8 100644 --- a/python/pydantic_core/core_schema.py +++ b/python/pydantic_core/core_schema.py @@ -1341,6 +1341,16 @@ class Color(Enum): ) +class UnsetSentinelSchema(TypedDict, total=False): + type: Required[Literal['unset-sentinel']] + + +def unset_sentinel_schema() -> UnsetSentinelSchema: + """Returns a schema for the [`UNSET`][pydantic_core.UNSET] sentinel.""" + + return {'type': 'unset-sentinel'} + + # must match input/parse_json.rs::JsonType::try_from JsonType = Literal['null', 'bool', 'int', 'float', 'str', 'list', 'dict'] @@ -4065,6 +4075,7 @@ def definition_reference_schema( DatetimeSchema, TimedeltaSchema, LiteralSchema, + UnsetSentinelSchema, EnumSchema, IsInstanceSchema, IsSubclassSchema, @@ -4123,6 +4134,7 @@ def definition_reference_schema( 'datetime', 'timedelta', 'literal', + 'unset-sentinel', 'enum', 'is-instance', 'is-subclass', diff --git a/src/common/mod.rs b/src/common/mod.rs index 47c0a0349..20ad896b0 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -1,2 +1,3 @@ pub(crate) mod prebuilt; pub(crate) mod union; +pub(crate) mod unset_sentinel; diff --git a/src/common/unset_sentinel.rs b/src/common/unset_sentinel.rs new file mode 100644 index 000000000..f34d172ed --- /dev/null +++ b/src/common/unset_sentinel.rs @@ -0,0 +1,16 @@ +use pyo3::intern; +use pyo3::prelude::*; +use pyo3::sync::GILOnceCell; + +static UNSET_SENTINEL_OBJECT: GILOnceCell> = GILOnceCell::new(); + +pub fn get_unset_sentinel_object(py: Python) -> &Bound<'_, PyAny> { + 
UNSET_SENTINEL_OBJECT + .get_or_init(py, || { + py.import(intern!(py, "pydantic_core")) + .and_then(|core_module| core_module.getattr(intern!(py, "UNSET"))) + .unwrap() + .into() + }) + .bind(py) +} diff --git a/src/errors/types.rs b/src/errors/types.rs index 0c75e1e24..c85d9253f 100644 --- a/src/errors/types.rs +++ b/src/errors/types.rs @@ -316,6 +316,8 @@ error_types! { expected: {ctx_type: String, ctx_fn: field_from_context}, }, // --------------------- + // unset sentinel + UnsetSentinelError {}, // date errors DateType {}, DateParsing { @@ -531,6 +533,7 @@ impl ErrorType { Self::AssertionError {..} => "Assertion failed, {error}", Self::CustomError {..} => "", // custom errors are handled separately Self::LiteralError {..} => "Input should be {expected}", + Self::UnsetSentinelError { .. } => "Input should be the 'UNSET' sentinel", Self::DateType {..} => "Input should be a valid date", Self::DateParsing {..} => "Input should be a valid date in the format YYYY-MM-DD, {error}", Self::DateFromDatetimeParsing {..} => "Input should be a valid date or datetime, {error}", diff --git a/src/serializers/computed_fields.rs b/src/serializers/computed_fields.rs index 7a574093c..8867f1920 100644 --- a/src/serializers/computed_fields.rs +++ b/src/serializers/computed_fields.rs @@ -4,6 +4,7 @@ use pyo3::{intern, PyTraverseError, PyVisit}; use serde::ser::SerializeMap; use crate::build_tools::py_schema_error_type; +use crate::common::unset_sentinel::get_unset_sentinel_object; use crate::definitions::DefinitionsBuilder; use crate::py_gc::PyGcTraverse; use crate::serializers::filter::SchemaFilter; @@ -148,6 +149,10 @@ impl ComputedFields { if extra.exclude_none && value.is_none() { continue; } + let unset_obj = get_unset_sentinel_object(model.py()); + if value.is(unset_obj) { + continue; + } let field_extra = Extra { field_name: Some(&computed_field.property_name), diff --git a/src/serializers/fields.rs b/src/serializers/fields.rs index a5c5bc6b3..f9b2db1dc 100644 --- 
a/src/serializers/fields.rs +++ b/src/serializers/fields.rs @@ -7,6 +7,7 @@ use ahash::AHashMap; use serde::ser::SerializeMap; use smallvec::SmallVec; +use crate::common::unset_sentinel::get_unset_sentinel_object; use crate::serializers::extra::SerCheck; use crate::PydanticSerializationUnexpectedValue; @@ -15,8 +16,7 @@ use super::errors::py_err_se_err; use super::extra::Extra; use super::filter::SchemaFilter; use super::infer::{infer_json_key, infer_serialize, infer_to_python, SerializeInfer}; -use super::shared::PydanticSerializer; -use super::shared::{CombinedSerializer, TypeSerializer}; +use super::shared::{CombinedSerializer, PydanticSerializer, TypeSerializer}; /// representation of a field for serialization #[derive(Debug)] @@ -154,6 +154,7 @@ impl GeneralFieldsSerializer { ) -> PyResult> { let output_dict = PyDict::new(py); let mut used_req_fields: usize = 0; + let unset_obj = get_unset_sentinel_object(py); // NOTE! we maintain the order of the input dict assuming that's right for result in main_iter { @@ -163,6 +164,10 @@ impl GeneralFieldsSerializer { if extra.exclude_none && value.is_none() { continue; } + if value.is(unset_obj) { + continue; + } + let field_extra = Extra { field_name: Some(key_str), ..extra @@ -238,9 +243,13 @@ impl GeneralFieldsSerializer { for result in main_iter { let (key, value) = result.map_err(py_err_se_err)?; + let unset_obj = get_unset_sentinel_object(value.py()); if extra.exclude_none && value.is_none() { continue; } + if value.is(unset_obj) { + continue; + } let key_str = key_str(&key).map_err(py_err_se_err)?; let field_extra = Extra { field_name: Some(key_str), @@ -326,6 +335,7 @@ impl TypeSerializer for GeneralFieldsSerializer { extra: &Extra, ) -> PyResult { let py = value.py(); + let unset_obj = get_unset_sentinel_object(py); // If there is already a model registered (from a dataclass, BaseModel) // then do not touch it // If there is no model, we (a TypedDict) are the model @@ -347,6 +357,9 @@ impl TypeSerializer for 
GeneralFieldsSerializer { if extra.exclude_none && value.is_none() { continue; } + if value.is(unset_obj) { + continue; + } if let Some((next_include, next_exclude)) = self.filter.key_filter(&key, include, exclude)? { let value = match &self.extra_serializer { Some(serializer) => { @@ -380,7 +393,7 @@ impl TypeSerializer for GeneralFieldsSerializer { extra.warnings.on_fallback_ser::(self.get_name(), value, extra)?; return infer_serialize(value, serializer, include, exclude, extra); }; - + let unset_obj = get_unset_sentinel_object(value.py()); // If there is already a model registered (from a dataclass, BaseModel) // then do not touch it // If there is no model, we (a TypedDict) are the model @@ -407,6 +420,9 @@ impl TypeSerializer for GeneralFieldsSerializer { if extra.exclude_none && value.is_none() { continue; } + if value.is(unset_obj) { + continue; + } let filter = self.filter.key_filter(&key, include, exclude).map_err(py_err_se_err)?; if let Some((next_include, next_exclude)) = filter { let output_key = infer_json_key(&key, extra).map_err(py_err_se_err)?; diff --git a/src/serializers/shared.rs b/src/serializers/shared.rs index 81a673dde..9cdcc193b 100644 --- a/src/serializers/shared.rs +++ b/src/serializers/shared.rs @@ -142,6 +142,7 @@ combined_serializer! 
{ Union: super::type_serializers::union::UnionSerializer; TaggedUnion: super::type_serializers::union::TaggedUnionSerializer; Literal: super::type_serializers::literal::LiteralSerializer; + UnsetSentinel: super::type_serializers::unset_sentinel::UnsetSentinelSerializer; Enum: super::type_serializers::enum_::EnumSerializer; Recursive: super::type_serializers::definitions::DefinitionRefSerializer; Tuple: super::type_serializers::tuple::TupleSerializer; @@ -343,6 +344,7 @@ impl PyGcTraverse for CombinedSerializer { CombinedSerializer::Union(inner) => inner.py_gc_traverse(visit), CombinedSerializer::TaggedUnion(inner) => inner.py_gc_traverse(visit), CombinedSerializer::Literal(inner) => inner.py_gc_traverse(visit), + CombinedSerializer::UnsetSentinel(inner) => inner.py_gc_traverse(visit), CombinedSerializer::Enum(inner) => inner.py_gc_traverse(visit), CombinedSerializer::Recursive(inner) => inner.py_gc_traverse(visit), CombinedSerializer::Tuple(inner) => inner.py_gc_traverse(visit), diff --git a/src/serializers/type_serializers/mod.rs b/src/serializers/type_serializers/mod.rs index dabd006a3..9aefdfe59 100644 --- a/src/serializers/type_serializers/mod.rs +++ b/src/serializers/type_serializers/mod.rs @@ -25,6 +25,7 @@ pub mod timedelta; pub mod tuple; pub mod typed_dict; pub mod union; +pub mod unset_sentinel; pub mod url; pub mod uuid; pub mod with_default; diff --git a/src/serializers/type_serializers/unset_sentinel.rs b/src/serializers/type_serializers/unset_sentinel.rs new file mode 100644 index 000000000..13c794696 --- /dev/null +++ b/src/serializers/type_serializers/unset_sentinel.rs @@ -0,0 +1,76 @@ +// This serializer is defined so that building a schema serializer containing an +// 'unset-sentinel' core schema doesn't crash. 
In practice, the serializer isn't +// used for model-like classes, as the 'fields' serializer takes care of omitting +// the fields from the output. The serializer can still be used if the 'unset-sentinel' +// core schema is used standalone (e.g. with a Pydantic type adapter), but this isn't +// something we explicitly support. + +use std::borrow::Cow; + +use pyo3::prelude::*; +use pyo3::types::PyDict; + +use serde::ser::Error; + +use crate::common::unset_sentinel::get_unset_sentinel_object; +use crate::definitions::DefinitionsBuilder; +use crate::PydanticSerializationUnexpectedValue; + +use super::{BuildSerializer, CombinedSerializer, Extra, TypeSerializer}; + +#[derive(Debug)] +pub struct UnsetSentinelSerializer {} + +impl BuildSerializer for UnsetSentinelSerializer { + const EXPECTED_TYPE: &'static str = "unset-sentinel"; + + fn build( + _schema: &Bound<'_, PyDict>, + _config: Option<&Bound<'_, PyDict>>, + _definitions: &mut DefinitionsBuilder, + ) -> PyResult { + Ok(Self {}.into()) + } +} + +impl_py_gc_traverse!(UnsetSentinelSerializer {}); + +impl TypeSerializer for UnsetSentinelSerializer { + fn to_python( + &self, + value: &Bound<'_, PyAny>, + _include: Option<&Bound<'_, PyAny>>, + _exclude: Option<&Bound<'_, PyAny>>, + _extra: &Extra, + ) -> PyResult { + let unset_obj = get_unset_sentinel_object(value.py()); + + if value.is(unset_obj) { + Ok(unset_obj.to_owned().into()) + } else { + Err( + PydanticSerializationUnexpectedValue::new_from_msg(Some("Expected 'UNSET' sentinel".to_string())) + .to_py_err(), + ) + } + } + + fn json_key<'a>(&self, key: &'a Bound<'_, PyAny>, extra: &Extra) -> PyResult> { + self.invalid_as_json_key(key, extra, Self::EXPECTED_TYPE) + } + + fn serde_serialize( + &self, + _value: &Bound<'_, PyAny>, + _serializer: S, + _include: Option<&Bound<'_, PyAny>>, + _exclude: Option<&Bound<'_, PyAny>>, + _extra: &Extra, + ) -> Result { + Err(Error::custom("'UNSET' can't be serialized to JSON".to_string())) + } + + fn get_name(&self) -> &str { + 
Self::EXPECTED_TYPE + } +} diff --git a/src/validators/mod.rs b/src/validators/mod.rs index 2fd79c495..8f06af057 100644 --- a/src/validators/mod.rs +++ b/src/validators/mod.rs @@ -59,6 +59,7 @@ mod timedelta; mod tuple; mod typed_dict; mod union; +mod unset_sentinel; mod url; mod uuid; mod validation_state; @@ -574,6 +575,8 @@ fn build_validator_inner( call::CallValidator, // literals literal::LiteralValidator, + // unset sentinel + unset_sentinel::UnsetSentinelValidator, // enums enum_::BuildEnumValidator, // any @@ -741,6 +744,8 @@ pub enum CombinedValidator { FunctionCall(call::CallValidator), // literals Literal(literal::LiteralValidator), + // Unset sentinel + UnsetSentinel(unset_sentinel::UnsetSentinelValidator), // enums IntEnum(enum_::EnumValidator), StrEnum(enum_::EnumValidator), diff --git a/src/validators/unset_sentinel.rs b/src/validators/unset_sentinel.rs new file mode 100644 index 000000000..2b98283cb --- /dev/null +++ b/src/validators/unset_sentinel.rs @@ -0,0 +1,47 @@ +use core::fmt::Debug; + +use pyo3::prelude::*; +use pyo3::types::PyDict; + +use crate::common::unset_sentinel::get_unset_sentinel_object; +use crate::errors::{ErrorType, ValError, ValResult}; +use crate::input::Input; + +use super::{BuildValidator, CombinedValidator, DefinitionsBuilder, ValidationState, Validator}; + +#[derive(Debug, Clone)] +pub struct UnsetSentinelValidator {} + +impl BuildValidator for UnsetSentinelValidator { + const EXPECTED_TYPE: &'static str = "unset-sentinel"; + + fn build( + _schema: &Bound<'_, PyDict>, + _config: Option<&Bound<'_, PyDict>>, + _definitions: &mut DefinitionsBuilder, + ) -> PyResult { + Ok(CombinedValidator::UnsetSentinel(Self {})) + } +} + +impl_py_gc_traverse!(UnsetSentinelValidator {}); + +impl Validator for UnsetSentinelValidator { + fn validate<'py>( + &self, + py: Python<'py>, + input: &(impl Input<'py> + ?Sized), + _state: &mut ValidationState<'_, 'py>, + ) -> ValResult { + let unset_obj = get_unset_sentinel_object(py); + + match 
input.as_python() { + Some(v) if v.is(unset_obj) => Ok(v.to_owned().into()), + _ => Err(ValError::new(ErrorType::UnsetSentinelError { context: None }, input)), + } + } + + fn get_name(&self) -> &str { + Self::EXPECTED_TYPE + } +} diff --git a/uv.lock b/uv.lock index de11e849a..24c891eb7 100644 --- a/uv.lock +++ b/uv.lock @@ -636,7 +636,7 @@ wasm = [ ] [package.metadata] -requires-dist = [{ name = "typing-extensions", specifier = ">=4.13.0" }] +requires-dist = [{ name = "typing-extensions", git = "https://github.com/HexDecimal/typing_extensions?rev=conforming-sentinel" }] [package.metadata.requires-dev] all = [ @@ -957,11 +957,7 @@ wheels = [ [[package]] name = "typing-extensions" version = "4.14.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d1/bc/51647cd02527e87d05cb083ccc402f93e441606ff1f01739a62c8ad09ba5/typing_extensions-4.14.0.tar.gz", hash = "sha256:8676b788e32f02ab42d9e7c61324048ae4c6d844a399eebace3d4979d75ceef4", size = 107423, upload-time = "2025-06-02T14:52:11.399Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/69/e0/552843e0d356fbb5256d21449fa957fa4eff3bbc135a74a691ee70c7c5da/typing_extensions-4.14.0-py3-none-any.whl", hash = "sha256:a1514509136dd0b477638fc68d6a91497af5076466ad0fa6c338e44e359944af", size = 43839, upload-time = "2025-06-02T14:52:10.026Z" }, -] +source = { git = "https://github.com/HexDecimal/typing_extensions?rev=conforming-sentinel#5c0e8a79317478ebfda949c56b011be47a618eb0" } [[package]] name = "typing-inspection"