Pydantic入门

date
Mar 11, 2021
slug
18
status
Published
tags
Python
summary
type
Post
from datetime import datetime
from typing import List, Optional
from pydantic import BaseModel


# Example model: the keyword arguments passed to User(...) are validated and
# coerced against the field declarations below.
class User(BaseModel):
    id: int  # no default value, so the field is required
    name = 'John Doe'  # un-annotated field with a default value
    signup_ts: Optional[datetime] = None  # optional, defaults to None
    friends: List[int] = []  # list of ints, defaults to an empty list


external_data = {
    'id': '123',
    'signup_ts': '2019-06-01 12:22',
    'friends': [1, 2, '3'],
}
user = User(**external_data)
print(user.id)
#> 123
print(repr(user.signup_ts))
#> datetime.datetime(2019, 6, 1, 12, 22)
print(user.friends)
#> [1, 2, 3]
print(user.dict())
"""
{
    'id': 123,
    'signup_ts': datetime.datetime(2019, 6, 1, 12, 22),
    'friends': [1, 2, 3],
    'name': 'John Doe',
}
"""
如果失败
from pydantic import ValidationError
try:
    User(signup_ts='broken', friends=[1, 2, 'not number'])
except ValidationError as e:
    print(e.json())


[
  {
    "loc": [
      "id"
    ],
    "msg": "field required",
    "type": "value_error.missing"
  },
  {
    "loc": [
      "signup_ts"
    ],
    "msg": "invalid datetime format",
    "type": "value_error.datetime"
  },
  {
    "loc": [
      "friends",
      2
    ],
    "msg": "value is not a valid integer",
    "type": "type_error.integer"
  }
]

基本原理

基于 type hint (即类的 `__annotations__`) 实现. 好处是不需要额外学习微语言.

模型

from pydantic import BaseModel

class User(BaseModel):
    id: int  # required: no default value is given
    name = 'Jane Doe'  # optional: has a default value, and the type is inferred from it

user = User(id='123') # 这里自动进行验证, 能自动转换的就自动转换 譬如 '123' --> 123

# 可以使用dict 查看所有属性 --- 值对.
assert user.dict() == dict(user) == {'id': 123, 'name': 'Jane Doe'}
 
模型具有以下方法和属性:
  • dict()
    • 返回模型的字段和值的字典。
  • json()
    • 返回表示 dict() 的 JSON 字符串。
  • copy()
    • 返回模型的副本(默认情况下为浅副本)。
  • parse_obj()
    • 如果一个对象不是字典,可以使用该方法将其加载到具有错误处理的模型中。
  • parse_raw()
    • 用于加载多种格式字符串的实用程序。
  • parse_file()
    • 与 parse_raw() 类似,但是作用于文件路径。
  • from_orm()
    • 从任意类加载数据到模型中。
  • schema()
    • 返回一个将模型表示为 JSON 模式的字典
  • schema_json()
    • 返回表示 schema() 的 JSON 字符串。
  • construct()
    • 用于创建模型而不执行验证的类方法;
  • __fields_set__
    • 当模型实例初始化时设置的字段名称集合。
  • __config__
    • 模型的配置类。

递归模型

from typing import List
from pydantic import BaseModel


class Foo(BaseModel):
    count: int
    size: float = None


class Bar(BaseModel):
    apple = 'x'
    banana = 'y'


class Spam(BaseModel):
    foo: Foo
    bars: List[Bar]


m = Spam(foo={'count': 4}, bars=[{'apple': 'x1'}, {'apple': 'x2'}])
print(m)
#> foo=Foo(count=4, size=None) bars=[Bar(apple='x1', banana='y'),
#> Bar(apple='x2', banana='y')]
print(m.dict())
"""
{
    'foo': {'count': 4, 'size': None},
    'bars': [
        {'apple': 'x1', 'banana': 'y'},
        {'apple': 'x2', 'banana': 'y'},
    ],
}
"""

ORM 模式

💡
class Config: orm_mode = True # 这里必须为True
 
from typing import List
from sqlalchemy import Column, Integer, String
from sqlalchemy.dialects.postgresql import ARRAY
from sqlalchemy.ext.declarative import declarative_base
from pydantic import BaseModel, constr

Base = declarative_base()


class CompanyOrm(Base):
    __tablename__ = "companies"
    id = Column(Integer, primary_key=True, nullable=False)
    public_key = Column(String(20), index=True, nullable=False, unique=True)
    name = Column(String(63), unique=True)
    domains = Column(ARRAY(String(255)))


# Pydantic counterpart of CompanyOrm; the max_length limits mirror the
# String(...) column sizes declared on the ORM class.
class CompanyModel(BaseModel):
    id: int
    public_key: constr(max_length=20)
    name: constr(max_length=63)
    domains: List[constr(max_length=255)]

    class Config:
        orm_mode = True  # must be True (needed for the from_orm() call below)


co_orm = CompanyOrm(
    id=123,
    public_key="foobar",
    name="Testing",
    domains=["example.com", "foobar.com"],
)
print(co_orm)
# > <models_orm_mode.CompanyOrm object at 0x7f2e727a27c0>
co_model = CompanyModel.from_orm(co_orm)
print(co_model)
# > id=123 public_key='foobar' name='Testing' domains=['example.com',
# > 'foobar.com']

字段名自动映射

 
import typing

from pydantic import BaseModel, Field
import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base


class MyModel(BaseModel):
    metadata: typing.Dict[str, str] = Field(alias="metadata_") # 这里

    class Config:
        orm_mode = True


# Use a distinct name for the SQLAlchemy declarative base so that it does not
# shadow pydantic's BaseModel, which is imported earlier in this snippet.
Base = declarative_base()


class SQLModel(Base):
    __tablename__ = "my_table"
    id = sa.Column("id", sa.Integer, primary_key=True)
    # 'metadata' is reserved by SQLAlchemy, hence the '_'
    metadata_ = sa.Column("metadata", sa.JSON)


sql_model = SQLModel(metadata_={"key": "val"}, id=1)

pydantic_model = MyModel.from_orm(sql_model)

print(pydantic_model.dict())
# > {'metadata': {'key': 'val'}}
print(pydantic_model.dict(by_alias=True))
# > {'metadata_': {'key': 'val'}}

递归 ORM 模型

错误处理

每当在正在验证的数据中发现错误时,pydantic 都会引发 ValidationError. 但是在自己编写验证逻辑时, 不要直接抛出 ValidationError, 而应抛出 ValueError、TypeError 或 AssertionError, pydantic 会把它们收集并包装成 ValidationError.
你可以通过几种方式访问这些错误:
  • e.errors()
    • 返回输入数据中发现的错误的列表。
  • e.json()
    • 返回一个表示 errors 的 JSON。
  • str(e)
    • 返回人类可读的错误表示。
每一个错误对象包含:
  • loc
    • 错误的位置列表。列表中的第一项将是发生错误的字段,如果该字段是[子模型], 则将出现后续项以指示错误的嵌套位置。
  • type
    • 计算机可读的错误类型的标识符。
  • msg
    • 人类可读的错误解释。
  • ctx
    • 包含呈现错误消息所需的值的可选对象。
例子:
from typing import List
from pydantic import BaseModel, ValidationError, conint


class Location(BaseModel):
    lat = 0.1
    lng = 10.1


class Model(BaseModel):
    is_required: float
    gt_int: conint(gt=42)
    list_of_ints: List[int] = None
    a_float: float = None
    recursive_model: Location = None


data = dict(
    list_of_ints=['1', 2, 'bad'],
    a_float='not a float',
    recursive_model={'lat': 4.2, 'lng': 'New York'},
    gt_int=21,
)

try:
    Model(**data)
except ValidationError as e:
    print(e)
    """
    5 validation errors for Model
    is_required
      field required (type=value_error.missing)
    gt_int
      ensure this value is greater than 42 (type=value_error.number.not_gt;
    limit_value=42)
    list_of_ints -> 2
      value is not a valid integer (type=type_error.integer)
    a_float
      value is not a valid float (type=type_error.float)
    recursive_model -> lng
      value is not a valid float (type=type_error.float)
    """

try:
    Model(**data)
except ValidationError as e:
    print(e.json())
    """
    [
      {
        "loc": [
          "is_required"
        ],
        "msg": "field required",
        "type": "value_error.missing"
      },
      {
        "loc": [
          "gt_int"
        ],
        "msg": "ensure this value is greater than 42",
        "type": "value_error.number.not_gt",
        "ctx": {
          "limit_value": 42
        }
      },
      {
        "loc": [
          "list_of_ints",
          2
        ],
        "msg": "value is not a valid integer",
        "type": "type_error.integer"
      },
      {
        "loc": [
          "a_float"
        ],
        "msg": "value is not a valid float",
        "type": "type_error.float"
      },
      {
        "loc": [
          "recursive_model",
          "lng"
        ],
        "msg": "value is not a valid float",
        "type": "type_error.float"
      }
    ]
    """

定制错误

from pydantic import BaseModel, ValidationError, validator


class Model(BaseModel):
    foo: str

    @validator("foo")  # registers the method below as the validator for `foo`
    def name_must_contain_space(cls, v):
        """Accept only the exact value "bar" for `foo`.

        Despite the method name, no whitespace check is done here; the value
        is simply compared against "bar".

        Raises:
            ValueError: if `v` is not equal to "bar".
        """
        if v != "bar":
            # note: the exception raised here is a plain ValueError,
            # not ValidationError
            raise ValueError('value must be "bar"')

        return v


try:
    Model(foo="ber")
except ValidationError as e:
    print(e.errors())
    """
    [
        {
            'loc': ('foo',),
            'msg': 'value must be "bar"',
            'type': 'value_error',
        },
    ]
    """

帮助函数

三个类方法,用于输入数据.

parse_obj

parse_raw

parse_file

import pickle
from datetime import datetime
from pathlib import Path

from pydantic import BaseModel, ValidationError


class User(BaseModel):
    id: int
    name = "John Doe"
    signup_ts: datetime = None


m = User.parse_obj({"id": 123, "name": "James"})
print(m)
# > id=123 signup_ts=None name='James'

try:
    User.parse_obj(["not", "a", "dict"])
except ValidationError as e:
    print(e)
    """
    1 validation error for User
    __root__
      User expected dict not list (type=type_error)
    """

# assumes json as no content type passed
m = User.parse_raw('{"id": 123, "name": "James"}')
print(m)
# > id=123 signup_ts=None name='James'

# NOTE(review): unpickling can execute arbitrary code, so allow_pickle=True
# should only ever be used with data that comes from a trusted source.
pickle_data = pickle.dumps({"id": 123, "name": "James", "signup_ts": datetime(2017, 7, 14)})
m = User.parse_raw(pickle_data, content_type="application/pickle", allow_pickle=True)
print(m)
# > id=123 signup_ts=datetime.datetime(2017, 7, 14, 0, 0) name='James'

path = Path("data.json")
path.write_text('{"id": 123, "name": "James"}')
m = User.parse_file(path)
print(m)
# > id=123 signup_ts=None name='James'

创建未经验证的模型

不做数据验证,目的是提高效率.

construct()

from pydantic import BaseModel


class User(BaseModel):
    id: int
    age: int
    name: str = 'John Doe'


original_user = User(id=123, age=32)

user_data = original_user.dict()
print(user_data)
#> {'id': 123, 'age': 32, 'name': 'John Doe'}
fields_set = original_user.__fields_set__
print(fields_set)
#> {'id', 'age'}

# ...
# pass user_data and fields_set to RPC or save to the database etc.
# ...

# you can then create a new instance of User without
# re-running validation which would be unnecessary at this point:
new_user = User.construct(_fields_set=fields_set, **user_data)
print(repr(new_user))
#> User(name='John Doe', id=123, age=32)
print(new_user.__fields_set__)
#> {'id', 'age'}

# construct can be dangerous, only use it with validated data!:
bad_user = User.construct(id='dog')
print(repr(bad_user))
#> User(name='John Doe', id='dog')

泛型模型

from typing import Generic, TypeVar, Optional, List

from pydantic import BaseModel, validator, ValidationError
from pydantic.generics import GenericModel

DataT = TypeVar("DataT")


class Error(BaseModel):
    code: int
    message: str


class DataModel(BaseModel):
    numbers: List[int]
    people: List[str]


# Generic response envelope: exactly one of `data` / `error` must be provided.
class Response(GenericModel, Generic[DataT]):  # must inherit from both of these classes
    data: Optional[DataT]
    error: Optional[Error]

    @validator("error", always=True)
    def check_consistency(cls, v, values):
        """Ensure that exactly one of `data` and `error` is set.

        `values` only contains fields that already passed validation, so the
        "data" key is missing when `data` itself was invalid. Both checks use
        `.get()` so that this case is handled instead of raising KeyError.

        Raises:
            ValueError: if both, or neither, of `data` and `error` are given.
        """
        data = values.get("data")
        if v is not None and data is not None:
            raise ValueError("must not provide both data and error")
        if v is None and data is None:
            raise ValueError("must provide data or error")
        return v


data = DataModel(numbers=[1, 2, 3], people=[])
error = Error(code=404, message="Not found")

print(Response[int](data=1))
# > data=1 error=None
print(Response[str](data="value"))
# > data='value' error=None
print(Response[str](data="value").dict())
# > {'data': 'value', 'error': None}
print(Response[DataModel](data=data).dict())
"""
{
    'data': {'numbers': [1, 2, 3], 'people': []},
    'error': None,
}
"""
print(Response[DataModel](error=error).dict())
"""
{
    'data': None,
    'error': {'code': 404, 'message': 'Not found'},
}
"""
try:
    Response[int](data="value")
except ValidationError as e:
    print(e)
    """
    2 validation errors for Response[int]
    data
      value is not a valid integer (type=type_error.integer)
    error
      must provide data or error (type=value_error)
    """
如果有继承, 父类的方法和属性都会在子类当中生效. 但是子类中必须明确定义继承自Generic[TypeX]
from typing import TypeVar, Generic
from pydantic.generics import GenericModel

TypeX = TypeVar("TypeX")


class BaseClass(GenericModel, Generic[TypeX]):
    X: TypeX


class ChildClass(BaseClass[TypeX], Generic[TypeX]): # 这里
    # Inherit from Generic[TypeX]
    pass


# Replace TypeX by int
print(ChildClass[int](X=1))
# > X=1
部分继承
from typing import TypeVar, Generic
from pydantic.generics import GenericModel

TypeX = TypeVar('TypeX')
TypeY = TypeVar('TypeY')
TypeZ = TypeVar('TypeZ')


class BaseClass(GenericModel, Generic[TypeX, TypeY]):
    x: TypeX
    y: TypeY


class ChildClass(BaseClass[int, TypeY], Generic[TypeY, TypeZ]):
    z: TypeZ


# Replace TypeY by str
print(ChildClass[str, int](x=1, y='y', z=3))
#> x=1 y='y' z=3
多个类模型使用同一个泛型,必须类型相同,否则报错.
from typing import Generic, TypeVar

from pydantic import ValidationError
from pydantic.generics import GenericModel

T = TypeVar("T")


class InnerT(GenericModel, Generic[T]):
    inner: T


class OuterT(GenericModel, Generic[T]):
    outer: T
    nested: InnerT[T]


nested = InnerT[int](inner=1)
print(OuterT[int](outer=1, nested=nested))
# > outer=1 nested=InnerT[T][int](inner=1)
try:
    nested = InnerT[str](inner="a")
    print(OuterT[int](outer="a", nested=nested))
except ValidationError as e:
    print(e)
    """
    2 validation errors for OuterT[int]
    outer
      value is not a valid integer (type=type_error.integer)
    nested -> inner
      value is not a valid integer (type=type_error.integer)
    """
这节讲的是类似 partial 的功能: 可以先只指定一部分类型参数, 剩下的类型参数之后再指定.
from typing import Generic, TypeVar

from pydantic import ValidationError
from pydantic.generics import GenericModel

AT = TypeVar("AT")
BT = TypeVar("BT")


class Model(GenericModel, Generic[AT, BT]):
    a: AT
    b: BT


print(Model(a="a", b="a"))
# > a='a' b='a'

IntT = TypeVar("IntT", bound=int)
typevar_model = Model[int, IntT]
print(typevar_model(a=1, b=1))
# > a=1 b=1
try:
    typevar_model(a="a", b="a")
except ValidationError as exc:
    print(exc)
    """
    2 validation errors for Model[int, IntT]
    a
      value is not a valid integer (type=type_error.integer)
    b
      value is not a valid integer (type=type_error.integer)
    """

concrete_model = typevar_model[int] # 这里
print(concrete_model(a=1, b=1))
# > a=1 b=1

try:
    concrete_model(a="a", b="a")
except ValidationError as exc:
    print(exc)
"""
2 validation errors for Model[int, IntT][int]
a
  value is not a valid integer (type=type_error.integer)
b
  value is not a valid integer (type=type_error.integer)
"""

动态模型创建

create_model
from pydantic import BaseModel, create_model

DynamicFoobarModel = create_model('DynamicFoobarModel', foo=(str, ...), bar=123)


class StaticFoobarModel(BaseModel):
    foo: str
    bar: int = 123
加俩参数
from pydantic import BaseModel, create_model


class FooModel(BaseModel):
    foo: str
    bar: int = 123


BarModel = create_model(
    'BarModel',
    apple='russet',
    banana='yellow',
    __base__=FooModel, # 这里
)
print(BarModel)
#> <class 'pydantic.main.BarModel'>
print(BarModel.__fields__.keys()) # 和这里
#> dict_keys(['foo', 'bar', 'apple', 'banana'])
__validators__
from pydantic import create_model, ValidationError, validator


def username_alphanumeric(cls, v):
    """Return `v` unchanged when it consists only of alphanumeric characters.

    Fails with an AssertionError carrying the message "must be alphanumeric"
    otherwise.
    """
    only_alnum = v.isalnum()
    assert only_alnum, "must be alphanumeric"
    return v


validators = {"username_validator": validator("username")(username_alphanumeric)}

UserModel = create_model("UserModel", username=(str, ...), __validators__=validators)

user = UserModel(username="scolvin")
print(user)
# > username='scolvin'

try:
    UserModel(username="scolvi%n")
except ValidationError as e:
    print(e)
    """
    1 validation error for UserModel
    username
      must be alphanumeric (type=assertion_error)
    """

自定义根类型

__root__
之前的Model 都是定义属性, 这里没有属性. 只定义自己.
通过parse_obj 或者__init__的第一个参数实现
from typing import List
import json
from pydantic import BaseModel
from pydantic.schema import schema


class Pets(BaseModel):
    __root__: List[str]


print(Pets(__root__=["dog", "cat"]))
# > __root__=['dog', 'cat']
print(Pets(__root__=["dog", "cat"]).json())
# > ["dog", "cat"]
print(Pets.parse_obj(["dog", "cat"]))
# > __root__=['dog', 'cat']
print(Pets.schema())
"""
{
    'title': 'Pets',
    'type': 'array',
    'items': {'type': 'string'},
}
"""
pets_schema = schema([Pets])
print(json.dumps(pets_schema, indent=2))
"""
{
  "definitions": {
    "Pets": {
      "title": "Pets",
      "type": "array",
      "items": {
        "type": "string"
      }
    }
  }
}
"""
如果parse_obj的第一个参数是一个字典
  • 如果自定义的根类型是映射类型(例如 Dict 或 Mapping),参数本身总是根据自定义根类型进行验证。
  • 对于其他自定义根类型,如果字典仅有一个键,且其名称为 __root__,则该键对应的值将根据自定义根类型进行验证。
from typing import List, Dict
from pydantic import BaseModel, ValidationError


class Pets(BaseModel):
    __root__: List[str]


print(Pets.parse_obj(["dog", "cat"]))
# > __root__=['dog', 'cat']
print(Pets.parse_obj({"__root__": ["dog", "cat"]}))  # not recommended
# > __root__=['dog', 'cat']


class PetsByName(BaseModel):
    __root__: Dict[str, str]


print(PetsByName.parse_obj({"Otis": "dog", "Milo": "cat"}))
# > __root__={'Otis': 'dog', 'Milo': 'cat'}
try:
    PetsByName.parse_obj({"__root__": {"Otis": "dog", "Milo": "cat"}})
except ValidationError as e:
    print(e)
    """
    1 validation error for PetsByName
    __root__ -> __root__
      str type expected (type=type_error.str)
    """
要迭代或按下标访问 List 类型的 Model, 需要自己定义 __iter__ 和 __getitem__
from typing import List
from pydantic import BaseModel


class Pets(BaseModel):
    __root__: List[str]

    def __iter__(self):
        """Iterate over the list stored in `__root__`."""
        return iter(self.__root__)

    def __getitem__(self, item):
        """Return `self.__root__[item]`, i.e. index into the wrapped list."""
        return self.__root__[item]


pets = Pets.parse_obj(["dog", "cat"])
print(pets[0])
# > dog
print([pet for pet in pets])
# > ['dog', 'cat']

伪不变性

让属性的值固定住.
但是这种固定是...防君子不防小人的.
from pydantic import BaseModel


class FooBarModel(BaseModel):
    a: str
    b: dict

    class Config:
        allow_mutation = False


foobar = FooBarModel(a="hello", b={"apple": "pear"})

try:
    foobar.a = "different"
except TypeError as e:
    print(e)
    # > "FooBarModel" is immutable and does not support item assignment

print(foobar.a)
# > hello
print(foobar.b)
# > {'apple': 'pear'}
foobar.b["apple"] = "grape"
print(foobar.b)
# > {'apple': 'grape'}

和抽象基类一起使用

import abc
from pydantic import BaseModel


class FooBarModel(BaseModel, abc.ABC):
    a: str
    b: int

    @abc.abstractmethod
    def my_abstract_method(self):
        pass

字段排序

  • 验证是按照字段的定义顺序执行的;可以访问前一个字段的值,但不能访问后一个字段的值。
  • 字段顺序保留在模型中
  • 字段顺序保留在验证错误中
  • 字段顺序被 .dict() 和 .json() 等保留。
💡
带注解的排在前面, 默认值的排在后面,并且赋值的时候也要参考前后顺序
from pydantic import BaseModel, ValidationError


class Model(BaseModel):
    a: int
    b = 2
    c: int = 1
    d = 0
    e: float


print(Model.__fields__.keys())
# > dict_keys(['a', 'c', 'e', 'b', 'd'])
m = Model(e=2, a=1)
print(m.dict())
# > {'a': 1, 'c': 1, 'e': 2.0, 'b': 2, 'd': 0}
try:
    Model(a="x", b="x", c="x", d="x", e="x")
except ValidationError as exc:
    # Collect the location of every reported error. Distinct names are used
    # so the loop variable no longer reuses the name of the caught exception.
    error_locations = [err["loc"] for err in exc.errors()]

print(error_locations)
# > [('a',), ('c',), ('e',), ('b',), ('d',)]

可选必需字段

from typing import Optional
from pydantic import BaseModel, Field, ValidationError


class Model(BaseModel):
    # a: int
    # b: int = ...
    # c: int = Field(...)
    # The three declarations above are all required fields.
    a: Optional[int]  # optional: int or None
    b: Optional[int] = ...  # required: int or None
    c: Optional[int] = Field(...)  # required: int or None


print(Model(b=1, c=2))
# > a=None b=1 c=2
try:
    Model(a=1, b=2)
except ValidationError as e:
    print(e)
    """
    1 validation error for Model
    c
      field required (type=value_error.missing)
    """

动态默认值的字段

default_factory
from datetime import datetime
from uuid import UUID, uuid4
from pydantic import BaseModel, Field


class Model(BaseModel):
    uid: UUID = Field(default_factory=uuid4)
    updated: datetime = Field(default_factory=datetime.utcnow)


m1 = Model()
m2 = Model()
print(f'{m1.uid} != {m2.uid}')
#> 27ce808f-9293-47ac-860e-aebe5d6ffac7 != a583211f-b357-4b92-9632-cd1b3bbd2e1b
print(f'{m1.updated} != {m2.updated}')
#> 2020-10-28 20:03:32.840916 != 2020-10-28 20:03:32.840934

私有模型属性

from datetime import datetime
from random import randint
from typing import Optional

from pydantic import BaseModel, PrivateAttr


class TimeAwareModel(BaseModel):
    # Underscore-prefixed attributes declared with PrivateAttr are private:
    # they do not show up in the dict() output printed below.
    _processed_at: datetime = PrivateAttr(default_factory=datetime.now)
    # Annotated as int to match the randint() value assigned in __init__.
    _secret_value: int = PrivateAttr()
    aaaaaaa: Optional[str]

    def __init__(self, **data):
        """Initialise the model from `data`, then set the private secret value."""
        super().__init__(**data)
        # this could also be done with default_factory
        self._secret_value = randint(1, 5)


m = TimeAwareModel()
print(m._processed_at)
# > 2020-10-28 20:03:33.179004
print(m._secret_value)
# > 5
print(m.dict()) # 这里
# {'aaaaaaa': None} 
 

工具类解析函数

parse_obj_as parse_file_as parse_raw_as
类似于parse_obj, 但是更易用.
from typing import List

from pydantic import BaseModel, parse_obj_as


class Item(BaseModel):
    id: int
    name: str


# `item_data` could come from an API call, eg., via something like:
# item_data = requests.get('https://my-api.com/items').json()
item_data = [{'id': 1, 'name': 'My Item'}]

items = parse_obj_as(List[Item], item_data)
print(items)
#> [Item(id=1, name='My Item')]

强制转换

from pydantic import BaseModel


class Model(BaseModel):
    a: int
    b: float
    c: str


print(Model(a=3.1415, b=' 2.72 ', c=123).dict())
#> {'a': 3, 'b': 2.72, 'c': '123'}

模型签名

给fastapi使用. 应用层没想到有什么用. 主要用于自省.
import inspect

from pydantic import BaseModel


class MyModel(BaseModel):
    id: int
    info: str = "Foo"

    def __init__(self, id: int = 1, *, bar: str, **data) -> None:
        """My custom init!"""
        super().__init__(id=id, bar=bar, **data)


print(inspect.signature(MyModel))
# > (id: int = 1, *, bar: str, info: str = 'Foo') -> None

字段类型

可迭代类型

from typing import Deque, Dict, FrozenSet, List, Optional, Sequence, Set, Tuple, Union

from pydantic import BaseModel


class Model(BaseModel):
    simple_list: list = None
    list_of_ints: List[int] = None

    simple_tuple: tuple = None
    tuple_of_different_types: Tuple[int, float, str, bool] = None

    simple_dict: dict = None
    dict_str_float: Dict[str, float] = None

    simple_set: set = None
    set_bytes: Set[bytes] = None
    frozen_set: FrozenSet[int] = None

    str_or_bytes: Union[str, bytes] = None
    none_or_str: Optional[str] = None

    sequence_of_ints: Sequence[int] = None

    compound: Dict[Union[str, bytes], List[Set[int]]] = None

    deque: Deque[int] = None


print(Model(simple_list=["1", "2", "3"]).simple_list)
# > ['1', '2', '3']
print(Model(list_of_ints=["1", "2", "3"]).list_of_ints)
# > [1, 2, 3]

print(Model(simple_dict={"a": 1, b"b": 2}).simple_dict)
# > {'a': 1, b'b': 2}
print(Model(dict_str_float={"a": 1, b"b": 2}).dict_str_float)
# > {'a': 1.0, 'b': 2.0}

print(Model(simple_tuple=[1, 2, 3, 4]).simple_tuple)
# > (1, 2, 3, 4)
print(Model(tuple_of_different_types=[4, 3, 2, 1]).tuple_of_different_types)
# > (4, 3.0, '2', True)

print(Model(sequence_of_ints=[1, 2, 3, 4]).sequence_of_ints)
# > [1, 2, 3, 4]
print(Model(sequence_of_ints=(1, 2, 3, 4)).sequence_of_ints)
# > (1, 2, 3, 4)

print(Model(deque=[1, 2, 3]).deque)
# > deque([1, 2, 3])

无限生成器

默认情况下, 生成器将被转换成list存储.
如果需要一个类似流的生成器. 可以使用Iterable
from typing import Iterable
from pydantic import BaseModel


class Model(BaseModel):
    infinite: Iterable[int]


def infinite_ints():
    """Yield 0, 1, 2, ... one value at a time, without ever finishing."""
    current = -1
    while True:
        current += 1
        yield current


m = Model(infinite=infinite_ints())
print(m)
# > infinite=<generator object infinite_ints at 0x7fcb8ff44580>

for i in m.infinite:
    print(i)
    # > 0
    # > 1
    # > 2
    # > 3
    # > 4
    # > 5
    # > 6
    # > 7
    # > 8
    # > 9
    # > 10
    if i == 10:
        break

Union

from uuid import UUID
from typing import Union, Optional
from pydantic import BaseModel


class User(BaseModel):
    id: Union[int, str, UUID]
    name: str
    # The Union members can be freely mixed inside the list items as well.
    id2: Optional[list[Union[int, str, UUID]]]


user_01 = User(id=123, name="John Doe", id2=[1, "2", "cf57432e-809e-4353-adbd-9d5c0d733868"])
print(user_01)
# > id=123 name='John Doe' id2=[1, 2, 'cf57432e-809e-4353-adbd-9d5c0d733868']
print(user_01.id)
# > 123
user_02 = User(id="1234", name="John Doe")
print(user_02)
# > id=1234 name='John Doe' id2=None
print(user_02.id)
# > 1234
user_03_uuid = UUID("cf57432e-809e-4353-adbd-9d5c0d733868")
user_03 = User(id=user_03_uuid, name="John Doe")
print(user_03)
# > id=275603287559914445491632874575877060712 name='John Doe' id2=None
print(user_03.id)
# > 275603287559914445491632874575877060712
print(user_03_uuid.int)
# > 275603287559914445491632874575877060712

Enum 和 Choice

from enum import Enum, IntEnum

from pydantic import BaseModel, ValidationError


class FruitEnum(str, Enum):
    pear = "pear"
    banana = "banana"


class ToolEnum(IntEnum):
    spanner = 1
    wrench = 2


class CookingModel(BaseModel):
    fruit: FruitEnum = FruitEnum.pear
    tool: ToolEnum = ToolEnum.spanner


print(CookingModel())
# > fruit=<FruitEnum.pear: 'pear'> tool=<ToolEnum.spanner: 1>
print(CookingModel(tool=2, fruit="banana"))
# > fruit=<FruitEnum.banana: 'banana'> tool=<ToolEnum.wrench: 2>
try:
    CookingModel(fruit="other")
except ValidationError as e:
    print(e)
    """
    1 validation error for CookingModel
    fruit
      value is not a valid enumeration member; permitted: 'pear', 'banana'
    (type=type_error.enum; enum_values=[<FruitEnum.pear: 'pear'>,
    <FruitEnum.banana: 'banana'>])
    """

Datetime

datetime 字段可以是:
  • datetime:存在 datetime 对象时
  • int 或 float:假定为 Unix 时间时。
    • 例如,自1970年1月1日以来的秒数 (如果 >= -2e10 且 <= 2e10) 或毫秒数 (如果 < -2e10 或 > 2e10)。
  • str:下面的格式可用时:
    • YYYY-MM-DD[T]HH:MM[:SS[.ffffff]][Z or [±]HH[:]MM]]]
    • 作为字符串的 int 或 floats (假定为Unix time)
date 字段可以是:
  • date:存在 date 对象时
  • int 或float: 参见 datetime
  • str:下面的格式可用时:
    • YYYY-MM-DD
    • int 或 float 参见 datetime
time 字段可以是:
  • time:存在 time 对象时
  • str:下面的格式可用时:
    • HH:MM[:SS[.ffffff]][Z or [±]HH[:]MM]]]
timedelta 字段可以是:
  • timedelta:存在 timedelta 对象时
  • int 或 float:假定为"秒"
  • str:下面的格式可用时:
    • [-][DD ][HH:MM]SS[.ffffff]
    • [±]P[DD]DT[HH]H[MM]M[SS]S (ISO 8601 的 timedelta 格式)
from datetime import date, datetime, time, timedelta
from pydantic import BaseModel


class Model(BaseModel):
    d: date = None
    dt: datetime = None
    t: time = None
    td: timedelta = None


m = Model(
    d=1966280412345.6789,
    dt='2032-04-23T10:20:30.400+02:30',
    t=time(4, 8, 16),
    td='P3DT12H30M5S',
)

print(m.dict())
"""
{
    'd': datetime.date(2032, 4, 22),
    'dt': datetime.datetime(2032, 4, 23, 10, 20, 30, 400000,
tzinfo=datetime.timezone(datetime.timedelta(seconds=9000))),
    't': datetime.time(4, 8, 16),
    'td': datetime.timedelta(days=3, seconds=45005),
}
"""

Boolean

bool 字段接受下列取值 (对应的还有一个更严格的 StrictBool, 它不做这些转换):
  • 有效的布尔值(例如 True 或 False);
  • 整数 0 或 1
  • 一个字符串,在转换为小写后,等于 0、off、f、false、n、no、1、on、t、true、y、yes 其中之一;
  • 一个 bytes,在解码为 str 后,依据上一个规则有效。
from pydantic import BaseModel, ValidationError


class BooleanModel(BaseModel):
    bool_value: bool


print(BooleanModel(bool_value=False))
# > bool_value=False
print(BooleanModel(bool_value="False"))
# > bool_value=False
try:
    BooleanModel(bool_value=[])
except ValidationError as e:
    print(str(e))
    """
    1 validation error for BooleanModel
    bool_value
      value could not be parsed to a boolean (type=type_error.bool)
    """

Callable

💡
类型为 Callable 的字段只执行简单的检查以确定参数是否可调用;不执行参数、参数类型或返回类型的验证。
from typing import Callable
from pydantic import BaseModel


class Foo(BaseModel):
    callback: Callable[[int], int]  # [argument types], return type; see the typing docs


m = Foo(callback=lambda x: x)
print(m)
#> callback=<function <lambda> at 0x7fcb90290dc0>

Type

from typing import Type

from pydantic import BaseModel
from pydantic import ValidationError


class Foo:
    pass


class Bar(Foo):
    pass


class Other:
    pass


class SimpleModel(BaseModel):
    just_subclasses: Type[Foo]


SimpleModel(just_subclasses=Foo)
SimpleModel(just_subclasses=Bar)
try:
    SimpleModel(just_subclasses=Other)
except ValidationError as e:
    print(e)
    """
    1 validation error for SimpleModel
    just_subclasses
      subclass of Foo expected (type=type_error.subclass; expected_class=Foo)
    """
或任意类
from typing import Type

from pydantic import BaseModel, ValidationError


class Foo:
    pass


class LenientSimpleModel(BaseModel):
    any_class_goes: Type


LenientSimpleModel(any_class_goes=int)
LenientSimpleModel(any_class_goes=Foo)
try:
    LenientSimpleModel(any_class_goes=Foo())
except ValidationError as e:
    print(e)
    """
    1 validation error for LenientSimpleModel
    any_class_goes
      a class is expected (type=type_error.class)
    """

TypeVar

元类型
from typing import TypeVar
from pydantic import BaseModel

Foobar = TypeVar("Foobar")
BoundFloat = TypeVar("BoundFloat", bound=float)
IntStr = TypeVar("IntStr", int, str)


class Model(BaseModel):
    a: Foobar  # equivalent of ": Any"
    b: BoundFloat  # equivalent of ": float"
    c: IntStr  # equivalent of ": Union[int, str]"


print(Model(a=[1], b=4.2, c="x"))
# > a=[1] b=4.2 c='x'

# a may be None and is therefore optional
print(Model(b=1, c=1))
# > a=None b=1.0 c=1

Literal类型

字面量类型
from typing import Literal

from pydantic import BaseModel, ValidationError


class Pie(BaseModel):
    flavor: Literal['apple', 'pumpkin']


Pie(flavor='apple')
Pie(flavor='pumpkin')
try:
    Pie(flavor='cherry')
except ValidationError as e:
    print(str(e))
    """
    1 validation error for Pie
    flavor
      unexpected value; permitted: 'apple', 'pumpkin'
    (type=value_error.const; given=cherry; permitted=('apple', 'pumpkin'))
    """

Pydantic 类型

FilePath DirectoryPath EmailStr NameEmail PyObject Color Json PaymentCardNumber AnyUrl AnyHttpUrl HttpUrl PostgresDsn RedisDsn stricturl
UUID1 UUID3 UUID4 UUID5 SecretBytes SecretStr IPvAnyAddress IPvAnyInterface IPvAnyNetwork NegativeFloat NegativeInt PositiveFloat PositiveInt
conbytes condecimal confloat conint conlist conset constr
 

约束类型

from decimal import Decimal

from pydantic import (
    BaseModel,
    NegativeFloat,
    NegativeInt,
    PositiveFloat,
    PositiveInt,
    conbytes,
    condecimal,
    confloat,
    conint,
    conlist,
    conset,
    constr,
    Field,
)


class Model(BaseModel):
    short_bytes: conbytes(min_length=2, max_length=10)
    strip_bytes: conbytes(strip_whitespace=True)

    short_str: constr(min_length=2, max_length=10)
    regex_str: constr(regex=r"^apple (pie|tart|sandwich)$")
    strip_str: constr(strip_whitespace=True)

    big_int: conint(gt=1000, lt=1024)
    mod_int: conint(multiple_of=5)
    pos_int: PositiveInt
    neg_int: NegativeInt

    big_float: confloat(gt=1000, lt=1024)
    unit_interval: confloat(ge=0, le=1)
    mod_float: confloat(multiple_of=0.5)
    pos_float: PositiveFloat
    neg_float: NegativeFloat

    short_list: conlist(int, min_items=1, max_items=4)
    short_set: conset(int, min_items=1, max_items=4)

    decimal_positive: condecimal(gt=0)
    decimal_negative: condecimal(lt=0)
    decimal_max_digits_and_places: condecimal(max_digits=2, decimal_places=2)
    mod_decimal: condecimal(multiple_of=Decimal("0.25"))

    bigger_int: int = Field(..., gt=10000)

Strict类型

不允许进行默认的强制转换
from pydantic import BaseModel, StrictBool, StrictInt, ValidationError, confloat


class StrictIntModel(BaseModel):
    strict_int: StrictInt


try:
    StrictIntModel(strict_int=3.14159)
except ValidationError as e:
    print(e)
    """
    1 validation error for StrictIntModel
    strict_int
      value is not a valid integer (type=type_error.integer)
    """


class ConstrainedFloatModel(BaseModel):
    constrained_float: confloat(strict=True, ge=0.0)


try:
    ConstrainedFloatModel(constrained_float=3)
except ValidationError as e:
    print(e)
    """
    1 validation error for ConstrainedFloatModel
    constrained_float
      value is not a valid float (type=type_error.float)
    """

try:
    ConstrainedFloatModel(constrained_float=-1.23)
except ValidationError as e:
    print(e)
    """
    1 validation error for ConstrainedFloatModel
    constrained_float
      ensure this value is greater than or equal to 0.0
    (type=value_error.number.not_ge; limit_value=0.0)
    """


class StrictBoolModel(BaseModel):
    strict_bool: StrictBool


try:
    # The string 'False' is rejected: strict types do no implicit coercion.
    StrictBoolModel(strict_bool='False')
except ValidationError as e:
    print(str(e))
    """
    1 validation error for StrictBoolModel
    strict_bool
      value is not a valid boolean (type=value_error.strictbool)
    """

自定义数据类型

__get_validators__ 的类

import re
from pydantic import BaseModel

# https://en.wikipedia.org/wiki/Postcodes_in_the_United_Kingdom#Validation
post_code_regex = re.compile(
    r"(?:"
    r"([A-Z]{1,2}[0-9][A-Z0-9]?|ASCN|STHL|TDCU|BBND|[BFS]IQQ|PCRN|TKCA) ?"
    r"([0-9][A-Z]{2})|"
    r"(BFPO) ?([0-9]{1,4})|"
    r"(KY[0-9]|MSR|VG|AI)[ -]?[0-9]{4}|"
    r"([A-Z]{2}) ?([0-9]{2})|"
    r"(GE) ?(CX)|"
    r"(GIR) ?(0A{2})|"
    r"(SAN) ?(TA1)"
    r")"
)


class PostCode(str):
    """
    Partial UK postcode validation. Note: this is just an example, and is not
    intended for use in production; in particular this does NOT guarantee
    a postcode exists, just that it has a valid format.
    """

    @classmethod
    def __get_validators__(cls):
        # one or more validators may be yielded which will be called in the
        # order to validate the input, each validator will receive as an input
        # the value returned from the previous validator
        yield cls.validate

    @classmethod
    def __modify_schema__(cls, field_schema):
        # __modify_schema__ should mutate the dict it receives in place,
        # the returned value will be ignored
        field_schema.update(
            # simplified regex here for brevity, see the wikipedia link above
            pattern="^[A-Z]{1,2}[0-9][A-Z0-9]? ?[0-9][A-Z]{2}$",
            # some example postcodes
            examples=["SP11 9DG", "w1j7bu"],
        )

    @classmethod
    def validate(cls, v):
        """Validate `v` and return it as an upper-cased `PostCode`.

        Args:
            v: the raw value to validate.

        Returns:
            A `PostCode`. When the matching pattern branch captured two
            parts they are joined by a single space; otherwise the matched
            text is returned as it was matched.

        Raises:
            TypeError: if `v` is not a string.
            ValueError: if `v` does not fully match `post_code_regex`.
        """
        if not isinstance(v, str):
            raise TypeError("string required")
        m = post_code_regex.fullmatch(v.upper())
        if not m:
            raise ValueError("invalid postcode format")
        # Only one branch of the alternation takes part in a match, so use
        # the groups that actually captured something. Reading groups 1 and 2
        # unconditionally yields "None None" for e.g. "BFPO 1234".
        parts = [g for g in m.groups() if g is not None]
        if len(parts) == 2:
            normalised = f"{parts[0]} {parts[1]}"
        else:
            # branch with a single capture group: keep the matched text
            normalised = m.group(0)
        # you could also return a string here which would mean model.post_code
        # would be a string, pydantic won't care but you could end up with some
        # confusion since the value's type won't match the type annotation
        # exactly
        return cls(normalised)

    def __repr__(self):
        return f"PostCode({super().__repr__()})"


class Model(BaseModel):
    post_code: PostCode


model = Model(post_code="sw8 5el")
print(model)
# > post_code=PostCode('SW8 5EL')
print(model.post_code)
# > SW8 5EL
print(Model.schema())
"""
{
    'title': 'Model',
    'type': 'object',
    'properties': {
        'post_code': {
            'title': 'Post Code',
            'pattern': '^[A-Z]{1,2}[0-9][A-Z0-9]? ?[0-9][A-Z]{2}$',
            'examples': ['SP11 9DG', 'w1j7bu'],
            'type': 'string',
        },
    },
    'required': ['post_code'],
}
"""

允许任意类型

arbitrary_types_allowed
所有字段类型必须是可验证的: 自定义的类要么继承自 BaseModel (或实现 __get_validators__), 要么在模型的 Config 中设置 arbitrary_types_allowed=True。
from pydantic import BaseModel, ValidationError


# This is not a pydantic model, it's an arbitrary class
class Pet:
    """Plain Python class used as an arbitrary (non-pydantic) field type."""

    def __init__(self, name: str):
        # name is stored as given; the annotation is not enforced here
        self.name = name


class Model(BaseModel):
    pet: Pet
    owner: str

    class Config:
        arbitrary_types_allowed = True


pet = Pet(name="Hedwig")
# A simple check of instance type is used to validate the data
model = Model(owner="Harry", pet=pet)
print(model)
# > pet=<types_arbitrary_allowed.Pet object at 0x7fcb902cee50> owner='Harry'
print(model.pet)
# > <types_arbitrary_allowed.Pet object at 0x7fcb902cee50>
print(model.pet.name)
# > Hedwig
print(type(model.pet))
# > <class 'types_arbitrary_allowed.Pet'>
try:
    # If the value is not an instance of the type, it's invalid
    Model(owner="Harry", pet="Hedwig")
except ValidationError as e:
    print(e)
    """
    1 validation error for Model
    pet
      instance of Pet expected (type=type_error.arbitrary_type;
    expected_arbitrary_type=Pet)
    """
# Nothing in the instance of the arbitrary type is checked
# Here name probably should have been a str, but it's not validated
pet2 = Pet(name=42)
model2 = Model(owner="Harry", pet=pet2)
print(model2)
# > pet=<types_arbitrary_allowed.Pet object at 0x7fcb902ce4f0> owner='Harry'
print(model2.pet)
# > <types_arbitrary_allowed.Pet object at 0x7fcb902ce4f0>
print(model2.pet.name)
# > 42
print(type(model2.pet))
# > <class 'types_arbitrary_allowed.Pet'>

泛型类作为类型

前面介绍的是泛型类, 这里的是泛型类作为类型.
就是一个泛型类包含__get_validators__
from pydantic import BaseModel, ValidationError
from pydantic.fields import ModelField
from typing import TypeVar, Generic

AgedType = TypeVar("AgedType")
QualityType = TypeVar("QualityType")


# This is not a pydantic model, it's an arbitrary generic class
class TastingModel(Generic[AgedType, QualityType]):
    """Arbitrary generic class that pydantic validates through ``validate``."""

    def __init__(self, name: str, aged: AgedType, quality: QualityType):
        self.name, self.aged, self.quality = name, aged, quality

    @classmethod
    def __get_validators__(cls):
        # a single validator is handed to pydantic
        yield cls.validate

    @classmethod
    def validate(cls, v, field: ModelField):
        """Check that *v* is a TastingModel whose parts fit the type parameters.

        The ``ModelField`` annotation is optional; it only helps the editor
        with completion and error detection.
        """
        # Anything that is not a TastingModel is rejected outright
        if not isinstance(v, cls):
            raise TypeError("Invalid value")
        # Without generic parameters there is nothing more to check
        if not field.sub_fields:
            return v
        checks = (
            (field.sub_fields[0], "aged"),
            (field.sub_fields[1], "quality"),
        )
        problems = []
        for sub_field, attr in checks:
            # only the error matters here, the validated value is discarded
            _, problem = sub_field.validate(getattr(v, attr), {}, loc=attr)
            if problem:
                problems.append(problem)
        if problems:
            raise ValidationError(problems, cls)
        # All parts are valid: hand back the very same instance
        return v


class Model(BaseModel):
    # for wine, "aged" is an int with years, "quality" is a float
    wine: TastingModel[int, float]
    # for cheese, "aged" is a bool, "quality" is a str
    cheese: TastingModel[bool, str]
    # for thing, "aged" is a Any, "quality" is Any
    thing: TastingModel


model = Model(
    # This wine was aged for 20 years and has a quality of 85.6
    wine=TastingModel(name="Cabernet Sauvignon", aged=20, quality=85.6),
    # This cheese is aged (is mature) and has "Good" quality
    cheese=TastingModel(name="Gouda", aged=True, quality="Good"),
    # This Python thing has aged "Not much" and has a quality "Awesome"
    thing=TastingModel(name="Python", aged="Not much", quality="Awesome"),
)
print(model)
"""
wine=<types_generics.TastingModel object at 0x7fcb903271f0>
cheese=<types_generics.TastingModel object at 0x7fcb903272e0>
thing=<types_generics.TastingModel object at 0x7fcb903276a0>
"""
print(model.wine.aged)
# > 20
print(model.wine.quality)
# > 85.6
print(model.cheese.aged)
# > True
print(model.cheese.quality)
# > Good
print(model.thing.aged)
# > Not much
try:
    # If the values of the sub-types are invalid, we get an error
    Model(
        # For wine, aged should be an int with the years, and quality a float
        wine=TastingModel(name="Merlot", aged=True, quality="Kinda good"),
        # For cheese, aged should be a bool, and quality a str
        cheese=TastingModel(name="Gouda", aged="yeah", quality=5),
        # For thing, no type parameters are declared, and we skipped validation
        # in those cases in the TastingModel.validate() function
        thing=TastingModel(name="Python", aged="Not much", quality="Awesome"),
    )
except ValidationError as e:
    print(e)
    """
    2 validation errors for Model
    wine -> quality
      value is not a valid float (type=type_error.float)
    cheese -> aged
      value could not be parsed to a boolean (type=type_error.bool)
    """

验证器

  • 验证器是类方法
  • 第二个参数总是要验证的字段值。
  • 可以将下面参数的任意子集添加到签名中(参数名必须匹配)
    • values 之前已验证字段的字段名到字段值的映射。注意字段定义的顺序.
    • config 模型配置。
    • field 要验证的字段。
    • **kwargs 如果提供,则包含上面这些参数中未在签名中显式列出的参数。
  • 验证器应该返回解析的值或者引发 ValueError、TypeError 或 AssertionError (可以使用 assert 语句)。
  • 验证按照字段被定义的顺序完成。
  • 如果在其他字段上验证失败 (或那个字段缺失),则它不会被包含在 values 中
from pydantic import BaseModel, ValidationError, validator


class UserModel(BaseModel):
    # Example model showing three per-field validators
    name: str
    username: str
    password1: str
    password2: str

    @validator("name")
    def name_must_contain_space(cls, v):
        """Require a space in the name and return it title-cased."""
        if " " in v:
            return v.title()
        raise ValueError("must contain a space")

    @validator("password2")
    def passwords_match(cls, v, values, **kwargs):
        """Reject password2 when it differs from password1."""
        # with no password1 in values there is nothing to compare against
        if "password1" not in values:
            return v
        if v != values["password1"]:
            raise ValueError("passwords do not match")
        return v

    @validator("username")
    def username_alphanumeric(cls, v):
        """Allow only alphanumeric usernames."""
        assert v.isalnum(), "must be alphanumeric"
        return v


user = UserModel(
    name="samuel colvin",
    username="scolvin",
    password1="zxcvbn",
    password2="zxcvbn",
)
print(user)
# > name='Samuel Colvin' username='scolvin' password1='zxcvbn' password2='zxcvbn'

try:
    UserModel(
        name="samuel",
        username="scolvin",
        password1="zxcvbn",
        password2="zxcvbn2",
    )
except ValidationError as e:
    print(e)
    """
    2 validation errors for UserModel
    name
      must contain a space (type=value_error)
    password2
      passwords do not match (type=value_error)
    """

Pre 和 per-item 验证器

 
  • 可以通过给单个验证器传递多个字段名称将其应用于多个字段。
  • 可以通过给单个验证器传递特殊的 * 值将其应用于所有字段。
  • 关键字参数 pre 将会导致该验证器的调用优先于其他验证器。
  • 传递 each_item=True 将会导致验证器应用于可迭代对象中的每个值(例如,List、Dict 和 Set 等),而不是整个对象。
    • 在子类中对继承自父类的字段使用 each_item=True 时, 验证器不会被调用 (见下面的 ChildModel 示例); 此时应在验证器内部自行遍历各个元素 (见 ChildModel2)。
from typing import List
from pydantic import BaseModel, ValidationError, validator


class DemoModel(BaseModel):
    # Example model combining a "pre" validator, a multi-field validator
    # and two per-item validators
    square_numbers: List[int] = []
    cube_numbers: List[int] = []

    # '*' is the same as 'cube_numbers', 'square_numbers' here:
    @validator("*", pre=True)
    def split_str(cls, v):
        """Split a "|"-separated string into a list; pass other values on."""
        if isinstance(v, str):
            return v.split("|")
        return v

    @validator("cube_numbers", "square_numbers")
    def check_sum(cls, v):
        """Reject a list whose sum exceeds 42."""
        if sum(v) > 42:
            raise ValueError("sum of numbers greater than 42")
        return v

    @validator("square_numbers", each_item=True)
    def check_squares(cls, v):
        """Accept only perfect squares."""
        # the float root is only a candidate; squaring it back in integer
        # arithmetic is what decides, and negatives fail the assertion
        # cleanly instead of raising on a complex root
        assert v >= 0 and round(v ** 0.5) ** 2 == v, f"{v} is not a square number"
        return v

    @validator("cube_numbers", each_item=True)
    def check_cubes(cls, v):
        """Accept only perfect cubes (negative cubes included)."""
        # 64 ** (1 / 3) == 3.9999999999999996, so testing the float root for
        # a zero fractional part rejects real cubes; round the root to the
        # nearest integer and verify it by cubing back instead
        magnitude = abs(v)
        root = round(magnitude ** (1 / 3))
        assert root ** 3 == magnitude, f"{v} is not a cubed number"
        return v


print(DemoModel(square_numbers=[1, 4, 9]))
# > square_numbers=[1, 4, 9] cube_numbers=[]
print(DemoModel(square_numbers="1|4|16"))
# > square_numbers=[1, 4, 16] cube_numbers=[]
print(DemoModel(square_numbers=[16], cube_numbers=[8, 27]))
# > square_numbers=[16] cube_numbers=[8, 27]
try:
    DemoModel(square_numbers=[1, 4, 2])
except ValidationError as e:
    print(e)
    """
    1 validation error for DemoModel
    square_numbers -> 2
      2 is not a square number (type=assertion_error)
    """

try:
    DemoModel(cube_numbers=[27, 27])
except ValidationError as e:
    print(e)
    """
    1 validation error for DemoModel
    cube_numbers
      sum of numbers greater than 42 (type=value_error)
    """



from typing import List
from pydantic import BaseModel, ValidationError, validator


class ParentModel(BaseModel):
    names: List[str]


class ChildModel(ParentModel):
    @validator('names', each_item=True)
    def check_names_not_empty(cls, v):
        assert v != '', 'Empty strings are not allowed.'
        return v


# This will NOT raise a ValidationError because the validator was not called
try:
    child = ChildModel(names=['Alice', 'Bob', 'Eve', ''])
except ValidationError as e:
    print(e)
else:
    print('No ValidationError caught.')
    #> No ValidationError caught.


class ChildModel2(ParentModel):
    # Validates the inherited list as a whole instead of relying on each_item
    @validator('names')
    def check_names_not_empty(cls, v):
        """Fail when any entry of the list is an empty string."""
        assert all(name != '' for name in v), 'Empty strings are not allowed.'
        return v


try:
    child = ChildModel2(names=['Alice', 'Bob', 'Eve', ''])
except ValidationError as e:
    print(e)
    """
    1 validation error for ChildModel2
    names
      Empty strings are not allowed. (type=assertion_error)
    """

Always验证

  • 出于性能原因,在没有提供值时,默认情况下不会为字段调用验证器。
  • always=True 会强制进行.
  • 最好和 pre=True 一起使用,否则,当 always=True 时,pydantic会尝试对默认值 None 进行验证,从而导致错误。
    • validator是额外的验证. 还会有默认的格式验证, 但是默认的None格式错误.所以需要和pre一起使用.
from datetime import datetime

from pydantic import BaseModel, validator


class DemoModel(BaseModel):
    ts: datetime = None

    # pre=True runs this before the datetime validation; always=True runs it
    # even when no value was supplied
    @validator("ts", pre=True, always=True)
    def set_ts_now(cls, v):
        """Fall back to the current time when no truthy value is given."""
        if v:
            return v
        return datetime.now()


print(DemoModel())
# > ts=datetime.datetime(2020, 10, 28, 20, 3, 33, 424317)
print(DemoModel(ts="2017-11-08T14:00"))
# > ts=datetime.datetime(2017, 11, 8, 14, 0)

复用验证器

然而我并没有觉得很有用.. 除了省了一个cls
from pydantic import BaseModel, validator


def normalize(name: str) -> str:
    """Capitalize each space-separated word of *name*."""
    words = name.split(' ')
    return ' '.join(map(str.capitalize, words))


class Producer(BaseModel):
    name: str

    # validators
    _normalize_name = validator('name', allow_reuse=True)(normalize)# validator是一个装饰器和前文中的一样.


class Consumer(BaseModel):
    name: str

    # validators
    _normalize_name = validator('name', allow_reuse=True)(normalize)


jane_doe = Producer(name='JaNe DOE')
john_doe = Consumer(name='joHN dOe')
assert jane_doe.name == 'Jane Doe'
assert john_doe.name == 'John Doe'
或者这样写
from pydantic import validator, BaseModel


@validator('name', allow_reuse=True)
def normalize(name: str) -> str:
  return ' '.join((word.capitalize()) for word in name.split(' '))


class Producer(BaseModel):
  name: str

  # validators
  _normalize_name = normalize


class Consumer(BaseModel):
  name: str

  # validators
  _normalize_name = normalize


jane_doe = Producer(name='JaNe DOE')
john_doe = Consumer(name='joHN dOe')
assert jane_doe.name == 'Jane Doe'
assert john_doe.name == 'John Doe'

根验证器

root_validator
顺序是
  • pre=True
    • root_validator —> 各字段validator
  • pre=False
    • 各字段validator —> root_validator
 
如果 pre=True 时根验证器引发错误,则不会进行字段验证。
当 pre=False 时,即使之前的验证器失败,默认情况下也会调用根验证器;
可以通过为验证器设置 skip_on_failure=True 关键字参数来改变这种行为。
from pydantic import BaseModel, ValidationError, root_validator


class UserModel(BaseModel):
    # Example model showing a pre and a post root validator
    username: str
    password1: str
    password2: str

    @root_validator(pre=True)
    def check_card_number_omitted(cls, values):
        """Refuse input that carries a card_number key."""
        assert "card_number" not in values, "card_number should not be included"
        return values

    @root_validator
    def check_passwords_match(cls, values):
        """Compare both passwords once the individual fields were validated."""
        first = values.get("password1")
        second = values.get("password2")
        # a missing password means there is nothing to compare here
        if first is None or second is None:
            return values
        if first != second:
            raise ValueError("passwords do not match")
        return values


print(UserModel(username="scolvin", password1="zxcvbn", password2="zxcvbn"))
# > username='scolvin' password1='zxcvbn' password2='zxcvbn'
try:
    UserModel(username="scolvin", password1="zxcvbn", password2="zxcvbn2")
except ValidationError as e:
    print(e)
    """
    1 validation error for UserModel
    __root__
      passwords do not match (type=value_error)
    """

try:
    UserModel(
        username="scolvin",
        password1="zxcvbn",
        password2="zxcvbn",
        card_number="1234",
    )
except ValidationError as e:
    print(e)
    """
    1 validation error for UserModel
    __root__
      card_number should not be included (type=assertion_error)
    """

字段检查

默认情况下, 在创建模型类的时候 (而不是创建实例的时候), 会检查验证器所指定的字段是否都存在于该模型上. 但是如果在父类中定义的验证器要验证只在子类中才声明的字段, 默认情况下就会报错.
可以通过check_fields=False解决

Dataclass 验证器

Dataclass 是python3标准库里面的一个类库.
可以自动生成类的方法 根据 typehint
这里的验证器可以和Dataclass配合,而不需要继承 BaseModel
from datetime import datetime

from pydantic import validator
from pydantic.dataclasses import dataclass


@dataclass
class DemoDataclass:
    ts: datetime = None

    @validator("ts", pre=True, always=True)
    def set_ts_now(cls, v):
        return v or datetime.now()


print(DemoDataclass())
# > DemoDataclass(ts=datetime.datetime(2020, 11, 1, 15, 49, 0, 182639))
print(DemoDataclass(ts="2017-11-08T14:00"))
# > DemoDataclass(ts=datetime.datetime(2017, 11, 8, 14, 0))

模型配置

class Config
  • title
    • 生成的 JSON 模式的标题。
  • anystr_strip_whitespace
    • 是否移除 str 和 bytes 类型中前导和尾随的空白字符(默认值为 False)。
  • min_anystr_length
    • str 和 bytes 类型的最小长度 (默认为 0)。
  • max_anystr_length
    • str 和 bytes 类型的最大长度 (默认为 2 ** 16)。
  • validate_all
    • 是否验证字段的默认值 (默认为 False)。
  • extra
    • 在模型初始化时是否忽略、允许或禁止额外的属性
      可接受字符串值 ignore、allow 或 forbid 以及 Extra 枚举 (例如, Extra.ignore)。
      如果模型包含了额外的属性,则 forbid 将会导致验证失败。
      ignore 将静默忽略任何额外属性。
      allow 将属性分配给模型。
  • allow_mutation
    • 模型是否是伪不可变的。例如,是否允许 setattr (默认为 True)。
  • use_enum_values
    • 是否使用枚举成员的 value 属性 (而不是枚举实例本身) 来填充模型 (默认为 False)。
  • fields
    • 包含每个字段的模式信息的字典; 这等效于使用 [Field 类] (默认为 None)。
  • validate_assignment
    • 是否对属性的赋值执行验证 (默认为 False)。
  • allow_population_by_field_name
    • 是否可以用模型属性给出的名称填充别名字段,以及别名 (默认为 False )。
  • error_msg_templates
    • 用于覆盖默认错误消息模板的字典。传入一个字典,其中的键与您想要覆盖的错误消息相匹配 (默认为 {})。
  • arbitrary_types_allowed
    • 是否允许字段使用任意的用户自定义类型 (默认为 False)。开启后, 这类字段只通过 isinstance 检查来验证。
  • orm_mode
    • 是否允许使用 [ORM 模式]
  • getter_dict
    • 一个自定义类 (应该继承自 GetterDict),用于分解ORM类进行验证,并与 orm_mode 一起使用
  • alias_generator
    • 接受字段名并返回其别名的可调用对象。
  • keep_untouched
    • 一个类型元组 (例如描述符); 模型默认值中属于这些类型的值在创建模型时保持原样, 也不会出现在模型的 JSON 模式中 (默认为 ())。
  • schema_extra
    • 用于 拓展/更新 生成的 JSON 模式的字典,或用于对其进行后处理(post-process)的可调用对象。
  • json_loads
    • 用于解码 JSON 的自定义函数。
  • json_dumps
    • 用于编码 JSON的自定义函数。
  • json_encoders
    • 用于自定义类型被编码成JSON的方式的字典。
  • underscore_attrs_are_private
    • 是否将任何下划线非类 (non-class) 变量属性当做私有属性,或让它们保持原样。
也可以使用@dataclass
from datetime import datetime

from pydantic import ValidationError
from pydantic.dataclasses import dataclass


class MyConfig:
    max_anystr_length = 10
    validate_assignment = True
    error_msg_templates = {
        'value_error.any_str.max_length': 'max_length:{limit_value}',
    }


@dataclass(config=MyConfig)
class User:
    id: int
    name: str = 'John Doe'
    signup_ts: datetime = None


user = User(id='42', signup_ts='2032-06-21T12:00')
try:
    user.name = 'x' * 20
except ValidationError as e:
    print(e)
    """
    1 validation error for User
    name
      max_length:10 (type=value_error.any_str.max_length; limit_value=10)
    """

别名生成器

如果数据源字段名称与您的代码样式不匹配 (例如CamelCase字段),则可以使用 alias_generator 自动生成别名:
from pydantic import BaseModel


def to_camel(string: str) -> str:
    """Turn a snake_case name into CamelCase by capitalizing each part."""
    pieces = [part.capitalize() for part in string.split("_")]
    return "".join(pieces)


class Voice(BaseModel):
    name: str
    language_code: str

    class Config:
        alias_generator = to_camel


voice = Voice(Name="Filiz", LanguageCode="tr-TR")
print(voice.language_code)
# > tr-TR
print(voice.dict(by_alias=True))
# > {'Name': 'Filiz', 'LanguageCode': 'tr-TR'}

别名优先

在一个字段的别名可能被定义在多个地方的情况下,选择的值按如下规则确定(按优先级降序):
  1. 在模型上直接通过 Field(..., alias=<alias>) 设置。
  2. 在模型上的 Config.fields 中定义。
  3. 在父模型上通过 Field(..., alias=<alias>)。
  4. 在父模型上的 Config.fields 中定义。
  5. 由 alias_generator 生成,无论它是在模型上还是在父模型上。
from pydantic import BaseModel, Field


class Voice(BaseModel):
    name: str = Field(None, alias="ActorName")
    language_code: str = None
    mood: str = None


class Character(Voice):
    act: int = 1

    class Config:
        fields = {"language_code": "lang"}

        @classmethod
        def alias_generator(cls, string: str) -> str:
            # this is the same as `alias_generator = to_camel` above
            return "".join(word.capitalize() for word in string.split("_"))


print(Character.schema(by_alias=True))
"""
{
    'title': 'Character',
    'type': 'object',
    'properties': {
        'ActorName': {'title': 'Actorname', 'type': 'string'},
        'lang': {'title': 'Lang', 'type': 'string'},
        'Mood': {'title': 'Mood', 'type': 'string'},
        'Act': {'title': 'Act', 'default': 1, 'type': 'integer'},
    },
}
"""

模式

没想明白哪里会用到..
BaseModel.schema —> 字典
BaseModel.schema_json
Pydantic 允许从模型自动创建 JSON 模式:
from enum import Enum
from pydantic import BaseModel, Field


class FooBar(BaseModel):
    count: int
    size: float = None


class Gender(str, Enum):
    male = "male"
    female = "female"
    other = "other"
    not_given = "not_given"


class MainModel(BaseModel):
    """
    This is the description of the main model
    """

    foo_bar: FooBar = Field(...)
    gender: Gender = Field(None, alias="Gender")
    snap: int = Field(
        42,
        title="The Snap",
        description="this is the value of snap",
        gt=30,
        lt=50,
    )

    class Config:
        title = "Main"


# this is equivalent to json.dumps(MainModel.schema(), indent=2):
print(MainModel.schema_json(indent=2))




{
  "title": "Main",
  "description": "This is the description of the main model",
  "type": "object",
  "properties": {
    "foo_bar": {
      "$ref": "#/definitions/FooBar"
    },
    "Gender": {
      "$ref": "#/definitions/Gender"
    },
    "snap": {
      "title": "The Snap",
      "description": "this is the value of snap",
      "default": 42,
      "exclusiveMinimum": 30,
      "exclusiveMaximum": 50,
      "type": "integer"
    }
  },
  "required": [
    "foo_bar"
  ],
  "definitions": {
    "FooBar": {
      "title": "FooBar",
      "type": "object",
      "properties": {
        "count": {
          "title": "Count",
          "type": "integer"
        },
        "size": {
          "title": "Size",
          "type": "number"
        }
      },
      "required": [
        "count"
      ]
    },
    "Gender": {
      "title": "Gender",
      "description": "An enumeration.",
      "enum": [
        "male",
        "female",
        "other",
        "not_given"
      ],
      "type": "string"
    }
  }
}

导出模型

除了 model.foobar 之外的访问model属性的其他几种方式.

model.dict

  • include
    • 包含在返回的字典中的字段。
  • exclude
    • 从返回的字典中排除的字段。
  • by_alias
    • 字段别名是否应该在返回的字典中作为键;默认为 False。
  • exclude_unset
    • 创建模型时未显式设置的字段是否应从返回的字典中排除;默认为 False。
  • exclude_defaults
    • 是否应从返回的字典中排除等于其默认值的字段 (无论是否设置);默认为 False。
  • exclude_none
    • 是否应从返回的字典中排除等于 None 的字段;默认为 False。

dict(model) 和迭代

使用这种方法,将返回原始字段值,因此子模型(BarModel)不会被转换为字典
from pydantic import BaseModel


class BarModel(BaseModel):
    whatever: int


class FooBarModel(BaseModel):
    banana: float
    foo: str
    bar: BarModel


m = FooBarModel(banana=3.14, foo='hello', bar={'whatever': 123})

print(dict(m))
"""
{
    'banana': 3.14,
    'foo': 'hello',
    'bar': BarModel(
        whatever=123, # 这里
    ),
}
"""
for name, value in m:
    print(f'{name}: {value}')
    #> banana: 3.14
    #> foo: hello
    #> bar: whatever=123 # 这里

model.copy(…)

  • include
    • 要包含在返回的字典中的字段
  • exclude
    • 要从返回的字典中排除的字段。
  • update
    • 创建复制的模型时要更改的值的字典。
  • deep
    • 是否对新模型进行深复制;默认为 False。
from pydantic import BaseModel


class BarModel(BaseModel):
    whatever: int


class FooBarModel(BaseModel):
    banana: float
    foo: str
    bar: BarModel


m = FooBarModel(banana=3.14, foo='hello', bar={'whatever': 123})

print(m.copy(include={'foo', 'bar'}))
#> foo='hello' bar=BarModel(whatever=123)
print(m.copy(exclude={'foo', 'bar'}))
#> banana=3.14
print(m.copy(update={'banana': 0}))
#> banana=0 foo='hello' bar=BarModel(whatever=123)
print(id(m.bar), id(m.copy().bar))
#> 140512307789344 140512307789344
# normal copy gives the same object reference for `bar`
print(id(m.bar), id(m.copy(deep=True).bar))
#> 140512307789344 140512307819952
# deep copy gives a new object reference for `bar`

model.json

💡
.json() 方法会将模型序列化为 JSON。通常,.json() 依次调用 .dict() 并序列化其结果。(对于具有自定义根类型的模型,在调用 .dict() 之后,仅序列化 __root__ 键的值)。
 
  • include
    • 要包含在返回的字典中的字段。
  • exclude
    • 要从返回的字典中排除的字段。
  • by_alias
    • 字段别名是否应该在返回的字典中作为键;默认为 False。
  • exclude_unset
    • 创建模型时未显式设置的字段是否应从返回的字典中排除;默认为 False。
  • exclude_defaults
    • 是否应从返回的字典中排除等于其默认值的字段 (无论是否设置);默认为 False。
  • exclude_none
    • 是否应从返回的字典中排除等于 None 的字段;默认为 False。
  • encoder
    • 传递给 json.dumps() 的 default 参数的自定义编码器函数,默认为设计用于所有常见类型的自定义编码器。
  • **dumps_kwargs
    • 传递给 json.dumps() 的其他关键字参数。例如,indent。

json_encoders

自定义序列化方法
from datetime import datetime, timedelta
from pydantic import BaseModel
from pydantic.json import timedelta_isoformat


class WithCustomEncoders(BaseModel):
    dt: datetime
    diff: timedelta

    class Config:
        json_encoders = {
            datetime: lambda v: v.timestamp(),
            timedelta: timedelta_isoformat,
        }


m = WithCustomEncoders(dt=datetime(2032, 6, 1), diff=timedelta(hours=100))
print(m.json())
#> {"dt": 1969660800.0, "diff": "P4DT4H0M0.000000S"}

自定义 JSON反序列化

from datetime import datetime
import ujson
from pydantic import BaseModel


class User(BaseModel):
    id: int
    name = "John Doe"
    signup_ts: datetime = None

    class Config:
        json_loads = ujson.loads


user = User.parse_raw('{"id": 123,"signup_ts":1234567890,"name":"John Doe"}')
print(user)
# > id=123 signup_ts=datetime.datetime(2009, 2, 13, 23, 31, 30,
# > tzinfo=datetime.timezone.utc) name='John Doe'


from datetime import datetime
import orjson
from pydantic import BaseModel


def orjson_dumps(v, *, default):
    """Serialize *v* with orjson and return text, as ``json.dumps`` does."""
    raw = orjson.dumps(v, default=default)
    # orjson.dumps returns bytes, to match standard json.dumps we need to decode
    return raw.decode()


class User(BaseModel):
    id: int
    name = "John Doe"
    signup_ts: datetime = None

    class Config:
        json_loads = orjson.loads
        json_dumps = orjson_dumps


user = User.parse_raw('{"id":123,"signup_ts":1234567890,"name":"John Doe"}')
print(user.json())
# > {"id":123,"signup_ts":"2009-02-13T23:31:30+00:00","name":"John Doe"}

pickle.dumps(model)

pydantic 支持pickle和unpickle

高级用法

  • include和exclude 是可以字典类型嵌套的.
  • ... (Ellipsis) 作为值时, 表示整个字段都被包含/排除
  • __all__ 作为键时, 表示规则对列表或元组中的所有成员生效
from pydantic import BaseModel, SecretStr


class User(BaseModel):
    id: int
    username: str
    password: SecretStr


class Transaction(BaseModel):
    id: str
    user: User
    value: int


t = Transaction(
    id='1234567890',
    user=User(
        id=42,
        username='JohnDoe',
        password='hashedpassword'
    ),
    value=9876543210,
)

# using a set:
print(t.dict(exclude={'user', 'value'}))
#> {'id': '1234567890'}

# using a dict:
print(t.dict(exclude={'user': {'username', 'password'}, 'value': ...}))
#> {'id': '1234567890', 'user': {'id': 42}}

print(t.dict(include={'id': ..., 'user': {'id'}}))
#> {'id': '1234567890', 'user': {'id': 42}}


---------------------------------------------------------


import datetime
from typing import List

from pydantic import BaseModel, SecretStr


class Country(BaseModel):
    name: str
    phone_code: int


class Address(BaseModel):
    post_code: int
    country: Country


class CardDetails(BaseModel):
    number: SecretStr
    expires: datetime.date


class Hobby(BaseModel):
    name: str
    info: str


class User(BaseModel):
    first_name: str
    second_name: str
    address: Address
    card_details: CardDetails
    hobbies: List[Hobby]


user = User(
    first_name='John',
    second_name='Doe',
    address=Address(
        post_code=123456,
        country=Country(
            name='USA',
            phone_code=1
        )
    ),
    card_details=CardDetails(
        number=4212934504460000,
        expires=datetime.date(2020, 5, 1)
    ),
    hobbies=[
        Hobby(name='Programming', info='Writing code and stuff'),
        Hobby(name='Gaming', info='Hell Yeah!!!'),
    ],
)

exclude_keys = {
    'second_name': ...,
    'address': {'post_code': ..., 'country': {'phone_code'}},
    'card_details': ...,
    # You can exclude fields from specific members of a tuple/list by index:
    'hobbies': {-1: {'info'}},
}

include_keys = {
    'first_name': ...,
    'address': {'country': {'name'}},
    'hobbies': {0: ..., -1: {'name'}},
}

# would be the same as user.dict(exclude=exclude_keys) in this case:
print(user.dict(include=include_keys))
"""
{
    'first_name': 'John',
    'address': {'country': {'name': 'USA'}},
    'hobbies': [
        {
            'name': 'Programming',
            'info': 'Writing code and stuff',
        },
        {'name': 'Gaming'},
    ],
}
"""

# To exclude a field from all members of a nested list or tuple, use "__all__":
print(user.dict(exclude={'hobbies': {'__all__': {'info'}}}))
"""
{
    'first_name': 'John',
    'second_name': 'Doe',
    'address': {
        'post_code': 123456,
        'country': {'name': 'USA', 'phone_code': 1},
    },
    'card_details': {
        'number': SecretStr('**********'),
        'expires': datetime.date(2020, 5, 1),
    },
    'hobbies': [{'name': 'Programming'}, {'name': 'Gaming'}],
}
"""
 
 

© chaleaoch 2021