Pydantic入门(2)

date
Mar 31, 2021
slug
23
status
Published
tags
Python
summary
type
Post

Dataclass

此dataclass 非标准库中的dataclass
from datetime import datetime
from pydantic.dataclasses import dataclass


@dataclass
class User:
    """Example user record declared with pydantic's ``dataclass`` decorator."""

    id: int
    # Used when the caller does not pass a name.
    name: str = 'John Doe'
    # Sign-up timestamp; stays None when omitted.
    signup_ts: datetime = None


# Strings are passed on purpose: per the output below, '42' comes back as the
# int 42 and the ISO-8601 string as a datetime instance.
user = User(id='42', signup_ts='2032-06-21T12:00')
print(user)
#> User(id=42, name='John Doe', signup_ts=datetime.datetime(2032, 6, 21, 12, 0))

验证装饰器

validate_arguments
默认会对参数进行强制类型转换,但是不会处理返回值(强转和验证都不处理)
以装饰器的方式验证函数参数. 更方便一些.
from pydantic import validate_arguments, ValidationError


@validate_arguments
def repeat(s: str, count: int, *, separator: bytes = b'') -> bytes:
    """Return ``s`` encoded to bytes, repeated ``count`` times, joined by ``separator``."""
    encoded = s.encode()
    copies = [encoded] * count
    return separator.join(copies)


# Arguments that already match the annotations.
a = repeat('hello', 3)
print(a)
#> b'hellohellohello'

# '4' and ' ' are str values given for the int / bytes parameters; the output
# below shows them being coerced instead of rejected.
b = repeat('x', '4', separator=' ')
print(b)
#> b'x x x x'

# 'wrong' cannot be coerced to an int, so the call raises ValidationError
# (message shown below).
try:
    c = repeat('hello', 'wrong')
except ValidationError as exc:
    print(exc)
    """
    1 validation error for Repeat
    count
      value is not a valid integer (type=type_error.integer)
    """
from pydantic import validate_arguments


@validate_arguments
def pos_or_kw(a: int, b: int = 2) -> str:
    """Render both arguments; each one may be given positionally or by keyword."""
    return f"a={a} b={b}"


# The calls below cover the positional and keyword combinations.
print(pos_or_kw(1))
# > a=1 b=2
print(pos_or_kw(a=1))
# > a=1 b=2
print(pos_or_kw(1, 3))
# > a=1 b=3
print(pos_or_kw(a=1, b=3))
# > a=1 b=3


@validate_arguments
def kw_only(*, a: int, b: int = 2) -> str:
    """Render both arguments; the bare ``*`` makes them keyword-only."""
    return f"a={a} b={b}"


print(kw_only(a=1))
# > a=1 b=2
print(kw_only(a=1, b=3))
# > a=1 b=3


@validate_arguments
def pos_only(a: int, b: int = 2, /) -> str:  # positional-only syntax needs Python 3.8+
    """Render both arguments; the trailing ``/`` makes them positional-only."""
    return f"a={a} b={b}"


print(pos_only(1))
# > a=1 b=2
print(pos_only(1, 2))
# > a=1 b=2


@validate_arguments
def var_args(*args: int) -> str:
    """Return the string form of the tuple of extra positional arguments."""
    return str(args)


print(var_args(1))
# > (1,)
print(var_args(1, 2))
# > (1, 2)
print(var_args(1, 2, 3))
# > (1, 2, 3)


@validate_arguments
def var_kwargs(**kwargs: int) -> str:
    """Return the string form of the dict of extra keyword arguments."""
    return str(kwargs)


print(var_kwargs(a=1))
# > {'a': 1}
print(var_kwargs(a=1, b=2))
# > {'a': 1, 'b': 2}


@validate_arguments
def armageddon(
    a: int,
    /,  # positional-only marker, needs Python 3.8+
    b: int,
    c: int = None,
    *d: int,
    e: int,
    f: int = None,
    **g: int,
) -> str:
    """Render every parameter kind Python supports in a single signature.

    ``a`` is positional-only, ``b`` and ``c`` are positional-or-keyword,
    ``d`` collects extra positional arguments, ``e`` and ``f`` are
    keyword-only, and ``g`` collects all remaining keyword arguments.
    """
    return f"a={a} b={b} c={c} d={d} e={e} f={f} g={g}"


print(armageddon(1, 2, e=3))
# > a=1 b=2 c=None d=() e=3 f=None g={}
# The third positional argument already binds ``c``, so ``c`` must not be
# passed again by keyword (that would be a "multiple values" error).
# ``g=10`` matches no named parameter -- ``g`` is the ``**`` collector -- so it
# is stored inside the ``g`` dict under the key 'g'.
print(armageddon(1, 2, 3, 4, 5, 6, e=8, f=9, g=10, spam=11))
# > a=1 b=2 c=3 d=(4, 5, 6) e=8 f=9 g={'g': 10, 'spam': 11}

访问原始函数

为了提高性能, 可以通过 raw_function 属性直接调用未经验证的原始函数(跳过参数校验).
from pydantic import validate_arguments


@validate_arguments
def repeat(s: str, count: int, *, separator: bytes = b'') -> bytes:
    """Encode ``s`` and join ``count`` copies of it with ``separator``."""
    return separator.join([s.encode()] * count)


# Call through the validating wrapper.
a = repeat('hello', 3)
print(a)
#> b'hellohellohello'

# Call the undecorated function via ``raw_function``; note the separator is
# already passed as bytes here.
b = repeat.raw_function('good bye', 2, separator=b', ')
print(b)
#> b'good bye, good bye'

异步函数也没问题

import asyncio
from pydantic import PositiveInt, ValidationError, validate_arguments


@validate_arguments
async def get_user_email(user_id: PositiveInt):
    """Fetch the e-mail address stored for ``user_id``.

    Raises:
        RuntimeError: if the query result is ``None``.
    """
    # `conn` is some fictional connection to a database
    result = await conn.execute('select email from users where id=$1', user_id)
    if result is None:
        raise RuntimeError('user not found')
    return result


async def main():
    """Call ``get_user_email`` once with a valid id and once with an invalid one."""
    email = await get_user_email(123)
    print(email)
    #> testing@example.com
    try:
        # -4 is rejected for the PositiveInt-annotated ``user_id`` (see the
        # error list below).
        await get_user_email(-4)
    except ValidationError as exc:
        print(exc.errors())
        """
        [
            {
                'loc': ('user_id',),
                'msg': 'ensure this value is greater than 0',
                'type': 'value_error.number.not_gt',
                'ctx': {'limit_value': 0},
            },
        ]
        """


# Drive the coroutine to completion on a fresh event loop.
asyncio.run(main())

自定义配置

等同于在普通模型中设置 Config 子类
不支持自定义的 Config 上的 fields 和 alias_generator
from pydantic import ValidationError, validate_arguments


class Foobar:
    """Plain string wrapper used as an example of an arbitrary, non-pydantic type."""

    def __init__(self, v: str):
        # The wrapped value, echoed back by ``__str__``.
        self.v = v

    def __str__(self) -> str:
        """Return ``Foobar(<v>)``."""
        return f'Foobar({self.v})'

    def __add__(self, other: 'Foobar') -> str:
        """Return a textual description of the sum rather than a new Foobar."""
        return f'{self} + {other}'


# ``arbitrary_types_allowed`` is switched on because Foobar is a plain class,
# not a pydantic type.
@validate_arguments(config=dict(arbitrary_types_allowed=True))
def add_foobars(a: Foobar, b: Foobar):
    """Add two Foobar instances using ``Foobar.__add__``."""
    return a + b


c = add_foobars(Foobar('a'), Foobar('b'))
print(c)
#> Foobar(a) + Foobar(b)

# Ints are not Foobar instances, so both arguments are reported as invalid.
try:
    add_foobars(1, 2)
except ValidationError as e:
    print(e)
    """
    2 validation errors for AddFoobars
    a
      instance of Foobar expected (type=type_error.arbitrary_type;
    expected_arbitrary_type=Foobar)
    b
      instance of Foobar expected (type=type_error.arbitrary_type;
    expected_arbitrary_type=Foobar)
    """

设置管理

做配置类, 读环境变量赋值给一个类. 后面会解释如何解析这段代码.
import os
from typing import Set

from pydantic import (
    BaseModel,
    BaseSettings,
    PyObject,
    RedisDsn,
    PostgresDsn,
    Field,
)


class SubModel(BaseModel):
    """Nested model used as the type of ``Settings.more_settings``."""

    # Both fields are declared by default value only, without annotations.
    foo = "bar"
    apple = 1


class Settings(BaseSettings):
    """Application settings populated from environment variables.

    Unless a field declares its own ``env`` name(s), its variable name is
    ``Config.env_prefix`` followed by the field name.
    """

    auth_key: str
    # Custom variable name given directly on the field.
    api_key: str = Field(..., env="my_api_key")

    redis_dsn: RedisDsn = "redis://user:pass@localhost:6379/1"
    pg_dsn: PostgresDsn = "postgres://user:pass@localhost:5432/foobar"

    special_function: PyObject = "math.cos"

    # to override domains:
    # export my_prefix_domains='["foo.com", "bar.com"]'
    domains: Set[str] = set()

    # to override more_settings:
    # export my_prefix_more_settings='{"foo": "x", "apple": 1}'
    more_settings: SubModel = SubModel()

    class Config:
        case_sensitive = True  # matching is case-insensitive by default
        env_prefix = "my_prefix_"  # defaults to no prefix, i.e. ""
        # Custom variable names given per field; a list is tried in order.
        fields = {
            "auth_key": {
                "env": "my_auth_key",
            },
            "redis_dsn": {"env": ["service_redis_dsn", "redis_url"]},
        }


# Supply the two settings that have no default value before instantiating.
os.environ["my_api_key"] = "abc123"
os.environ["my_auth_key"] = "123abc"
print(Settings().dict())
"""
{
    'auth_key': '123abc',
    'api_key': 'abc123',
    'redis_dsn': RedisDsn('redis://user:pass@localhost:6379/1', scheme='redis', user='user', password='pass', host='localhost', host_type='int_domain', port='6379', path='/1'),
    'pg_dsn': PostgresDsn('postgres://user:pass@localhost:5432/foobar', scheme='postgres', user='user', password='pass', host='localhost', host_type='int_domain', port='5432', path='/foobar'),
    'special_function': <built-in function cos>,
    'domains': set(),
    'more_settings': {'foo': 'bar', 'apple': 1},
}
"""

环境变量名称

  • 默认, 前缀+字段名
    • 前缀由config中的env_prefix 定义 默认是空字符串
    • 举例 export my_prefix_special_function='foo.bar'
  • 自定义环境变量名称可以使用两种方法设置:
    • Config.fields['field_name']['env'] (参见上面的 auth_key 和 redis_dsn )
    • Field(..., env=...) (参见上面的 api_key )
  • 当指定了自定义环境变量名称时,可以提供一个字符串或者一个字符串列表。
    • 当指定一个字符串列表时,顺序很重要:第一个发现的值将被使用。
    • 例如,对于上面的 redis_dsn,service_redis_dsn 比 redis_url 的优先级要高。

Dotenv (.env)支持

Dotenv文件 (通常命名为 .env ) 是一种常见的模式,它可以方便地以独立于平台的方式使用环境变量。
加载.env文件:
  • 在 BaseSettings 类中的 Config 上设置 env_file (和 env_file_encoding ,如果你不想使用 OS 的默认编码的话)。
class Settings(BaseSettings):
    # Field declarations are elided in this snippet.
    ...

    class Config:
        # dotenv file to load, and the encoding used to read it.
        env_file = '.env'
        env_file_encoding = 'utf-8'
  • 使用 _env_file 关键字参数 (和 _env_file_encoding,如果需要) 实例化 BaseSettings 派生类:
    • 可以是相对路径
    • 环境变量将始终优先于从dotenv文件加载的值
    • _env_file='prod.env' 优先级高于 config env_file = '.env'
      • settings = Settings(_env_file = None) 可以覆盖config中的env_file = '.env'
settings = Settings(_env_file='prod.env', _env_file_encoding='utf-8')

加载机密文件 机密数据

  • 机密文件加载优先级同普通文件
  • 如果同一个变量既出现在机密文件中,又出现在环境变量或 dotenv 文件中,机密文件中的值优先级更低
假设机密文件的路径和内容如下
/var/run/database_password
super_secret_database_password
两种方式加载:
  • secrets_dir
class Settings(BaseSettings):
    # Other field declarations are elided in this snippet.
    ...
    database_password: str

    class Config:
        # Directory that holds the secret files.
        secrets_dir = '/var/run'
  • settings = Settings(_secrets_dir='/var/run')

字段值优先级

  1. 传递给 Settings 类初始化器( __init__ )的参数。
  2. 环境变量,例如上面描述的 my_prefix_special_function 。
  3. 从 dotenv( .env 文件) 加载的变量。
  4. 从机密目录加载的变量。
  5. Settings 模型的默认字段值。

延迟注解

主要用于自引用
Foo = ForwardRef('Foo')
update_forward_refs
from typing import ForwardRef
from pydantic import BaseModel

# Placeholder so the annotation ``b: Foo`` can be written before the class
# itself exists.
Foo = ForwardRef('Foo')


class Foo(BaseModel):
    """Self-referencing model declared through a ``ForwardRef`` placeholder."""

    a: int = 123
    b: Foo = None


# Resolve the forward reference now that the real ``Foo`` class is defined.
Foo.update_forward_refs()

print(Foo())
#> a=123 b=None
print(Foo(b={'a': '321'}))
#> a=123 b=Foo(a=321, b=None)

from __future__ import annotations

让注解在运行时延迟生效.同时将字符串(类型名称)解析为注解(类型).导致
from __future__ import annotations
from typing import List  # <-- List is defined in the module's global scope
from pydantic import BaseModel


def this_works():
    """Define a model inside a function, relying on the module-level ``List`` import."""
    class Model(BaseModel):
        a: List[int]

    instance = Model(a=(1, 2))
    print(instance)
可以运行. 而下面不可运行
from __future__ import annotations
from pydantic import BaseModel


def this_is_broken():
    """Define a model whose annotation uses a name imported only inside this function."""
    # List is defined inside the function so is not in the module's
    # global scope!
    from typing import List

    class Model(BaseModel):
        a: List[int]

    instance = Model(a=(1, 2))
    print(instance)
也就是说, 注解里用到的类型(这里是 List)必须定义在模块的全局作用域下.

自引用模型

from pydantic import BaseModel


class Foo(BaseModel):
    """Model that refers to itself through a string annotation."""

    a: int = 123
    #: The sibling of `Foo` is referenced by string
    sibling: 'Foo' = None  # the self-reference is made here


# Resolve the string annotation now that ``Foo`` is defined.
Foo.update_forward_refs()

print(Foo())
#> a=123 sibling=None
print(Foo(sibling={'a': '321'}))
#> a=123 sibling=Foo(a=321, sibling=None)
这样也可以
from __future__ import annotations # 这里
from pydantic import BaseModel


class Foo(BaseModel):
    """Model that refers to itself by bare class name in its own annotation."""

    a: int = 123
    #: The sibling of `Foo` is referenced directly by type
    sibling: Foo = None


# Resolve the self-reference now that ``Foo`` is defined.
Foo.update_forward_refs()

print(Foo())
#> a=123 sibling=None
print(Foo(sibling={'a': '321'}))
#> a=123 sibling=Foo(a=321, sibling=None)

© chaleaoch 2021