typing、Ellipsis
pydantic

众所周知,Python 是动态类型语言,运行时不需要指定变量类型。由于Python的这一语言特性,在之前的文章中就介绍了python的注解和类型检查。本篇文章主要记录typing和pydantic的进一步学习

typing

官方文档

变量注解和函数注解

  • 使用:语句将信息附加到变量或函数参数中
  • ->运算符用于将信息附加到函数/方法的返回值中

常用类型及示例

  • 先来看看typing模块中的__all__
__all__ = [
    # Super-special typing primitives.
    'Any',
    'Callable',
    'ClassVar',
    'Final',
    'ForwardRef',
    'Generic',
    'Literal',
    'Optional',
    'Protocol',
    'Tuple',
    'Type',
    'TypeVar',
    'Union',

    # ABCs (from collections.abc).
    'AbstractSet',  # collections.abc.Set.
    'ByteString',
    'Container',
    'ContextManager',
    'Hashable',
    'ItemsView',
    'Iterable',
    'Iterator',
    'KeysView',
    'Mapping',
    'MappingView',
    'MutableMapping',
    'MutableSequence',
    'MutableSet',
    'Sequence',
    'Sized',
    'ValuesView',
    'Awaitable',
    'AsyncIterator',
    'AsyncIterable',
    'Coroutine',
    'Collection',
    'AsyncGenerator',
    'AsyncContextManager',

    # Structural checks, a.k.a. protocols.
    'Reversible',
    'SupportsAbs',
    'SupportsBytes',
    'SupportsComplex',
    'SupportsFloat',
    'SupportsIndex',
    'SupportsInt',
    'SupportsRound',

    # Concrete collection types.
    'ChainMap',
    'Counter',
    'Deque',
    'Dict',
    'DefaultDict',
    'List',
    'OrderedDict',
    'Set',
    'FrozenSet',
    'NamedTuple',  # Not really a type.
    'TypedDict',  # Not really a type.
    'Generator',

    # One-off things.
    'AnyStr',
    'cast',
    'final',
    'get_args',
    'get_origin',
    'get_type_hints',
    'NewType',
    'no_type_check',
    'no_type_check_decorator',
    'NoReturn',
    'overload',
    'runtime_checkable',
    'Text',
    'TYPE_CHECKING',
]
  • 常用类型
int, float
bool
str, list, tuple, dict, set
List, Tuple, Dict, Set, NamedTuple
Optional, Any
Iterable, Iterator
Generator
Deque, Sequence, Union, Literal
Callable
Type, TypeVar, Generic
  1. List
    • 如List[str]、List[int] 表示list中的元素类型
    • 如List[obj] 表示list中的元素为class对象
    • 如List[str] = ["jerry"] 表示list中元素为str且默认值为["jerry"]
def selected_categories(whitelist: List[str], blacklist: List[str]) -> List[str]:
    """Build the list of mutation categories to use when filtering.

    A non-empty whitelist is used as the starting set; otherwise every
    category reported by ``transformers.get_compatible_operation_sets()`` is
    used. Blacklisted categories are removed from the starting set either way.

    Args:
        whitelist: categories to include; empty means "all known categories"
        blacklist: categories to exclude

    Returns:
        The selected categories as a list built from a set, so duplicates are
        removed and no particular order is guaranteed.
    """
    # Collected up front, even when a whitelist is supplied.
    known = {op.category for op in transformers.get_compatible_operation_sets()}
    included = set(whitelist)
    excluded = set(blacklist)

    starting_set = included if included else known
    return list(starting_set - excluded)
  1. Optional
    • Optional[X] 等效于 X | None (或 Union[X, None])
    • 可选类型与含默认值的可选参数不同:含默认值的可选参数不需要在类型注解上添加 Optional 限定符,因为它仅是可选的
def get_src_location(src_loc: Optional[Path] = None) -> Path:
    """Resolve the source directory, auto-detecting it when none is given.

    When ``src_loc`` is not set, ``find_packages()`` is called and its first
    result is used. When ``src_loc`` is set, it is returned only if it exists.

    Args:
        src_loc: current source location, defaults to None

    Returns:
        Path to the source location

    Raises:
        FileNotFoundError: if an explicit ``src_loc`` does not exist, or if
            none was given and ``find_packages()`` returned nothing.
    """
    if src_loc:
        if src_loc.exists():
            return src_loc
    else:
        detected = find_packages()
        if detected:
            return Path(detected[0])

    # Reached for both a missing explicit path and a failed auto-detection.
    raise FileNotFoundError(
        "No source directory specified or automatically detected. "
        "Use --src or --help to see options."
    )
  1. Dict
    • 如Dict[str, str] 表示dict的key为str,value为str
    • 如Dict[str, int] = {'age': 20} 表示dict的key为str,value为int,默认值为 {'age': 20}
def write_cov_file(line_data: Dict[str, List[int]], fname: str) -> None:
    """Write a coverage file supporting both Coverage v4 and v5.

    The major version is read from ``coverage.version_info``. For v4 the
    output path is passed to ``write_file``; for any other version the v5 API
    is assumed, where the path is given to the ``CoverageData`` constructor
    and ``write`` takes no arguments.

    Args:
        line_data: Dictionary of line data for the coverage file.
        fname: string filename for output location (absolute path)

    Returns:
        None
    """
    if coverage.version_info[0] == 4:
        covdata = coverage.CoverageData()
        covdata.add_lines(line_data)
        covdata.write_file(fname)
    else:
        # Any non-4 major version is treated as Coverage v5.
        covdata = coverage.CoverageData(basename=fname)
        covdata.add_lines(line_data)
        covdata.write()
  1. Tuple
    • Tuple[X, Y] 是二项元组类型,第一个元素的类型是 X,第二个元素的类型是 Y
    • Tuple[int, float, str] 是由整数、浮点数、字符串组成的三项元组
    • 空元组的类型可写为 Tuple[()]
    • 可用省略号字面量指定同质变长元组,例如,Tuple[int, ...] 。Tuple 与 Tuple[Any, ...] 等价,也与 tuple 等价
def calculate_page_limit_and_offset(paginate: bool, page_size: int, page: int, result_length: int,
                                    offset: int, limit: int) -> Tuple[int, int]:
    """Work out the ``(limit, offset)`` pair used to slice a result list.

    :param paginate: pagination switch; only the exact value ``True`` enables it.
    :param page_size: maximum number of elements shown in a page.
    :param page: page number, used to derive the offset when none is given.
    :param result_length: length of the list containing the desired elements.
    :param offset: explicit offset, or ``None`` to derive it from ``page``.
    :param limit: explicit page limit, or ``None`` to fall back to ``page_size``.
    :return: page limit and offset, in that order.
    """
    # Identity check on purpose: truthy values other than True disable paging.
    if paginate is not True:
        return result_length, 0

    # An explicit limit overrides the configured page size.
    effective_size = page_size if limit is None else limit
    start = (page - 1) * effective_size if offset is None else offset
    return effective_size, start
  1. Union
    • 如Union[int],等效为int
    • 如Union[int, str] 等效为 int | str,即 int or str
def filename_to_url(filename: str,
                    cache_dir: Union[str, Path, None] = None) -> Tuple[str, Union[str, None]]:
    """
    Return the url and etag (which may be ``None``) stored for `filename`.
    Raise ``FileNotFoundError`` if `filename` or its stored metadata do not exist.

    `cache_dir` may be a string, a ``Path``, or ``None``; ``None`` selects
    ``PYTORCH_PRETRAINED_BERT_CACHE``. The metadata is read from the JSON file
    `filename` + ``.json`` in the same directory.
    """
    if cache_dir is None:
        cache_dir = PYTORCH_PRETRAINED_BERT_CACHE
    if isinstance(cache_dir, Path):
        cache_dir = str(cache_dir)

    cache_path = os.path.join(cache_dir, filename)
    if not os.path.exists(cache_path):
        raise FileNotFoundError("file {} not found".format(cache_path))

    meta_path = cache_path + '.json'
    if not os.path.exists(meta_path):
        raise FileNotFoundError("file {} not found".format(meta_path))

    # Read as UTF-8 explicitly so the result does not depend on the locale.
    with open(meta_path, encoding="utf-8") as meta_file:
        metadata = json.load(meta_file)
    url = metadata['url']
    etag = metadata['etag']

    return url, etag
  1. Any
    • 所有类型都与 Any 兼容,Any 与所有类型都兼容
    • 可对 Any 类型的值执行任何操作或方法调用,并赋值给任意变量
    • 使用 Any,说明值是动态类型
def apply_filter(object_id: str, search_props: Dict[str, Any],
                 triples: Graph, session: scoped_session) -> bool:
    """Tell whether an object's properties match every queried value.

    Each entry of ``search_props`` is checked in turn. A dict value is treated
    as a nested query and checked recursively against the object referenced by
    that property; any other value is compared with the ``value`` of the
    ``Terminal`` row referenced by that property.

    :param object_id: Id of the instance.
    :param search_props: Dictionary of query parameters with property id and values.
    :param triples: All triples.
    :param session: sqlalchemy scoped session.
    :return: True if the instance has properties with given values, False otherwise.
    """
    for prop, expected in search_props.items():
        if isinstance(expected, dict):
            # Nested property: follow the triple to the referenced object and recurse.
            nested = session.query(triples).filter(
                triples.GraphIII.subject == object_id, triples.GraphIII.predicate == prop).one()
            if apply_filter(nested.object_, expected, triples, session) is False:
                return False
            continue

        # Plain property: resolve the terminal it points to and compare values.
        row = session.query(triples).filter(
            triples.GraphIIT.subject == object_id, triples.GraphIIT.predicate == prop).one()
        terminal = session.query(Terminal).filter(
            Terminal.id == row.object_).one()
        if terminal.value != expected:
            return False
    return True
  1. 其他:Callable、Iterable、Iterator、Set等
    • 预期特定签名回调函数的框架可以用 Callable[[Arg1Type, Arg2Type], ReturnType] 实现类型提示
    • 无需指定调用签名,用省略号字面量替换类型提示里的参数列表: Callable[..., ReturnType],就可以声明可调对象的返回类型
def set_api_name(application: Flask, api_name: str) -> Iterator:
    """
    Set the server name or EntryPoint for the app (before it is run in main.py).

    This is a generator: it yields once while a receiver that stores
    ``api_name`` on ``g`` is connected to ``appcontext_pushed`` for
    ``application``.

    :param application: Flask app object
            <flask.app.Flask>
    :param api_name : API/Server name or EntryPoint
            <str>

    Raises:
        TypeError: If `api_name` is not a string.

    """
    if not isinstance(api_name, str):
        raise TypeError("The api_name is not of type <str>")

    def _store_api_name(sender: Flask, **kwargs: Any) -> None:
        # Signal receiver: copy the configured name onto ``g``.
        g.api_name = api_name

    with appcontext_pushed.connected_to(_store_api_name, application):
        yield

Ellipsis对象

翻译成中文就是省略号(...)对象

# ``...`` is a literal for the built-in Ellipsis singleton.
print(...)
print(type(...))


def foo():
    # ``...`` works as a placeholder body, much like ``pass``.
    ...


try:
    1 / 0
except ZeroDivisionError:
    # Catch only the error this demo triggers; ``...`` is again a placeholder.
    ...

#######输出结果#######
Ellipsis
<class 'ellipsis'>

应用

  • Numpy 中的切片
  • FastAPI 中的必选参数
  • Type Hint 类型注解
    • 无需指定调用签名,用省略号字面量替换类型提示里的参数列表: Callable[..., ReturnType],就可以声明可调对象的返回类型
    • 可用省略号字面量指定同质变长元组,例如,Tuple[int, ...] 。Tuple 与 Tuple[Any, ...] 等价,也与 tuple 等价
  1. 在类型提示中使用 Callable,不确定参数签名时,可以用 Ellipsis 占位
from typing import Callable
def foo() -> Callable[..., int]:
    return lambda x: 1
  1. 使用 Tuple 时返回不定长的 tuple,用 Ellipsis 进行指定
from typing import Tuple

def bar() -> Tuple[int, ...]:
    return (1, 2, 3)

def buzz() -> Tuple[int, ...]:
    return (1, 2, 3, 4)

可进一步阅读:《Python 中有趣的 Ellipsis 对象》、《Python 的 Ellipsis 对象》

pydantic

Data validation and settings management using python type annotations.
pydantic enforces type hints at runtime, and provides user friendly errors when data is invalid.
Define how data should be in pure, canonical python; validate it with pydantic.

官方文档

安装

pip install pydantic
  • 可选插件
    • 如果需要校验邮箱,可以额外安装 email-validator, pip install email-validator
    • 如果支持.env设置,需要额外安装 python-dotenv, pip install python-dotenv
# 跟pydantic一起安装
pip install pydantic[email]
pip install pydantic[dotenv]
# 或
pip install pydantic[email,dotenv]
  • pycharm 插件:在pycharm中搜索pydantic安装并重启即可

小试牛刀

from datetime import datetime
from typing import List, Optional
from pydantic import BaseModel, ValidationError


class User(BaseModel):
    id: int
    name = 'John Doe'
    signup_ts: Optional[datetime] = None
    friends: List[int] = []


external_data = {
    'id': '123',
    'signup_ts': '2019-06-01 12:22',
    'friends': [1, 2, '3'],
    # 'name': 'jerry'
}
user = User(**external_data)

print(user, type(user))
print(user.__dict__)
print(User.__dict__)
print(user.__fields__)
print(User.__base__.__dict__)

print('==' * 20)

print(user.id, user.name, user.signup_ts, user.friends)
print(user.dict())  # {'id': 123, 'signup_ts': datetime.datetime(2019, 6, 1, 12, 22), 'friends': [1, 2, 3], 'name': 'John Doe'}
print(dict(user))  # 同上,id , signup_ts, friends 中不符合类型的数据可进行类型转换(前提是可转换否则报错)
print(user.__fields_set__)  # {'id', 'friends', 'signup_ts'}

print('==' * 20)

try:
    User(signup_ts='broken', friends=[1, 2, 'not number'])
    print('+++++')
except Exception as e:
    print('~~~~~')
    print(e)

print('==' * 20)

try:
    User(signup_ts='broken', friends=[1, 2, 'not number'])
except ValidationError as e:
    print('~~~~~')
    print(e.json())
####################输出结果#####################
id=123 signup_ts=datetime.datetime(2019, 6, 1, 12, 22) friends=[1, 2, 3] name='John Doe' <class '__main__.User'>
{'id': 123, 'signup_ts': datetime.datetime(2019, 6, 1, 12, 22), 'friends': [1, 2, 3], 'name': 'John Doe'}
{'__config__': <class '__main__.Config'>, '__fields__': {'id': ModelField(name='id', type=int, required=True), 'signup_ts': ModelField(name='signup_ts', type=Optional[datetime], required=False, default=None), 'friends': ModelField(name='friends', type=List[int], required=False, default=[]), 'name': ModelField(name='name', type=str, required=False, default='John Doe')}, '__validators__': {}, '__pre_root_validators__': [], '__post_root_validators__': [], '__schema_cache__': {}, '__json_encoder__': <staticmethod object at 0x103ef69d0>, '__custom_root_type__': False, '__private_attributes__': {}, '__slots__': set(), '__module__': '__main__', '__annotations__': {'id': <class 'int'>, 'signup_ts': typing.Union[datetime.datetime, NoneType], 'friends': typing.List[int]}, '__doc__': None, '__abstractmethods__': frozenset(), '_abc_impl': <_abc_data object at 0x1048d92a0>, '__signature__': <pydantic.utils.ClassAttribute object at 0x1048d93a0>}
{'id': ModelField(name='id', type=int, required=True), 'signup_ts': ModelField(name='signup_ts', type=Optional[datetime], required=False, default=None), 'friends': ModelField(name='friends', type=List[int], required=False, default=[]), 'name': ModelField(name='name', type=str, required=False, default='John Doe')}
{'__config__': <class 'importlib._bootstrap.Config'>, '__fields__': {}, '__validators__': {}, '__pre_root_validators__': [], '__post_root_validators__': [], '__schema_cache__': {}, '__json_encoder__': <staticmethod object at 0x104875490>, '__custom_root_type__': False, '__private_attributes__': {}, '__slots__': {'__fields_set__', '__dict__'}, '__module__': 'pydantic.main', 'Config': <class 'pydantic.main.BaseConfig'>, '__doc__': '', '__init__': <cyfunction BaseModel.__init__ at 0x104709520>, '__setattr__': <cyfunction BaseModel.__setattr__ at 0x1047095f0>, '__getstate__': <cyfunction BaseModel.__getstate__ at 0x1047096c0>, '__setstate__': <cyfunction BaseModel.__setstate__ at 0x104709790>, '_init_private_attributes': <cyfunction BaseModel._init_private_attributes at 0x104709860>, 'dict': <cyfunction BaseModel.dict at 0x104709930>, 'json': <cyfunction BaseModel.json at 0x104709a00>, 'parse_obj': <classmethod object at 0x104554a90>, 'parse_raw': <classmethod object at 0x1046ecf70>, 'parse_file': <classmethod object at 0x1046ec2e0>, 'from_orm': <classmethod object at 0x10470d040>, 'construct': <classmethod object at 0x10470d070>, 'copy': <cyfunction BaseModel.copy at 0x104709ee0>, 'schema': <classmethod object at 0x10470d0d0>, 'schema_json': <classmethod object at 0x10470d100>, '__get_validators__': <classmethod object at 0x10470d160>, 'validate': <classmethod object at 0x10470d190>, '_decompose_class': <classmethod object at 0x10470d1c0>, '_get_value': <classmethod object at 0x10470d1f0>, 'update_forward_refs': <classmethod object at 0x10470d220>, '__iter__': <cyfunction BaseModel.__iter__ at 0x10470e5f0>, '_iter': <cyfunction BaseModel._iter at 0x10470e6c0>, '_calculate_keys': <cyfunction BaseModel._calculate_keys at 0x10470e790>, '__eq__': <cyfunction BaseModel.__eq__ at 0x10470e860>, '__repr_args__': <cyfunction BaseModel.__repr_args__ at 0x10470e930>, 'fields': <property object at 0x1046fbe00>, 'to_string': <cyfunction BaseModel.to_string at 0x10470ead0>, 
'__values__': <property object at 0x1046fbe50>, '__fields_set__': <member '__fields_set__' of 'BaseModel' objects>, '__dict__': <attribute '__dict__' of 'BaseModel' objects>, '__hash__': None, '__abstractmethods__': frozenset(), '_abc_impl': <_abc_data object at 0x104875450>, '__signature__': <pydantic.utils.ClassAttribute object at 0x10470d910>}
========================================
123 John Doe 2019-06-01 12:22:00 [1, 2, 3]
{'id': 123, 'signup_ts': datetime.datetime(2019, 6, 1, 12, 22), 'friends': [1, 2, 3], 'name': 'John Doe'}
{'id': 123, 'signup_ts': datetime.datetime(2019, 6, 1, 12, 22), 'friends': [1, 2, 3], 'name': 'John Doe'}
{'id', 'signup_ts', 'friends'}
========================================
~~~~~
3 validation errors for User
id
  field required (type=value_error.missing)
signup_ts
  invalid datetime format (type=value_error.datetime)
friends -> 2
  value is not a valid integer (type=type_error.integer)
========================================
~~~~~
[
  {
    "loc": [
      "id"
    ],
    "msg": "field required",
    "type": "value_error.missing"
  },
  {
    "loc": [
      "signup_ts"
    ],
    "msg": "invalid datetime format",
    "type": "value_error.datetime"
  },
  {
    "loc": [
      "friends",
      2
    ],
    "msg": "value is not a valid integer",
    "type": "type_error.integer"
  }
]

模型

  • 递归模型
from typing import List
from pydantic import BaseModel


class Foo(BaseModel):
    count: int
    size: float = None

class Bar(BaseModel):
    apple = 'x'
    banana = 'y'

class Spam(BaseModel):
    foo: Foo
    bars: List[Bar]

m = Spam(foo={'count': 4}, bars=[{'apple': 'x1'}, {'apple': 'x2'}])
print(m)
print(m.dict())
#####################输出结果#################
foo=Foo(count=4, size=None) bars=[Bar(apple='x1', banana='y'), Bar(apple='x2', banana='y')]
{'foo': {'count': 4, 'size': None}, 'bars': [{'apple': 'x1', 'banana': 'y'}, {'apple': 'x2', 'banana': 'y'}]}

Pydantic models can be created from arbitrary class instances to support models that map to ORM objects.

To do this:

  • The Config property orm_mode must be set to True.
  • The special constructor from_orm must be used to create the model instance.
  • Config属性orm_mode必须设置成True
  • from_orm必须用于创建模型实例
from sqlalchemy import Column, Integer, String
from sqlalchemy.orm import declarative_base
from pydantic import BaseModel, constr

Base = declarative_base()


class StudentOrm(Base):
    __tablename__ = 'student'

    id = Column(Integer, primary_key=True, autoincrement=True)  # 第一参数是字段名,如果和属性名一致,则可不填写(一般一致),否则必须指定
    name = Column(String(64), nullable=False)
    age = Column(Integer, nullable=False)

    # def __repr__(self):
    #     return f'<{self.__class__.__name__}>, id={self.id}, name={self.name}, age={self.age}'


class StudentModel(BaseModel):
    id: int
    name: constr(max_length=64)
    age: int

    class Config:
        orm_mode = True


stu_orm = StudentOrm(id=1, name='jerry', age=20)
print(stu_orm)

stu_model = StudentModel.from_orm(stu_orm)
print(stu_model)
print(stu_model.id)
print(stu_model.dict())
#####################输出结果###############
<__main__.StudentOrm object at 0x1098ce370>
id=1 name='jerry' age=20
1
{'id': 1, 'name': 'jerry', 'age': 20}
  • 错误处理
    • e.errors()、e.json()、str(e)
    • 每个异常包含loc、type、msg、ctx(当有数值类错误需要暴露时)
# 在上面小试牛刀的例子已经演示过,以下再补充一个官方示例
from typing import List
from pydantic import BaseModel, ValidationError, conint


class Location(BaseModel):
    lat = 0.1
    lng = 10.1


class Model(BaseModel):
    is_required: float
    gt_int: conint(gt=42)
    list_of_ints: List[int] = None
    a_float: float = None
    recursive_model: Location = None


data = dict(
    list_of_ints=['1', 2, 'bad'],
    a_float='not a float',
    recursive_model={'lat': 4.2, 'lng': 'New York'},
    gt_int=21,
)

try:
    Model(**data)
except ValidationError as e:
    print(e)
    print('==' * 20)
    print(e.json())
    print('==' * 20)
    print(e.errors())
    print('==' * 20)
    print(str(e))
########################输出结果######################
5 validation errors for Model
is_required
  field required (type=value_error.missing)
gt_int
  ensure this value is greater than 42 (type=value_error.number.not_gt; limit_value=42)
list_of_ints -> 2
  value is not a valid integer (type=type_error.integer)
a_float
  value is not a valid float (type=type_error.float)
recursive_model -> lng
  value is not a valid float (type=type_error.float)
========================================
[
  {
    "loc": [
      "is_required"
    ],
    "msg": "field required",
    "type": "value_error.missing"
  },
  {
    "loc": [
      "gt_int"
    ],
    "msg": "ensure this value is greater than 42",
    "type": "value_error.number.not_gt",
    "ctx": {
      "limit_value": 42
    }
  },
  {
    "loc": [
      "list_of_ints",
      2
    ],
    "msg": "value is not a valid integer",
    "type": "type_error.integer"
  },
  {
    "loc": [
      "a_float"
    ],
    "msg": "value is not a valid float",
    "type": "type_error.float"
  },
  {
    "loc": [
      "recursive_model",
      "lng"
    ],
    "msg": "value is not a valid float",
    "type": "type_error.float"
  }
]
========================================
[{'loc': ('is_required',), 'msg': 'field required', 'type': 'value_error.missing'}, {'loc': ('gt_int',), 'msg': 'ensure this value is greater than 42', 'type': 'value_error.number.not_gt', 'ctx': {'limit_value': 42}}, {'loc': ('list_of_ints', 2), 'msg': 'value is not a valid integer', 'type': 'type_error.integer'}, {'loc': ('a_float',), 'msg': 'value is not a valid float', 'type': 'type_error.float'}, {'loc': ('recursive_model', 'lng'), 'msg': 'value is not a valid float', 'type': 'type_error.float'}]
========================================
5 validation errors for Model
is_required
  field required (type=value_error.missing)
gt_int
  ensure this value is greater than 42 (type=value_error.number.not_gt; limit_value=42)
list_of_ints -> 2
  value is not a valid integer (type=type_error.integer)
a_float
  value is not a valid float (type=type_error.float)
recursive_model -> lng
  value is not a valid float (type=type_error.float)

Field Types

pydantic除了支持标准库类型来定义字段,还实现了许多常用的类型。并且可以自己实现与pydantic兼容的自定义属性和校验

from typing import Literal, Union

from pydantic import BaseModel, Field, ValidationError


class Cat(BaseModel):
    pet_type: Literal['cat']
    meows: int


class Dog(BaseModel):
    pet_type: Literal['dog']
    barks: float


class Lizard(BaseModel):
    pet_type: Literal['reptile', 'lizard']
    scales: bool


class Model(BaseModel):
    pet: Union[Cat, Dog, Lizard] = Field(..., discriminator='pet_type')
    n: int


print(Model(pet={'pet_type': 'dog', 'barks': 3.14}, n=1))

try:
    Model(pet={'pet_type': 'dog'}, n=1)
except ValidationError as e:
    print(e)
def Field(
    default: Any = Undefined,    # 第一个参数,用于设置默认值,也可使用ellipsis (...) 来表示是必填字段
    *,     # *后使用关键字传参
    default_factory: Optional[NoArgAnyCallable] = None,  # 无参函数用于设置默认值,不可与default字段同时设置
    alias: str = None,   
    title: str = None,  
    description: str = None,  
    const: bool = None,   
    gt: float = None,  # greater than
    ge: float = None,   # greater than or equal
    lt: float = None,    # less than
    le: float = None,    #  less than or equal
    multiple_of: float = None,  
    min_items: int = None,  
    max_items: int = None,  
    min_length: int = None,    #  对于string值,最小长度
    max_length: int = None,    # 对于string值,最大长度
    regex: str = None,  
    **extra: Any,
) -> Any:
...
...

Model Config

通过pydantic model类中的Config类来控制

from pydantic import BaseModel, ValidationError


class Model(BaseModel):
    v: str

    class Config:
        max_anystr_length = 10
        error_msg_templates = {
            'value_error.any_str.max_length': 'max_length:{limit_value}',
        }
        
try:
    Model(v='x' * 20)
except ValidationError as e:
    print(e)
###############输出结果############
1 validation error for Model
v
  max_length:10 (type=value_error.any_str.max_length; limit_value=10)
  • orm_mode:是否允许使用ORM mode,请参考以上orm模式示例

更多高级功能请参看官方文档

扩展阅读