@mytec: pushed back before 1.1

This commit is contained in:
lazard36
2026-01-30 20:12:52 +00:00
parent d6988e370e
commit e8ae5bc1db
5228 changed files with 1191766 additions and 0 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,181 @@
/*
* Copyright 2009-present MongoDB, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "bson-endian.h"
/* NOTE(review): this #include sits outside the include guard below; harmless
 * for a guarded header, but unconventional. */
#ifndef _CBSONMODULE_H
#define _CBSONMODULE_H
#if defined(WIN32) || defined(_MSC_VER)
/*
 * This macro is basically an implementation of asprintf for win32
 * We print to the provided buffer to get the string value as an int.
 * USE LL2STR. This is kept only to test LL2STR.
 */
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
/* MSVC 2005+: secure CRT variants.  _scprintf is called to size the output;
 * note it is evaluated twice (buffer size and count arguments). */
#define INT2STRING(buffer, i) \
    _snprintf_s((buffer), \
                _scprintf("%lld", (i)) + 1, \
                _scprintf("%lld", (i)) + 1, \
                "%lld", \
                (i))
#define STRCAT(dest, n, src) strcat_s((dest), (n), (src))
#else
/* Older MSVC/MinGW: classic _snprintf (may not NUL-terminate on truncation). */
#define INT2STRING(buffer, i) \
    _snprintf((buffer), \
              _scprintf("%lld", (i)) + 1, \
              "%lld", \
              (i))
/* The n argument is accepted for signature parity but ignored here. */
#define STRCAT(dest, n, src) strcat((dest), (src))
#endif
#else
/* Non-Windows: plain snprintf.  NOTE(review): sizeof((buffer)) is the buffer
 * size only when the argument is a true array, not a pointer. */
#define INT2STRING(buffer, i) snprintf((buffer), sizeof((buffer)), "%lld", (i))
#define STRCAT(dest, n, src) strcat((dest), (src))
#endif
/* Just enough space in char array to hold LLONG_MIN and null terminator */
/* "-9223372036854775808" is 20 characters + trailing NUL = 21. */
#define BUF_SIZE 21
/* Converts integer to its string representation in decimal notation. */
extern int cbson_long_long_to_str(long long int num, char* str, size_t size);
/* Preferred conversion macro; sizeof(buffer) requires a true array argument. */
#define LL2STR(buffer, i) cbson_long_long_to_str((i), (buffer), sizeof(buffer))
/* C-side view of a bson TypeRegistry: custom encoder/decoder lookups plus a
 * fallback encoder.  NOTE(review): reference ownership for the PyObject*
 * members is not visible in this header — see the converter in _cbsonmodule.c. */
typedef struct type_registry_t {
    PyObject* encoder_map;          /* mapping used for custom type encoding */
    PyObject* decoder_map;          /* mapping used for custom type decoding */
    PyObject* fallback_encoder;     /* callable tried when no encoder matches */
    PyObject* registry_obj;         /* the original Python TypeRegistry object */
    /* Flags (0/1) cached so hot paths can skip lookups; presumably set by the
     * codec-options converter — TODO confirm against _cbsonmodule.c. */
    unsigned char is_encoder_empty;
    unsigned char is_decoder_empty;
    unsigned char has_fallback_encoder;
} type_registry_t;
/* C-side view of a bson CodecOptions object, filled in by
 * convert_codec_options and released by destroy_codec_options. */
typedef struct codec_options_t {
    PyObject* document_class;             /* type used for decoded documents */
    unsigned char tz_aware;               /* boolean flag */
    unsigned char uuid_rep;               /* a UuidRepresentation value */
    char* unicode_decode_error_handler;   /* e.g. "strict"; C string */
    PyObject* tzinfo;                     /* tzinfo object or None */
    type_registry_t type_registry;        /* embedded custom-codec registry */
    unsigned char datetime_conversion;    /* DatetimeConversion value */
    PyObject* options_obj;                /* the original Python options object */
    unsigned char is_raw_bson;            /* boolean flag */
} codec_options_t;
/* C API functions */
/*
 * Capsule-export pattern (see CPython docs "Providing a C API for an
 * Extension Module"): each exported function is described by three macros —
 * its slot INDEX in the pointer table, its RETURN type, and its PROTO
 * (parameter list).  The _cbsonmodule translation unit uses them to declare
 * the real functions; every other consumer rebuilds a call through the
 * capsule's pointer table from the same pieces.
 */
#define _cbson_buffer_write_bytes_INDEX 0
#define _cbson_buffer_write_bytes_RETURN int
#define _cbson_buffer_write_bytes_PROTO (buffer_t buffer, const char* data, int size)
#define _cbson_write_dict_INDEX 1
#define _cbson_write_dict_RETURN int
#define _cbson_write_dict_PROTO (PyObject* self, buffer_t buffer, PyObject* dict, unsigned char check_keys, const codec_options_t* options, unsigned char top_level)
#define _cbson_write_pair_INDEX 2
#define _cbson_write_pair_RETURN int
#define _cbson_write_pair_PROTO (PyObject* self, buffer_t buffer, const char* name, int name_length, PyObject* value, unsigned char check_keys, const codec_options_t* options, unsigned char allow_id)
#define _cbson_decode_and_write_pair_INDEX 3
#define _cbson_decode_and_write_pair_RETURN int
#define _cbson_decode_and_write_pair_PROTO (PyObject* self, buffer_t buffer, PyObject* key, PyObject* value, unsigned char check_keys, const codec_options_t* options, unsigned char top_level)
#define _cbson_convert_codec_options_INDEX 4
#define _cbson_convert_codec_options_RETURN int
#define _cbson_convert_codec_options_PROTO (PyObject* self, PyObject* options_obj, codec_options_t* options)
#define _cbson_destroy_codec_options_INDEX 5
#define _cbson_destroy_codec_options_RETURN void
#define _cbson_destroy_codec_options_PROTO (codec_options_t* options)
#define _cbson_buffer_write_double_INDEX 6
#define _cbson_buffer_write_double_RETURN int
#define _cbson_buffer_write_double_PROTO (buffer_t buffer, double data)
#define _cbson_buffer_write_int32_INDEX 7
#define _cbson_buffer_write_int32_RETURN int
#define _cbson_buffer_write_int32_PROTO (buffer_t buffer, int32_t data)
#define _cbson_buffer_write_int64_INDEX 8
#define _cbson_buffer_write_int64_RETURN int
#define _cbson_buffer_write_int64_PROTO (buffer_t buffer, int64_t data)
#define _cbson_buffer_write_int32_at_position_INDEX 9
#define _cbson_buffer_write_int32_at_position_RETURN void
#define _cbson_buffer_write_int32_at_position_PROTO (buffer_t buffer, int position, int32_t data)
#define _cbson_downcast_and_check_INDEX 10
#define _cbson_downcast_and_check_RETURN int
#define _cbson_downcast_and_check_PROTO (Py_ssize_t size, uint8_t extra)
/* Total number of C API pointers */
#define _cbson_API_POINTER_COUNT 11
#ifdef _CBSON_MODULE
/* This section is used when compiling _cbsonmodule */
/* Forward declarations of the real (file-local) implementations. */
static _cbson_buffer_write_bytes_RETURN buffer_write_bytes _cbson_buffer_write_bytes_PROTO;
static _cbson_write_dict_RETURN write_dict _cbson_write_dict_PROTO;
static _cbson_write_pair_RETURN write_pair _cbson_write_pair_PROTO;
static _cbson_decode_and_write_pair_RETURN decode_and_write_pair _cbson_decode_and_write_pair_PROTO;
static _cbson_convert_codec_options_RETURN convert_codec_options _cbson_convert_codec_options_PROTO;
static _cbson_destroy_codec_options_RETURN destroy_codec_options _cbson_destroy_codec_options_PROTO;
static _cbson_buffer_write_double_RETURN buffer_write_double _cbson_buffer_write_double_PROTO;
static _cbson_buffer_write_int32_RETURN buffer_write_int32 _cbson_buffer_write_int32_PROTO;
static _cbson_buffer_write_int64_RETURN buffer_write_int64 _cbson_buffer_write_int64_PROTO;
static _cbson_buffer_write_int32_at_position_RETURN buffer_write_int32_at_position _cbson_buffer_write_int32_at_position_PROTO;
static _cbson_downcast_and_check_RETURN _downcast_and_check _cbson_downcast_and_check_PROTO;
#else
/* This section is used in modules that use _cbsonmodule's API */
static void **_cbson_API;
/* Each name expands to a call through the imported capsule pointer table. */
#define buffer_write_bytes (*(_cbson_buffer_write_bytes_RETURN (*)_cbson_buffer_write_bytes_PROTO) _cbson_API[_cbson_buffer_write_bytes_INDEX])
#define write_dict (*(_cbson_write_dict_RETURN (*)_cbson_write_dict_PROTO) _cbson_API[_cbson_write_dict_INDEX])
#define write_pair (*(_cbson_write_pair_RETURN (*)_cbson_write_pair_PROTO) _cbson_API[_cbson_write_pair_INDEX])
#define decode_and_write_pair (*(_cbson_decode_and_write_pair_RETURN (*)_cbson_decode_and_write_pair_PROTO) _cbson_API[_cbson_decode_and_write_pair_INDEX])
#define convert_codec_options (*(_cbson_convert_codec_options_RETURN (*)_cbson_convert_codec_options_PROTO) _cbson_API[_cbson_convert_codec_options_INDEX])
#define destroy_codec_options (*(_cbson_destroy_codec_options_RETURN (*)_cbson_destroy_codec_options_PROTO) _cbson_API[_cbson_destroy_codec_options_INDEX])
#define buffer_write_double (*(_cbson_buffer_write_double_RETURN (*)_cbson_buffer_write_double_PROTO) _cbson_API[_cbson_buffer_write_double_INDEX])
#define buffer_write_int32 (*(_cbson_buffer_write_int32_RETURN (*)_cbson_buffer_write_int32_PROTO) _cbson_API[_cbson_buffer_write_int32_INDEX])
#define buffer_write_int64 (*(_cbson_buffer_write_int64_RETURN (*)_cbson_buffer_write_int64_PROTO) _cbson_API[_cbson_buffer_write_int64_INDEX])
#define buffer_write_int32_at_position (*(_cbson_buffer_write_int32_at_position_RETURN (*)_cbson_buffer_write_int32_at_position_PROTO) _cbson_API[_cbson_buffer_write_int32_at_position_INDEX])
#define _downcast_and_check (*(_cbson_downcast_and_check_RETURN (*)_cbson_downcast_and_check_PROTO) _cbson_API[_cbson_downcast_and_check_INDEX])
/* Importing consumers must invoke this once; sets _cbson_API (NULL on failure). */
#define _cbson_IMPORT _cbson_API = (void **)PyCapsule_Import("_cbson._C_API", 0)
#endif
#endif // _CBSONMODULE_H

View File

@@ -0,0 +1,43 @@
# Copyright 2021-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Setstate and getstate functions for objects with __slots__, allowing
compatibility with default pickling protocol
"""
from __future__ import annotations
from typing import Any, Mapping
def _setstate_slots(self: Any, state: Any) -> None:
for slot, value in state.items():
setattr(self, slot, value)
def _mangle_name(name: str, prefix: str) -> str:
if name.startswith("__"):
prefix = "_" + prefix
else:
prefix = ""
return prefix + name
def _getstate_slots(self: Any) -> Mapping[Any, Any]:
    """Collect this object's ``__slots__`` attributes into a dict for pickling.

    Slot names are mangled the way the class body would reference them, and
    unset slots are skipped.
    """
    class_name = self.__class__.__name__
    mangled = (_mangle_name(slot, class_name) for slot in self.__slots__)
    return {attr: getattr(self, attr) for attr in mangled if hasattr(self, attr)}

View File

@@ -0,0 +1,665 @@
# Copyright 2009-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import struct
import warnings
from enum import Enum
from typing import TYPE_CHECKING, Any, Optional, Sequence, Tuple, Type, Union, overload
from uuid import UUID
"""Tools for representing BSON binary data.
"""
# BSON binary subtype codes (the integer stored as the Binary's subtype).
BINARY_SUBTYPE = 0
"""BSON binary subtype for binary data.
This is the default subtype for binary data.
"""
FUNCTION_SUBTYPE = 1
"""BSON binary subtype for functions.
"""
OLD_BINARY_SUBTYPE = 2
"""Old BSON binary subtype for binary data.
This is the old default subtype, the current
default is :data:`BINARY_SUBTYPE`.
"""
OLD_UUID_SUBTYPE = 3
"""Old BSON binary subtype for a UUID.
:class:`uuid.UUID` instances will automatically be encoded
by :mod:`bson` using this subtype when using
:data:`UuidRepresentation.PYTHON_LEGACY`,
:data:`UuidRepresentation.JAVA_LEGACY`, or
:data:`UuidRepresentation.CSHARP_LEGACY`.
.. versionadded:: 2.1
"""
UUID_SUBTYPE = 4
"""BSON binary subtype for a UUID.
This is the standard BSON binary subtype for UUIDs.
:class:`uuid.UUID` instances will automatically be encoded
by :mod:`bson` using this subtype when using
:data:`UuidRepresentation.STANDARD`.
"""
if TYPE_CHECKING:
    # Imported for type annotations only; never evaluated at runtime.
    from array import array as _array
    from mmap import mmap as _mmap
    import numpy as np
    import numpy.typing as npt
class UuidRepresentation:
    """Namespace of integer codes selecting how :class:`uuid.UUID` values are
    encoded to / decoded from BSON binary."""

    UNSPECIFIED = 0
    """An unspecified UUID representation.
    When configured, :class:`uuid.UUID` instances will **not** be
    automatically encoded to or decoded from :class:`~bson.binary.Binary`.
    When encoding a :class:`uuid.UUID` instance, an error will be raised.
    To encode a :class:`uuid.UUID` instance with this configuration, it must
    be wrapped in the :class:`~bson.binary.Binary` class by the application
    code. When decoding a BSON binary field with a UUID subtype, a
    :class:`~bson.binary.Binary` instance will be returned instead of a
    :class:`uuid.UUID` instance.
    See `unspecified representation details <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#unspecified>`_ for details.
    .. versionadded:: 3.11
    """
    STANDARD = UUID_SUBTYPE
    """The standard UUID representation.
    :class:`uuid.UUID` instances will automatically be encoded to
    and decoded from BSON binary, using RFC-4122 byte order with
    binary subtype :data:`UUID_SUBTYPE`.
    See `standard representation details <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#standard>`_ for details.
    .. versionadded:: 3.11
    """
    PYTHON_LEGACY = OLD_UUID_SUBTYPE
    """The Python legacy UUID representation.
    :class:`uuid.UUID` instances will automatically be encoded to
    and decoded from BSON binary, using RFC-4122 byte order with
    binary subtype :data:`OLD_UUID_SUBTYPE`.
    See `python legacy representation details <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#python_legacy>`_ for details.
    .. versionadded:: 3.11
    """
    JAVA_LEGACY = 5
    """The Java legacy UUID representation.
    :class:`uuid.UUID` instances will automatically be encoded to
    and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`,
    using the Java driver's legacy byte order.
    See `Java Legacy UUID <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#java_legacy>`_ for details.
    .. versionadded:: 3.11
    """
    CSHARP_LEGACY = 6
    """The C#/.net legacy UUID representation.
    :class:`uuid.UUID` instances will automatically be encoded to
    and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`,
    using the C# driver's legacy byte order.
    See `C# Legacy UUID <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#csharp_legacy>`_ for details.
    .. versionadded:: 3.11
    """
STANDARD = UuidRepresentation.STANDARD
"""An alias for :data:`UuidRepresentation.STANDARD`.
.. versionadded:: 3.0
"""
PYTHON_LEGACY = UuidRepresentation.PYTHON_LEGACY
"""An alias for :data:`UuidRepresentation.PYTHON_LEGACY`.
.. versionadded:: 3.0
"""
JAVA_LEGACY = UuidRepresentation.JAVA_LEGACY
"""An alias for :data:`UuidRepresentation.JAVA_LEGACY`.
.. versionchanged:: 3.6
BSON binary subtype 4 is decoded using RFC-4122 byte order.
.. versionadded:: 2.3
"""
CSHARP_LEGACY = UuidRepresentation.CSHARP_LEGACY
"""An alias for :data:`UuidRepresentation.CSHARP_LEGACY`.
.. versionchanged:: 3.6
BSON binary subtype 4 is decoded using RFC-4122 byte order.
.. versionadded:: 2.3
"""
ALL_UUID_SUBTYPES = (OLD_UUID_SUBTYPE, UUID_SUBTYPE)
ALL_UUID_REPRESENTATIONS = (
UuidRepresentation.UNSPECIFIED,
UuidRepresentation.STANDARD,
UuidRepresentation.PYTHON_LEGACY,
UuidRepresentation.JAVA_LEGACY,
UuidRepresentation.CSHARP_LEGACY,
)
UUID_REPRESENTATION_NAMES = {
UuidRepresentation.UNSPECIFIED: "UuidRepresentation.UNSPECIFIED",
UuidRepresentation.STANDARD: "UuidRepresentation.STANDARD",
UuidRepresentation.PYTHON_LEGACY: "UuidRepresentation.PYTHON_LEGACY",
UuidRepresentation.JAVA_LEGACY: "UuidRepresentation.JAVA_LEGACY",
UuidRepresentation.CSHARP_LEGACY: "UuidRepresentation.CSHARP_LEGACY",
}
MD5_SUBTYPE = 5
"""BSON binary subtype for an MD5 hash.
"""
COLUMN_SUBTYPE = 7
"""BSON binary subtype for columns.
.. versionadded:: 4.0
"""
SENSITIVE_SUBTYPE = 8
"""BSON binary subtype for sensitive data.
.. versionadded:: 4.5
"""
VECTOR_SUBTYPE = 9
"""BSON binary subtype for densely packed vector data.
.. versionadded:: 4.10
"""
USER_DEFINED_SUBTYPE = 128
"""BSON binary subtype for any user defined structure.
"""
class BinaryVectorDtype(Enum):
    """Datatypes of vector subtype.
    :param FLOAT32: (0x27) Pack list of :class:`float` as float32
    :param INT8: (0x03) Pack list of :class:`int` in [-128, 127] as signed int8
    :param PACKED_BIT: (0x10) Pack list of :class:`int` in [0, 255] as unsigned uint8
    The `PACKED_BIT` value represents a special case where vector values themselves
    can only be of two values (0 or 1) but these are packed together into groups of 8,
    a byte. In Python, these are displayed as ints in range [0, 255]
    Each value is of type bytes with a length of one.
    .. versionadded:: 4.10
    """

    # Each member's value is the single dtype byte stored in the vector's
    # two-byte metadata prefix (see Binary.from_vector / Binary.as_vector).
    INT8 = b"\x03"
    FLOAT32 = b"\x27"
    PACKED_BIT = b"\x10"
class BinaryVector:
    """Vector of numbers along with metadata for binary interoperability.
    .. versionadded:: 4.10
    """

    __slots__ = ("data", "dtype", "padding")

    def __init__(
        self,
        data: Union[Sequence[float | int], npt.NDArray[np.number]],
        dtype: BinaryVectorDtype,
        padding: int = 0,
    ):
        """
        :param data: Sequence of numbers representing the mathematical vector.
        :param dtype: The data type stored in binary
        :param padding: The number of bits in the final byte that are to be ignored
            when a vector element's size is less than a byte
            and the length of the vector is not a multiple of 8.
        """
        self.data = data
        self.dtype = dtype
        self.padding = padding

    def __repr__(self) -> str:
        # Keep the exact historical repr format.
        return "BinaryVector(dtype={}, padding={}, data={})".format(
            self.dtype, self.padding, self.data
        )

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, BinaryVector):
            return False
        same_meta = self.dtype == other.dtype and self.padding == other.padding
        return same_meta and self.data == other.data

    def __len__(self) -> int:
        return len(self.data)
class Binary(bytes):
    """Representation of BSON binary data.
    We want to represent Python strings as the BSON string type.
    We need to wrap binary data so that we can tell
    the difference between what should be considered binary data and
    what should be considered a string when we encode to BSON.
    Subtype 9 provides a space-efficient representation of 1-dimensional vector data.
    Its data is prepended with two bytes of metadata.
    The first (dtype) describes its data type, such as float32 or int8.
    The second (padding) prescribes the number of bits to ignore in the final byte.
    This is relevant when the element size of the dtype is not a multiple of 8.
    Raises TypeError if `subtype` is not an instance of :class:`int`.
    Raises ValueError if `subtype` is not in [0, 256).
    .. note::
       Instances of Binary with subtype 0 will be decoded directly to :class:`bytes`.
    :param data: the binary data to represent. Can be any bytes-like type
        that implements the buffer protocol.
    :param subtype: the `binary subtype
        <https://bsonspec.org/spec.html>`_
        to use
    .. versionchanged:: 3.9
       Support any bytes-like type that implements the buffer protocol.
    .. versionchanged:: 4.10
       Addition of vector subtype.
    """

    _type_marker = 5
    __subtype: int

    def __new__(
        cls: Type[Binary],
        data: Union[memoryview, bytes, bytearray, _mmap, _array[Any]],
        subtype: int = BINARY_SUBTYPE,
    ) -> Binary:
        if not isinstance(subtype, int):
            raise TypeError(f"subtype must be an instance of int, not {type(subtype)}")
        if subtype >= 256 or subtype < 0:
            raise ValueError("subtype must be contained in [0, 256)")
        # Support any type that implements the buffer protocol.
        self = bytes.__new__(cls, memoryview(data).tobytes())
        self.__subtype = subtype
        return self

    @classmethod
    def from_uuid(
        cls: Type[Binary], uuid: UUID, uuid_representation: int = UuidRepresentation.STANDARD
    ) -> Binary:
        """Create a BSON Binary object from a Python UUID.
        Creates a :class:`~bson.binary.Binary` object from a
        :class:`uuid.UUID` instance. Assumes that the native
        :class:`uuid.UUID` instance uses the byte-order implied by the
        provided ``uuid_representation``.
        Raises :exc:`TypeError` if `uuid` is not an instance of
        :class:`~uuid.UUID`.
        :param uuid: A :class:`uuid.UUID` instance.
        :param uuid_representation: A member of
            :class:`~bson.binary.UuidRepresentation`. Default:
            :const:`~bson.binary.UuidRepresentation.STANDARD`.
            See `UUID representations <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#universally-unique-ids--uuids->`_ for details.
        .. versionadded:: 3.11
        """
        if not isinstance(uuid, UUID):
            raise TypeError(f"uuid must be an instance of uuid.UUID, not {type(uuid)}")
        if uuid_representation not in ALL_UUID_REPRESENTATIONS:
            raise ValueError(
                "uuid_representation must be a value from bson.binary.UuidRepresentation"
            )
        if uuid_representation == UuidRepresentation.UNSPECIFIED:
            raise ValueError(
                "cannot encode native uuid.UUID with "
                "UuidRepresentation.UNSPECIFIED. UUIDs can be manually "
                "converted to bson.Binary instances using "
                "bson.Binary.from_uuid() or a different UuidRepresentation "
                "can be configured. See the documentation for "
                "UuidRepresentation for more information."
            )
        # All legacy representations share the old UUID subtype; only the
        # byte order of the payload differs between them.
        subtype = OLD_UUID_SUBTYPE
        if uuid_representation == UuidRepresentation.PYTHON_LEGACY:
            payload = uuid.bytes
        elif uuid_representation == UuidRepresentation.JAVA_LEGACY:
            from_uuid = uuid.bytes
            # Java legacy: reverse each 8-byte half independently.
            payload = from_uuid[0:8][::-1] + from_uuid[8:16][::-1]
        elif uuid_representation == UuidRepresentation.CSHARP_LEGACY:
            payload = uuid.bytes_le
        else:
            # uuid_representation == UuidRepresentation.STANDARD
            subtype = UUID_SUBTYPE
            payload = uuid.bytes
        return cls(payload, subtype)

    def as_uuid(self, uuid_representation: int = UuidRepresentation.STANDARD) -> UUID:
        """Create a Python UUID from this BSON Binary object.
        Decodes this binary object as a native :class:`uuid.UUID` instance
        with the provided ``uuid_representation``.
        Raises :exc:`ValueError` if this :class:`~bson.binary.Binary` instance
        does not contain a UUID.
        :param uuid_representation: A member of
            :class:`~bson.binary.UuidRepresentation`. Default:
            :const:`~bson.binary.UuidRepresentation.STANDARD`.
            See `UUID representations <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#universally-unique-ids--uuids->`_ for details.
        .. versionadded:: 3.11
        """
        if self.subtype not in ALL_UUID_SUBTYPES:
            raise ValueError(f"cannot decode subtype {self.subtype} as a uuid")
        if uuid_representation not in ALL_UUID_REPRESENTATIONS:
            raise ValueError(
                "uuid_representation must be a value from bson.binary.UuidRepresentation"
            )
        if uuid_representation == UuidRepresentation.UNSPECIFIED:
            raise ValueError("uuid_representation cannot be UNSPECIFIED")
        elif uuid_representation == UuidRepresentation.PYTHON_LEGACY:
            if self.subtype == OLD_UUID_SUBTYPE:
                return UUID(bytes=self)
        elif uuid_representation == UuidRepresentation.JAVA_LEGACY:
            if self.subtype == OLD_UUID_SUBTYPE:
                return UUID(bytes=self[0:8][::-1] + self[8:16][::-1])
        elif uuid_representation == UuidRepresentation.CSHARP_LEGACY:
            if self.subtype == OLD_UUID_SUBTYPE:
                return UUID(bytes_le=self)
        else:
            # uuid_representation == UuidRepresentation.STANDARD
            if self.subtype == UUID_SUBTYPE:
                return UUID(bytes=self)
        # Fell through: representation and stored subtype don't agree.
        raise ValueError(
            f"cannot decode subtype {self.subtype} to {UUID_REPRESENTATION_NAMES[uuid_representation]}"
        )

    @classmethod
    @overload
    def from_vector(cls: Type[Binary], vector: BinaryVector) -> Binary:
        ...

    @classmethod
    @overload
    def from_vector(
        cls: Type[Binary],
        vector: Union[list[int], list[float]],
        dtype: BinaryVectorDtype,
        padding: int = 0,
    ) -> Binary:
        ...

    @classmethod
    @overload
    def from_vector(
        cls: Type[Binary],
        vector: npt.NDArray[np.number],
        dtype: BinaryVectorDtype,
        padding: int = 0,
    ) -> Binary:
        ...

    @classmethod
    def from_vector(
        cls: Type[Binary],
        vector: Union[BinaryVector, list[int], list[float], npt.NDArray[np.number]],
        dtype: Optional[BinaryVectorDtype] = None,
        padding: Optional[int] = None,
    ) -> Binary:
        """Create a BSON :class:`~bson.binary.Binary` of Vector subtype.
        To interpret the representation of the numbers, a data type must be included.
        See :class:`~bson.binary.BinaryVectorDtype` for available types and descriptions.
        The dtype and padding are prepended to the binary data's value.
        :param vector: Either a List of values, or a :class:`~bson.binary.BinaryVector` dataclass.
        :param dtype: Data type of the values
        :param padding: For fractional bytes, number of bits to ignore at end of vector.
        :return: Binary packed data identified by dtype and padding.
        .. versionchanged:: 4.14
           When padding is non-zero, ignored bits should be zero. Raise exception on encoding, warn on decoding.
        .. versionadded:: 4.10
        """
        if isinstance(vector, BinaryVector):
            if dtype or padding:
                raise ValueError(
                    "The first argument, vector, has type BinaryVector. "
                    "dtype or padding cannot be separately defined, but were."
                )
            dtype = vector.dtype
            padding = vector.padding
            vector = vector.data  # type: ignore
        padding = 0 if padding is None else padding
        if not isinstance(dtype, BinaryVectorDtype):
            raise TypeError(
                "dtype must be a bson.BinaryVectorDtype of BinaryVectorDType.INT8, PACKED_BIT, FLOAT32"
            )
        # Two-byte metadata prefix: dtype code byte then padding count.
        metadata = struct.pack("<sB", dtype.value, padding)
        if isinstance(vector, list):
            if dtype == BinaryVectorDtype.INT8:  # pack ints in [-128, 127] as signed int8
                format_str = "b"
                if padding:
                    raise ValueError(f"padding does not apply to {dtype=}")
            elif dtype == BinaryVectorDtype.PACKED_BIT:  # pack ints in [0, 255] as unsigned uint8
                format_str = "B"
                # BUG FIX: the original check ``0 <= padding > 7`` chains to
                # ``padding >= 0 and padding > 7``, so negative padding was
                # silently accepted.  Reject anything outside [0, 7].
                if not 0 <= padding <= 7:
                    raise ValueError(f"{padding=}. It must be in [0,1, ..7].")
                if padding and not vector:
                    raise ValueError("Empty vector with non-zero padding.")
            elif dtype == BinaryVectorDtype.FLOAT32:  # pack floats as float32
                format_str = "f"
                if padding:
                    raise ValueError(f"padding does not apply to {dtype=}")
            else:
                raise NotImplementedError("%s not yet supported" % dtype)
            data = struct.pack(f"<{len(vector)}{format_str}", *vector)
        else:  # vector is numpy array or incorrect type.
            try:
                import numpy as np
            except ImportError as exc:
                raise ImportError(
                    "Failed to create binary from vector. Check type. If numpy array, numpy must be installed."
                ) from exc
            if not isinstance(vector, np.ndarray):
                raise TypeError(
                    "Could not create Binary. Vector must be a BinaryVector, list[int], list[float] or numpy ndarray."
                )
            if vector.ndim != 1:
                raise ValueError(
                    "from_numpy_vector only supports 1D arrays as it creates a single vector."
                )
            if dtype == BinaryVectorDtype.FLOAT32:
                vector = vector.astype(np.dtype("float32"), copy=False)
            elif dtype == BinaryVectorDtype.INT8:
                if vector.min() >= -128 and vector.max() <= 127:
                    vector = vector.astype(np.dtype("int8"), copy=False)
                else:
                    raise ValueError("Values found outside INT8 range.")
            elif dtype == BinaryVectorDtype.PACKED_BIT:
                # NOTE(review): the 127 upper bound looks inconsistent with the
                # documented [0, 255] uint8 range for PACKED_BIT — confirm the
                # intended limit before widening it.
                if vector.min() >= 0 and vector.max() <= 127:
                    vector = vector.astype(np.dtype("uint8"), copy=False)
                else:
                    raise ValueError("Values found outside UINT8 range.")
            else:
                raise NotImplementedError("%s not yet supported" % dtype)
            data = vector.tobytes()
        # Ignored (padded) low bits of the final byte must be zero.
        if padding and len(vector) and not (data[-1] & ((1 << padding) - 1)) == 0:
            raise ValueError(
                "Vector has a padding P, but bits in the final byte lower than P are non-zero. They must be zero."
            )
        return cls(metadata + data, subtype=VECTOR_SUBTYPE)

    def as_vector(self, return_numpy: bool = False) -> BinaryVector:
        """From the Binary, create a list or 1-d numpy array of numbers, along with dtype and padding.
        :param return_numpy: If True, BinaryVector.data will be a one-dimensional numpy array. By default, it is a list.
        :return: BinaryVector
        .. versionadded:: 4.10
        """
        if self.subtype != VECTOR_SUBTYPE:
            raise ValueError(f"Cannot decode subtype {self.subtype} as a vector")
        dtype, padding = struct.unpack_from("<sB", self)
        dtype = BinaryVectorDtype(dtype)
        offset = 2  # skip the two metadata bytes (dtype, padding)
        n_bytes = len(self) - offset
        if padding and dtype != BinaryVectorDtype.PACKED_BIT:
            raise ValueError(
                f"Corrupt data. Padding ({padding}) must be 0 for all but PACKED_BIT dtypes. ({dtype=})"
            )
        if not return_numpy:
            if dtype == BinaryVectorDtype.INT8:
                dtype_format = "b"
                format_string = f"<{n_bytes}{dtype_format}"
                vector = list(struct.unpack_from(format_string, self, offset))
                return BinaryVector(vector, dtype, padding)
            elif dtype == BinaryVectorDtype.FLOAT32:
                n_values = n_bytes // 4
                if n_bytes % 4:
                    raise ValueError(
                        "Corrupt data. N bytes for a float32 vector must be a multiple of 4."
                    )
                dtype_format = "f"
                format_string = f"<{n_values}{dtype_format}"
                vector = list(struct.unpack_from(format_string, self, offset))
                return BinaryVector(vector, dtype, padding)
            elif dtype == BinaryVectorDtype.PACKED_BIT:
                # data packed as uint8
                if padding and not n_bytes:
                    raise ValueError("Corrupt data. Vector has a padding P, but no data.")
                if padding > 7 or padding < 0:
                    raise ValueError(f"Corrupt data. Padding ({padding}) must be between 0 and 7.")
                dtype_format = "B"
                format_string = f"<{n_bytes}{dtype_format}"
                unpacked_uint8s = list(struct.unpack_from(format_string, self, offset))
                # Non-zero padded bits are deprecated on decode (error on encode).
                if padding and n_bytes and unpacked_uint8s[-1] & (1 << padding) - 1 != 0:
                    warnings.warn(
                        "Vector has a padding P, but bits in the final byte lower than P are non-zero. For pymongo>=5.0, they must be zero.",
                        DeprecationWarning,
                        stacklevel=2,
                    )
                return BinaryVector(unpacked_uint8s, dtype, padding)
            else:
                raise NotImplementedError("Binary Vector dtype %s not yet supported" % dtype.name)
        else:  # create a numpy array
            try:
                import numpy as np
            except ImportError as exc:
                raise ImportError(
                    "Converting binary to numpy.ndarray requires numpy to be installed."
                ) from exc
            if dtype == BinaryVectorDtype.INT8:
                data = np.frombuffer(self[offset:], dtype="int8")
            elif dtype == BinaryVectorDtype.FLOAT32:
                if n_bytes % 4:
                    raise ValueError(
                        "Corrupt data. N bytes for a float32 vector must be a multiple of 4."
                    )
                data = np.frombuffer(self[offset:], dtype="float32")
            elif dtype == BinaryVectorDtype.PACKED_BIT:
                # data packed as uint8
                if padding and not n_bytes:
                    raise ValueError("Corrupt data. Vector has a padding P, but no data.")
                if padding > 7 or padding < 0:
                    raise ValueError(f"Corrupt data. Padding ({padding}) must be between 0 and 7.")
                data = np.frombuffer(self[offset:], dtype="uint8")
                # NOTE(review): np.unpackbits is handed the scalar data[-1];
                # confirm this is accepted by supported numpy versions —
                # data[-1:] would be the unambiguous spelling.
                if padding and np.unpackbits(data[-1])[-padding:].sum() > 0:
                    warnings.warn(
                        "Vector has a padding P, but bits in the final byte lower than P are non-zero. For pymongo>=5.0, they must be zero.",
                        DeprecationWarning,
                        stacklevel=2,
                    )
            else:
                raise NotImplementedError("Binary Vector dtype %s not yet supported" % dtype.name)
            return BinaryVector(data, dtype, padding)

    @property
    def subtype(self) -> int:
        """Subtype of this binary data."""
        return self.__subtype

    def __getnewargs__(self) -> Tuple[bytes, int]:  # type: ignore[override]
        # Work around http://bugs.python.org/issue7382
        data = super().__getnewargs__()[0]
        if not isinstance(data, bytes):
            data = data.encode("latin-1")
        return data, self.__subtype

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, Binary):
            return (self.__subtype, bytes(self)) == (other.subtype, bytes(other))
        # We don't return NotImplemented here because if we did then
        # Binary("foo") == "foo" would return True, since Binary is a
        # subclass of str...
        return False

    def __hash__(self) -> int:
        return super().__hash__() ^ hash(self.__subtype)

    def __ne__(self, other: Any) -> bool:
        return not self == other

    def __repr__(self) -> str:
        # Redact sensitive payloads so they never leak into logs.
        if self.__subtype == SENSITIVE_SUBTYPE:
            return f"<Binary(REDACTED, {self.__subtype})>"
        else:
            return f"Binary({bytes.__repr__(self)}, {self.__subtype})"

View File

@@ -0,0 +1,233 @@
/*
* Copyright 2013-2016 MongoDB, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef BSON_ENDIAN_H
#define BSON_ENDIAN_H
#if defined(__sun)
# include <sys/byteorder.h>
#endif
#ifdef _MSC_VER
# define BSON_INLINE __inline
#else
# include <stdint.h>
# define BSON_INLINE __inline__
#endif
#define BSON_BIG_ENDIAN 4321
#define BSON_LITTLE_ENDIAN 1234
/* WORDS_BIGENDIAN from pyconfig.h / Python.h */
#ifdef WORDS_BIGENDIAN
# define BSON_BYTE_ORDER BSON_BIG_ENDIAN
#else
# define BSON_BYTE_ORDER BSON_LITTLE_ENDIAN
#endif
/* Select the fastest available byte-swap primitive for each width:
 * Solaris BSWAP_*, then clang/gcc builtins, then the portable
 * __bson_*_swap_slow fallbacks defined later in this header. */
#if defined(__sun)
# define BSON_UINT16_SWAP_LE_BE(v) BSWAP_16((uint16_t)v)
# define BSON_UINT32_SWAP_LE_BE(v) BSWAP_32((uint32_t)v)
# define BSON_UINT64_SWAP_LE_BE(v) BSWAP_64((uint64_t)v)
#elif defined(__clang__) && defined(__clang_major__) && defined(__clang_minor__) && \
    (__clang_major__ >= 3) && (__clang_minor__ >= 1)
/* NOTE(review): this version test excludes e.g. clang 4.0 (minor 0); the
 * __has_builtin guards below still do the right thing, so it is only ever
 * conservative, never wrong. */
# if __has_builtin(__builtin_bswap16)
# define BSON_UINT16_SWAP_LE_BE(v) __builtin_bswap16(v)
# endif
# if __has_builtin(__builtin_bswap32)
# define BSON_UINT32_SWAP_LE_BE(v) __builtin_bswap32(v)
# endif
# if __has_builtin(__builtin_bswap64)
# define BSON_UINT64_SWAP_LE_BE(v) __builtin_bswap64(v)
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
/* gcc gained bswap32/64 builtins in 4.3 and bswap16 in 4.8. */
# if __GNUC__ >= 4 && defined (__GNUC_MINOR__) && __GNUC_MINOR__ >= 3
# define BSON_UINT32_SWAP_LE_BE(v) __builtin_bswap32 ((uint32_t)v)
# define BSON_UINT64_SWAP_LE_BE(v) __builtin_bswap64 ((uint64_t)v)
# endif
# if __GNUC__ >= 4 && defined (__GNUC_MINOR__) && __GNUC_MINOR__ >= 8
# define BSON_UINT16_SWAP_LE_BE(v) __builtin_bswap16 ((uint32_t)v)
# endif
#endif
/* Portable fallbacks for anything still undefined above. */
#ifndef BSON_UINT16_SWAP_LE_BE
# define BSON_UINT16_SWAP_LE_BE(v) __bson_uint16_swap_slow ((uint16_t)v)
#endif
#ifndef BSON_UINT32_SWAP_LE_BE
# define BSON_UINT32_SWAP_LE_BE(v) __bson_uint32_swap_slow ((uint32_t)v)
#endif
#ifndef BSON_UINT64_SWAP_LE_BE
# define BSON_UINT64_SWAP_LE_BE(v) __bson_uint64_swap_slow ((uint64_t)v)
#endif
/* FROM_LE/TO_LE and FROM_BE/TO_BE conversion macros. BSON is defined as
 * little-endian on the wire, so on a little-endian host the LE macros are
 * no-op casts and the BE macros swap; on a big-endian host it is reversed. */
#if BSON_BYTE_ORDER == BSON_LITTLE_ENDIAN
# define BSON_UINT16_FROM_LE(v) ((uint16_t)v)
# define BSON_UINT16_TO_LE(v) ((uint16_t)v)
# define BSON_UINT16_FROM_BE(v) BSON_UINT16_SWAP_LE_BE (v)
# define BSON_UINT16_TO_BE(v) BSON_UINT16_SWAP_LE_BE (v)
# define BSON_UINT32_FROM_LE(v) ((uint32_t)v)
# define BSON_UINT32_TO_LE(v) ((uint32_t)v)
# define BSON_UINT32_FROM_BE(v) BSON_UINT32_SWAP_LE_BE (v)
# define BSON_UINT32_TO_BE(v) BSON_UINT32_SWAP_LE_BE (v)
# define BSON_UINT64_FROM_LE(v) ((uint64_t)v)
# define BSON_UINT64_TO_LE(v) ((uint64_t)v)
# define BSON_UINT64_FROM_BE(v) BSON_UINT64_SWAP_LE_BE (v)
# define BSON_UINT64_TO_BE(v) BSON_UINT64_SWAP_LE_BE (v)
# define BSON_DOUBLE_FROM_LE(v) ((double)v)
# define BSON_DOUBLE_TO_LE(v) ((double)v)
#elif BSON_BYTE_ORDER == BSON_BIG_ENDIAN
# define BSON_UINT16_FROM_LE(v) BSON_UINT16_SWAP_LE_BE (v)
# define BSON_UINT16_TO_LE(v) BSON_UINT16_SWAP_LE_BE (v)
# define BSON_UINT16_FROM_BE(v) ((uint16_t)v)
# define BSON_UINT16_TO_BE(v) ((uint16_t)v)
# define BSON_UINT32_FROM_LE(v) BSON_UINT32_SWAP_LE_BE (v)
# define BSON_UINT32_TO_LE(v) BSON_UINT32_SWAP_LE_BE (v)
# define BSON_UINT32_FROM_BE(v) ((uint32_t)v)
# define BSON_UINT32_TO_BE(v) ((uint32_t)v)
# define BSON_UINT64_FROM_LE(v) BSON_UINT64_SWAP_LE_BE (v)
# define BSON_UINT64_TO_LE(v) BSON_UINT64_SWAP_LE_BE (v)
# define BSON_UINT64_FROM_BE(v) ((uint64_t)v)
# define BSON_UINT64_TO_BE(v) ((uint64_t)v)
/* doubles are byte-swapped through their uint64 representation. */
# define BSON_DOUBLE_FROM_LE(v) (__bson_double_swap_slow (v))
# define BSON_DOUBLE_TO_LE(v) (__bson_double_swap_slow (v))
#else
# error "The endianness of target architecture is unknown."
#endif
/*
 *--------------------------------------------------------------------------
 *
 * __bson_uint16_swap_slow --
 *
 *       Portable byte-order reversal for 16-bit integers, used when no
 *       compiler or platform swap primitive is available.
 *
 * Returns:
 *       The endian swapped version.
 *
 * Side effects:
 *       None.
 *
 *--------------------------------------------------------------------------
 */
static BSON_INLINE uint16_t
__bson_uint16_swap_slow (uint16_t v) /* IN */
{
   /* The shifts promote to int; the cast truncates back to 16 bits. */
   return (uint16_t) ((v << 8) | (v >> 8));
}
/*
 *--------------------------------------------------------------------------
 *
 * __bson_uint32_swap_slow --
 *
 *       Portable byte-order reversal for 32-bit integers, used when no
 *       compiler or platform swap primitive is available.
 *
 * Returns:
 *       The endian swapped version.
 *
 * Side effects:
 *       None.
 *
 *--------------------------------------------------------------------------
 */
static BSON_INLINE uint32_t
__bson_uint32_swap_slow (uint32_t v) /* IN */
{
   /* Shift each byte into place first, then mask off the strays. */
   return (v >> 24) |
          ((v >> 8) & 0x0000FF00U) |
          ((v << 8) & 0x00FF0000U) |
          (v << 24);
}
/*
 *--------------------------------------------------------------------------
 *
 * __bson_uint64_swap_slow --
 *
 *       Portable byte-order reversal for 64-bit integers, used when no
 *       compiler or platform swap primitive is available.
 *
 * Returns:
 *       The endian swapped version.
 *
 * Side effects:
 *       None.
 *
 *--------------------------------------------------------------------------
 */
static BSON_INLINE uint64_t
__bson_uint64_swap_slow (uint64_t v) /* IN */
{
   /* Shift each byte into its mirrored position, then mask off the rest. */
   return ((v >> 56) & 0x00000000000000FFULL) |
          ((v >> 40) & 0x000000000000FF00ULL) |
          ((v >> 24) & 0x0000000000FF0000ULL) |
          ((v >>  8) & 0x00000000FF000000ULL) |
          ((v <<  8) & 0x000000FF00000000ULL) |
          ((v << 24) & 0x0000FF0000000000ULL) |
          ((v << 40) & 0x00FF000000000000ULL) |
          ((v << 56) & 0xFF00000000000000ULL);
}
/*
 *--------------------------------------------------------------------------
 *
 * __bson_double_swap_slow --
 *
 *       Fallback endianness conversion for double floating point.
 *
 * Returns:
 *       The endian swapped version.
 *
 * Side effects:
 *       None.
 *
 *--------------------------------------------------------------------------
 */
static BSON_INLINE double
__bson_double_swap_slow (double v) /* IN */
{
   uint64_t uv;
   /* Type-pun through memcpy rather than a pointer cast to avoid
    * strict-aliasing undefined behavior.
    * NOTE(review): memcpy needs <string.h>, which this header does not
    * include itself -- confirm every including translation unit does. */
   memcpy(&uv, &v, sizeof(v));
   uv = BSON_UINT64_SWAP_LE_BE(uv);
   memcpy(&v, &uv, sizeof(v));
   return v;
}
#endif /* BSON_ENDIAN_H */

View File

@@ -0,0 +1,157 @@
/*
* Copyright 2009-2015 MongoDB, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Include Python.h so we can set Python's error indicator. */
#define PY_SSIZE_T_CLEAN
#include "Python.h"

#include <limits.h>
#include <stdlib.h>
#include <string.h>

#include "buffer.h"
#define INITIAL_BUFFER_SIZE 256
/* Growable byte buffer used while encoding BSON documents. */
struct buffer {
    char* buffer;   /* heap storage; NULL only after a failed grow */
    int size;       /* allocated capacity in bytes */
    int position;   /* write offset; bytes [0, position) are valid */
};
/* Report an out-of-memory condition to the Python runtime.
 * Invoked whenever malloc/realloc returns NULL. */
static void set_memory_error(void) {
    (void) PyErr_NoMemory();
}
/* Allocate and return a new buffer.
* Return NULL and sets MemoryError on allocation failure. */
buffer_t pymongo_buffer_new(void) {
buffer_t buffer;
buffer = (buffer_t)malloc(sizeof(struct buffer));
if (buffer == NULL) {
set_memory_error();
return NULL;
}
buffer->size = INITIAL_BUFFER_SIZE;
buffer->position = 0;
buffer->buffer = (char*)malloc(sizeof(char) * INITIAL_BUFFER_SIZE);
if (buffer->buffer == NULL) {
free(buffer);
set_memory_error();
return NULL;
}
return buffer;
}
/* Release `buffer` and its backing storage.
 * Return non-zero when passed NULL. */
int pymongo_buffer_free(buffer_t buffer) {
    if (buffer == NULL) {
        return 1;
    }
    /* buffer->buffer may be NULL after a failed buffer_grow;
     * free(NULL) is a defined no-op, so no guard is needed. */
    free(buffer->buffer);
    free(buffer);
    return 0;
}
/* Grow `buffer` so it can hold at least `min_length` bytes.
 * Capacity is doubled until it reaches min_length. On allocation failure
 * the data block is freed, buffer->buffer is set to NULL (handled by
 * pymongo_buffer_free), MemoryError is set, and non-zero is returned. */
static int buffer_grow(buffer_t buffer, int min_length) {
    int size = buffer->size;
    char* old_buffer = buffer->buffer;
    char* new_buffer;

    if (size >= min_length) {
        return 0;
    }
    while (size < min_length) {
        /* Double without relying on signed overflow (undefined behavior).
         * When doubling can't be represented -- or size started < 1 --
         * just go with min_length. */
        if (size > INT_MAX / 2) {
            size = min_length;
        } else {
            size *= 2;
            if (size < 1) {
                size = min_length;
            }
        }
    }
    /* Keep the result in a temporary: on failure realloc leaves the old
     * block valid, and we still hold old_buffer to free it. */
    new_buffer = realloc(old_buffer, (size_t)size);
    if (new_buffer == NULL) {
        free(old_buffer);
        buffer->buffer = NULL;
        set_memory_error();
        return 1;
    }
    buffer->buffer = new_buffer;
    buffer->size = size;
    return 0;
}
/* Ensure `buffer` has room for `size` more bytes, growing when necessary.
 * Return non-zero and sets MemoryError on allocation failure.
 * Return non-zero and sets ValueError if `size` would exceed 2GiB. */
static int buffer_assure_space(buffer_t buffer, int size) {
    int needed = buffer->position + size;

    if (needed < buffer->position) {
        /* int wrapped around: the document would exceed the BSON limit. */
        PyErr_SetString(PyExc_ValueError,
                        "Document would overflow BSON size limit");
        return 1;
    }
    if (needed > buffer->size) {
        return buffer_grow(buffer, needed);
    }
    return 0;
}
/* Reserve `size` bytes at the current position (growing if needed) and
 * advance past them. Return the offset of the reserved region for later
 * writing, or -1 with MemoryError/ValueError set on failure. */
buffer_position pymongo_buffer_save_space(buffer_t buffer, int size) {
    int offset = buffer->position;

    if (buffer_assure_space(buffer, size) != 0) {
        return -1;
    }
    buffer->position = offset + size;
    return offset;
}
/* Append `size` bytes from `data` to the buffer (growing if needed).
 * Return non-zero with MemoryError or ValueError set on failure. */
int pymongo_buffer_write(buffer_t buffer, const char* data, int size) {
    char* dest;

    if (buffer_assure_space(buffer, size) != 0) {
        return 1;
    }
    dest = buffer->buffer + buffer->position;
    memcpy(dest, data, size);
    buffer->position += size;
    return 0;
}
/* Current write offset, i.e. the number of valid bytes in `buffer`. */
int pymongo_buffer_get_position(buffer_t buffer) {
    return buffer->position;
}
/* Raw backing storage; only the first `position` bytes are meaningful. */
char* pymongo_buffer_get_buffer(buffer_t buffer) {
    return buffer->buffer;
}
/* Move the write offset, e.g. back to a slot reserved with
 * pymongo_buffer_save_space.
 * NOTE(review): no bounds check -- callers must stay inside the
 * allocated region. */
void pymongo_buffer_update_position(buffer_t buffer, buffer_position new_position) {
    buffer->position = new_position;
}

View File

@@ -0,0 +1,51 @@
/*
* Copyright 2009-2015 MongoDB, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef BUFFER_H
#define BUFFER_H
/* Note: if any of these functions return a failure condition then the buffer
 * has already been freed. */
/* A growable byte buffer; the struct is opaque outside buffer.c. */
typedef struct buffer* buffer_t;
/* A position (byte offset) in the buffer */
typedef int buffer_position;
/* Allocate and return a new buffer.
 * Return NULL on allocation failure. */
buffer_t pymongo_buffer_new(void);
/* Free the memory allocated for `buffer`.
 * Return non-zero on failure (i.e. when passed NULL). */
int pymongo_buffer_free(buffer_t buffer);
/* Save `size` bytes from the current position in `buffer` (and grow if needed).
 * Return offset for writing, or -1 on allocation failure. */
buffer_position pymongo_buffer_save_space(buffer_t buffer, int size);
/* Write `size` bytes from `data` to `buffer` (and grow if needed).
 * Return non-zero on allocation failure. */
int pymongo_buffer_write(buffer_t buffer, const char* data, int size);
/* Getters for the internals of a buffer_t.
 * Should try to avoid using these as much as possible
 * since they break the abstraction. */
buffer_position pymongo_buffer_get_position(buffer_t buffer);
char* pymongo_buffer_get_buffer(buffer_t buffer);
/* Move the write offset; no bounds checking is performed. */
void pymongo_buffer_update_position(buffer_t buffer, buffer_position new_position);
#endif

View File

@@ -0,0 +1,100 @@
# Copyright 2009-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for representing JavaScript code in BSON."""
from __future__ import annotations
from collections.abc import Mapping as _Mapping
from typing import Any, Mapping, Optional, Type, Union
class Code(str):
    """BSON's JavaScript code type.

    Raises :class:`TypeError` if `code` is not an instance of
    :class:`str` or `scope` is not ``None`` or an instance
    of :class:`dict`.

    Scope variables may be supplied through the `scope` mapping or as
    keyword arguments. A keyword argument wins over an entry of the same
    name in `scope`, which in turn wins over any scope inherited from a
    `code` that is itself a :class:`Code` instance.

    :param code: A string containing JavaScript code to be evaluated or another
        instance of Code. In the latter case, the scope of `code` becomes this
        Code's :attr:`scope`.
    :param scope: dictionary representing the scope in which
        `code` should be evaluated - a mapping from identifiers (as
        strings) to values. Defaults to ``None``.
    :param kwargs: scope variables can also be passed as
        keyword arguments.

    .. versionchanged:: 3.4
       The default value for :attr:`scope` is ``None`` instead of ``{}``.
    """

    _type_marker = 13
    __scope: Union[Mapping[str, Any], None]

    def __new__(
        cls: Type[Code],
        code: Union[str, Code],
        scope: Optional[Mapping[str, Any]] = None,
        **kwargs: Any,
    ) -> Code:
        if not isinstance(code, str):
            raise TypeError(f"code must be an instance of str, not {type(code)}")
        self = str.__new__(cls, code)

        # Inherit the scope when `code` is itself a Code instance. The
        # inherited mapping is shared with `code`, not copied.
        try:
            self.__scope = code.scope  # type: ignore
        except AttributeError:
            self.__scope = None

        if scope is not None:
            if not isinstance(scope, _Mapping):
                raise TypeError(f"scope must be an instance of dict, not {type(scope)}")
            if self.__scope is None:
                self.__scope = scope
            else:
                self.__scope.update(scope)  # type: ignore

        if kwargs:
            if self.__scope is None:
                self.__scope = kwargs
            else:
                self.__scope.update(kwargs)  # type: ignore
        return self

    @property
    def scope(self) -> Optional[Mapping[str, Any]]:
        """Scope dictionary for this instance or ``None``."""
        return self.__scope

    def __repr__(self) -> str:
        return "Code(%s, %r)" % (str.__repr__(self), self.__scope)

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, Code):
            return False
        return (self.__scope, str(self)) == (other.__scope, str(other))

    # Explicitly unhashable.
    __hash__: Any = None

    def __ne__(self, other: Any) -> bool:
        return not self == other

View File

@@ -0,0 +1,505 @@
# Copyright 2014-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for specifying BSON codec options."""
from __future__ import annotations
import abc
import datetime
import enum
from collections.abc import MutableMapping as _MutableMapping
from typing import (
TYPE_CHECKING,
Any,
Callable,
Generic,
Iterable,
Mapping,
NamedTuple,
Optional,
Tuple,
Type,
Union,
cast,
)
from bson.binary import (
ALL_UUID_REPRESENTATIONS,
UUID_REPRESENTATION_NAMES,
UuidRepresentation,
)
from bson.typings import _DocumentType
_RAW_BSON_DOCUMENT_MARKER = 101
def _raw_document_class(document_class: Any) -> bool:
"""Determine if a document_class is a RawBSONDocument class."""
marker = getattr(document_class, "_type_marker", None)
return marker == _RAW_BSON_DOCUMENT_MARKER
class TypeEncoder(abc.ABC):
    """Base class for defining type codec classes which describe how a
    custom type can be transformed to one of the types BSON understands.

    Codec classes must implement the ``python_type`` attribute, and the
    ``transform_python`` method to support encoding.

    See `encode data with type codecs <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/custom-types/type-codecs/#encode-data-with-type-codecs>`_ documentation for an example.
    """

    # abc.abstractproperty is deprecated since Python 3.3; stacking
    # @property on @abc.abstractmethod is the supported equivalent.
    @property
    @abc.abstractmethod
    def python_type(self) -> Any:
        """The Python type to be converted into something serializable."""

    @abc.abstractmethod
    def transform_python(self, value: Any) -> Any:
        """Convert the given Python object into something serializable."""
class TypeDecoder(abc.ABC):
    """Base class for defining type codec classes which describe how a
    BSON type can be transformed to a custom type.

    Codec classes must implement the ``bson_type`` attribute, and the
    ``transform_bson`` method to support decoding.

    See `encode data with type codecs <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/custom-types/type-codecs/#encode-data-with-type-codecs>`_ documentation for an example.
    """

    # abc.abstractproperty is deprecated since Python 3.3; stacking
    # @property on @abc.abstractmethod is the supported equivalent.
    @property
    @abc.abstractmethod
    def bson_type(self) -> Any:
        """The BSON type to be converted into our own type."""

    @abc.abstractmethod
    def transform_bson(self, value: Any) -> Any:
        """Convert the given BSON value into our own type."""
class TypeCodec(TypeEncoder, TypeDecoder):
    """Base class for defining type codec classes which describe how a
    custom type can be transformed to/from one of the types :mod:`bson`
    can already encode/decode.

    Codec classes must implement the ``python_type`` attribute, and the
    ``transform_python`` method to support encoding, as well as the
    ``bson_type`` attribute, and the ``transform_bson`` method to support
    decoding. The abstract members are inherited from the two bases; this
    class adds no behavior of its own.

    See `encode data with type codecs <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/custom-types/type-codecs/#encode-data-with-type-codecs>`_ documentation for an example.
    """
# Any single object a TypeRegistry accepts: an encoder, a decoder, or both.
_Codec = Union[TypeEncoder, TypeDecoder, TypeCodec]
# Last-resort encoder: maps an otherwise unencodable value to an encodable one.
_Fallback = Callable[[Any], Any]
class TypeRegistry:
    """Encapsulates type codecs used in encoding and / or decoding BSON, as
    well as the fallback encoder. Type registries cannot be modified after
    instantiation.

    ``TypeRegistry`` can be initialized with an iterable of type codecs, and
    a callable for the fallback encoder::

      >>> from bson.codec_options import TypeRegistry
      >>> type_registry = TypeRegistry([Codec1, Codec2, Codec3, ...],
      ...                              fallback_encoder)

    See `add codec to the type registry <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/custom-types/type-codecs/#add-codec-to-the-type-registry>`_ documentation for an example.

    :param type_codecs: iterable of type codec instances. If
        ``type_codecs`` contains multiple codecs that transform a single
        python or BSON type, the transformation specified by the type codec
        occurring last prevails. A TypeError will be raised if one or more
        type codecs modify the encoding behavior of a built-in :mod:`bson`
        type.
    :param fallback_encoder: callable that accepts a single,
        unencodable python value and transforms it into a type that
        :mod:`bson` can encode. See `define a fallback encoder <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/custom-types/type-codecs/#define-a-fallback-encoder>`_
        documentation for an example.
    """

    def __init__(
        self,
        type_codecs: Optional[Iterable[_Codec]] = None,
        fallback_encoder: Optional[_Fallback] = None,
    ) -> None:
        self.__type_codecs = list(type_codecs or [])
        self._fallback_encoder = fallback_encoder
        # Flat lookup tables built once here so encode/decode paths only
        # do dict gets. Later codecs overwrite earlier ones for a type.
        self._encoder_map: dict[Any, Any] = {}
        self._decoder_map: dict[Any, Any] = {}

        if self._fallback_encoder is not None:
            if not callable(fallback_encoder):
                raise TypeError("fallback_encoder %r is not a callable" % (fallback_encoder))

        for codec in self.__type_codecs:
            is_valid_codec = False
            if isinstance(codec, TypeEncoder):
                self._validate_type_encoder(codec)
                is_valid_codec = True
                self._encoder_map[codec.python_type] = codec.transform_python
            if isinstance(codec, TypeDecoder):
                is_valid_codec = True
                self._decoder_map[codec.bson_type] = codec.transform_bson
            if not is_valid_codec:
                raise TypeError(
                    f"Expected an instance of {TypeEncoder.__name__}, {TypeDecoder.__name__}, or {TypeCodec.__name__}, got {codec!r} instead"
                )

    @property
    def codecs(self) -> list[TypeEncoder | TypeDecoder | TypeCodec]:
        """The list of type codecs in this registry.

        A shallow copy is returned so the registry itself cannot be
        mutated through the returned list (registries are documented as
        immutable after instantiation).
        """
        return list(self.__type_codecs)

    @property
    def fallback_encoder(self) -> Optional[_Fallback]:
        """The fallback encoder in this registry."""
        return self._fallback_encoder

    def _validate_type_encoder(self, codec: _Codec) -> None:
        """Reject encoders that would change how built-in BSON types encode."""
        from bson import _BUILT_IN_TYPES

        for pytype in _BUILT_IN_TYPES:
            if issubclass(cast(TypeCodec, codec).python_type, pytype):
                err_msg = (
                    "TypeEncoders cannot change how built-in types are "
                    f"encoded (encoder {codec} transforms type {pytype})"
                )
                raise TypeError(err_msg)

    def __repr__(self) -> str:
        return "{}(type_codecs={!r}, fallback_encoder={!r})".format(
            self.__class__.__name__,
            self.__type_codecs,
            self._fallback_encoder,
        )

    def __eq__(self, other: Any) -> Any:
        if not isinstance(other, type(self)):
            return NotImplemented
        return (
            (self._decoder_map == other._decoder_map)
            and (self._encoder_map == other._encoder_map)
            and (self._fallback_encoder == other._fallback_encoder)
        )
# Subclasses int so members compare/serialize as plain integers.
class DatetimeConversion(int, enum.Enum):
    """Options for decoding BSON datetimes."""

    DATETIME = 1
    """Decode a BSON UTC datetime as a :class:`datetime.datetime`.

    BSON UTC datetimes that cannot be represented as a
    :class:`~datetime.datetime` will raise an :class:`OverflowError`
    or a :class:`ValueError`.

    .. versionadded 4.3
    """

    DATETIME_CLAMP = 2
    """Decode a BSON UTC datetime as a :class:`datetime.datetime`, clamping
    to :attr:`~datetime.datetime.min` and :attr:`~datetime.datetime.max`.

    .. versionadded 4.3
    """

    DATETIME_MS = 3
    """Decode a BSON UTC datetime as a :class:`~bson.datetime_ms.DatetimeMS`
    object.

    .. versionadded 4.3
    """

    DATETIME_AUTO = 4
    """Decode a BSON UTC datetime as a :class:`datetime.datetime` if possible,
    and a :class:`~bson.datetime_ms.DatetimeMS` if not.

    .. versionadded 4.3
    """
class _BaseCodecOptions(NamedTuple):
    # Field order matters: CodecOptions.__new__ builds the tuple positionally.
    document_class: Type[Mapping[str, Any]]
    tz_aware: bool
    uuid_representation: int
    unicode_decode_error_handler: str
    tzinfo: Optional[datetime.tzinfo]
    type_registry: TypeRegistry
    datetime_conversion: Optional[DatetimeConversion]
if TYPE_CHECKING:
    # Static-typing facade only: at runtime CodecOptions is the
    # _BaseCodecOptions subclass defined in the else-branch below. This stub
    # exists so CodecOptions can be generic in the document type.
    class CodecOptions(Tuple[_DocumentType], Generic[_DocumentType]):
        document_class: Type[_DocumentType]
        tz_aware: bool
        uuid_representation: int
        unicode_decode_error_handler: Optional[str]
        tzinfo: Optional[datetime.tzinfo]
        type_registry: TypeRegistry
        datetime_conversion: Optional[int]

        def __new__(
            cls: Type[CodecOptions[_DocumentType]],
            document_class: Optional[Type[_DocumentType]] = ...,
            tz_aware: bool = ...,
            uuid_representation: Optional[int] = ...,
            unicode_decode_error_handler: Optional[str] = ...,
            tzinfo: Optional[datetime.tzinfo] = ...,
            type_registry: Optional[TypeRegistry] = ...,
            datetime_conversion: Optional[int] = ...,
        ) -> CodecOptions[_DocumentType]:
            ...

        # CodecOptions API
        def with_options(self, **kwargs: Any) -> CodecOptions[Any]:
            ...

        def _arguments_repr(self) -> str:
            ...

        # NamedTuple API
        @classmethod
        def _make(cls, obj: Iterable[Any]) -> CodecOptions[_DocumentType]:
            ...

        def _asdict(self) -> dict[str, Any]:
            ...

        def _replace(self, **kwargs: Any) -> CodecOptions[_DocumentType]:
            ...

        _source: str
        _fields: Tuple[str]
else:
class CodecOptions(_BaseCodecOptions):
"""Encapsulates options used encoding and / or decoding BSON."""
        def __init__(self, *args, **kwargs):
            """Encapsulates options used encoding and / or decoding BSON.

            The `document_class` option is used to define a custom type for use
            decoding BSON documents. Access to the underlying raw BSON bytes for
            a document is available using the :class:`~bson.raw_bson.RawBSONDocument`
            type::

              >>> from bson.raw_bson import RawBSONDocument
              >>> from bson.codec_options import CodecOptions
              >>> codec_options = CodecOptions(document_class=RawBSONDocument)
              >>> coll = db.get_collection('test', codec_options=codec_options)
              >>> doc = coll.find_one()
              >>> doc.raw
              '\\x16\\x00\\x00\\x00\\x07_id\\x00[0\\x165\\x91\\x10\\xea\\x14\\xe8\\xc5\\x8b\\x93\\x00'

            The document class can be any type that inherits from
            :class:`~collections.abc.MutableMapping`::

              >>> class AttributeDict(dict):
              ...     # A dict that supports attribute access.
              ...     def __getattr__(self, key):
              ...         return self[key]
              ...     def __setattr__(self, key, value):
              ...         self[key] = value
              ...
              >>> codec_options = CodecOptions(document_class=AttributeDict)
              >>> coll = db.get_collection('test', codec_options=codec_options)
              >>> doc = coll.find_one()
              >>> doc._id
              ObjectId('5b3016359110ea14e8c58b93')

            See `Dates and Times <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/dates-and-times/#dates-and-times>`_ for examples using the `tz_aware` and
            `tzinfo` options.

            See `UUID <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#universally-unique-ids--uuids->`_ for examples using the `uuid_representation`
            option.

            :param document_class: BSON documents returned in queries will be decoded
                to an instance of this class. Must be a subclass of
                :class:`~collections.abc.MutableMapping`. Defaults to :class:`dict`.
            :param tz_aware: If ``True``, BSON datetimes will be decoded to timezone
                aware instances of :class:`~datetime.datetime`. Otherwise they will be
                naive. Defaults to ``False``.
            :param uuid_representation: The BSON representation to use when encoding
                and decoding instances of :class:`~uuid.UUID`. Defaults to
                :data:`~bson.binary.UuidRepresentation.UNSPECIFIED`. New
                applications should consider setting this to
                :data:`~bson.binary.UuidRepresentation.STANDARD` for cross language
                compatibility. See `UUID representations <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#universally-unique-ids--uuids->`_ for details.
            :param unicode_decode_error_handler: The error handler to apply when
                a Unicode-related error occurs during BSON decoding that would
                otherwise raise :exc:`UnicodeDecodeError`. Valid options include
                'strict', 'replace', 'backslashreplace', 'surrogateescape', and
                'ignore'. Defaults to 'strict'.
            :param tzinfo: A :class:`~datetime.tzinfo` subclass that specifies the
                timezone to/from which :class:`~datetime.datetime` objects should be
                encoded/decoded.
            :param type_registry: Instance of :class:`TypeRegistry` used to customize
                encoding and decoding behavior.
            :param datetime_conversion: Specifies how UTC datetimes should be decoded
                within BSON. Valid options include 'datetime_ms' to return as a
                DatetimeMS, 'datetime' to return as a datetime.datetime and
                raising a ValueError for out-of-range values, 'datetime_auto' to
                return DatetimeMS objects when the underlying datetime is
                out-of-range and 'datetime_clamp' to clamp to the minimum and
                maximum possible datetimes. Defaults to 'datetime'.

            .. versionchanged:: 4.0
               The default for `uuid_representation` was changed from
               :const:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` to
               :const:`~bson.binary.UuidRepresentation.UNSPECIFIED`.

            .. versionadded:: 3.8
               `type_registry` attribute.

            .. warning:: Care must be taken when changing
               `unicode_decode_error_handler` from its default value ('strict').
               The 'replace' and 'ignore' modes should not be used when documents
               retrieved from the server will be modified in the client application
               and stored back to the server.
            """
            # All real construction/validation happens in __new__ (this is an
            # immutable NamedTuple subclass); this method exists for the docstring.
            super().__init__()
        def __new__(
            cls: Type[CodecOptions],
            document_class: Optional[Type[Mapping[str, Any]]] = None,
            tz_aware: bool = False,
            uuid_representation: Optional[int] = UuidRepresentation.UNSPECIFIED,
            unicode_decode_error_handler: str = "strict",
            tzinfo: Optional[datetime.tzinfo] = None,
            type_registry: Optional[TypeRegistry] = None,
            datetime_conversion: Optional[DatetimeConversion] = DatetimeConversion.DATETIME,
        ) -> CodecOptions:
            # Validate every option eagerly, then build the underlying tuple.
            doc_class = document_class or dict
            # issubclass can raise TypeError for generic aliases like SON[str, Any].
            # In that case we can use the base class for the comparison.
            is_mapping = False
            try:
                is_mapping = issubclass(doc_class, _MutableMapping)
            except TypeError:
                if hasattr(doc_class, "__origin__"):
                    is_mapping = issubclass(doc_class.__origin__, _MutableMapping)
            if not (is_mapping or _raw_document_class(doc_class)):
                raise TypeError(
                    "document_class must be dict, bson.son.SON, "
                    "bson.raw_bson.RawBSONDocument, or a "
                    "subclass of collections.abc.MutableMapping"
                )
            if not isinstance(tz_aware, bool):
                raise TypeError(f"tz_aware must be True or False, was: tz_aware={tz_aware}")
            if uuid_representation not in ALL_UUID_REPRESENTATIONS:
                raise ValueError(
                    "uuid_representation must be a value from bson.binary.UuidRepresentation"
                )
            if not isinstance(unicode_decode_error_handler, str):
                raise ValueError(
                    f"unicode_decode_error_handler must be a string, not {type(unicode_decode_error_handler)}"
                )
            # tzinfo only makes sense when decoding produces aware datetimes.
            if tzinfo is not None:
                if not isinstance(tzinfo, datetime.tzinfo):
                    raise TypeError(
                        f"tzinfo must be an instance of datetime.tzinfo, not {type(tzinfo)}"
                    )
                if not tz_aware:
                    raise ValueError("cannot specify tzinfo without also setting tz_aware=True")
            type_registry = type_registry or TypeRegistry()
            if not isinstance(type_registry, TypeRegistry):
                raise TypeError(
                    f"type_registry must be an instance of TypeRegistry, not {type(type_registry)}"
                )
            # The tuple layout must match _BaseCodecOptions' field order.
            return tuple.__new__(
                cls,
                (
                    doc_class,
                    tz_aware,
                    uuid_representation,
                    unicode_decode_error_handler,
                    tzinfo,
                    type_registry,
                    datetime_conversion,
                ),
            )
def _arguments_repr(self) -> str:
"""Representation of the arguments used to create this object."""
document_class_repr = (
"dict" if self.document_class is dict else repr(self.document_class)
)
uuid_rep_repr = UUID_REPRESENTATION_NAMES.get(
self.uuid_representation, self.uuid_representation
)
return (
"document_class={}, tz_aware={!r}, uuid_representation={}, "
"unicode_decode_error_handler={!r}, tzinfo={!r}, "
"type_registry={!r}, datetime_conversion={!s}".format(
document_class_repr,
self.tz_aware,
uuid_rep_repr,
self.unicode_decode_error_handler,
self.tzinfo,
self.type_registry,
self.datetime_conversion,
)
)
        def __repr__(self) -> str:
            # e.g. "CodecOptions(document_class=dict, tz_aware=False, ...)"
            return f"{self.__class__.__name__}({self._arguments_repr()})"
        def with_options(self, **kwargs: Any) -> CodecOptions:
            """Make a copy of this CodecOptions, overriding some options::

                >>> from bson.codec_options import DEFAULT_CODEC_OPTIONS
                >>> DEFAULT_CODEC_OPTIONS.tz_aware
                False
                >>> options = DEFAULT_CODEC_OPTIONS.with_options(tz_aware=True)
                >>> options.tz_aware
                True

            .. versionadded:: 3.5
            """
            # _asdict comes from the NamedTuple base; options not named in
            # kwargs keep their current values, and __new__ re-validates.
            opts = self._asdict()
            opts.update(kwargs)
            return CodecOptions(**opts)
DEFAULT_CODEC_OPTIONS: CodecOptions[dict[str, Any]] = CodecOptions()


def _parse_codec_options(options: Any) -> CodecOptions[Any]:
    """Parse BSON codec options out of a generic options mapping."""
    recognized = {
        "document_class",
        "tz_aware",
        "uuidrepresentation",
        "unicode_decode_error_handler",
        "tzinfo",
        "type_registry",
        "datetime_conversion",
    }
    kwargs = {}
    for key in options:
        if key not in recognized:
            continue
        # The URI/keyword spelling "uuidrepresentation" maps onto the
        # CodecOptions parameter "uuid_representation".
        target = "uuid_representation" if key == "uuidrepresentation" else key
        kwargs[target] = options[key]
    return CodecOptions(**kwargs)

View File

@@ -0,0 +1,182 @@
# Copyright 2022-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License. You
# may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.
"""Tools for representing the BSON datetime type.
.. versionadded:: 4.3
"""
from __future__ import annotations
import calendar
import datetime
from typing import Any, Union, cast
from bson.codec_options import DEFAULT_CODEC_OPTIONS, CodecOptions, DatetimeConversion
from bson.errors import InvalidBSON
from bson.tz_util import utc
# The Unix epoch, as aware (UTC) and naive datetimes; conversion bases below.
EPOCH_AWARE = datetime.datetime.fromtimestamp(0, utc)
EPOCH_NAIVE = EPOCH_AWARE.replace(tzinfo=None)
# Appended to out-of-range errors to point users at the DatetimeMS options.
_DATETIME_ERROR_SUGGESTION = (
    "(Consider Using CodecOptions(datetime_conversion=DATETIME_AUTO)"
    " or MongoClient(datetime_conversion='DATETIME_AUTO'))."
    " See: https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/dates-and-times/#handling-out-of-range-datetimes"
)
class DatetimeMS:
    """A BSON UTC datetime, kept as a raw int64 of milliseconds since the
    Unix epoch.

    The principal use of DatetimeMS is to represent datetimes outside the
    range of the Python builtin :class:`~datetime.datetime` class when
    encoding/decoding BSON. To decode UTC datetimes as a ``DatetimeMS``,
    `datetime_conversion` in :class:`~bson.codec_options.CodecOptions` must
    be set to 'datetime_ms' or 'datetime_auto'. See `handling out of range
    datetimes <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/dates-and-times/#handling-out-of-range-datetimes>`_ for
    details.
    """

    __slots__ = ("_value",)

    # Type marker consumed by the BSON encoder.
    _type_marker = 9

    def __init__(self, value: Union[int, datetime.datetime]):
        """Represents a BSON UTC datetime.

        :param value: An instance of :class:`datetime.datetime` to be
            represented as milliseconds since the Unix epoch, or int of
            milliseconds since the Unix epoch.
        """
        if isinstance(value, datetime.datetime):
            self._value = _datetime_to_millis(value)
        elif isinstance(value, int):
            if not (-(2**63) <= value <= 2**63 - 1):
                raise OverflowError("Must be a 64-bit integer of milliseconds")
            self._value = value
        else:
            raise TypeError(f"{type(value)} is not a valid type for DatetimeMS")

    def __hash__(self) -> int:
        return hash(self._value)

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self._value})"

    # Ordering delegates to int comparisons: when `other` is a DatetimeMS,
    # int's comparison returns NotImplemented and Python falls back to the
    # reflected operator on `other`, comparing the two underlying ints.
    def __lt__(self, other: Union[DatetimeMS, int]) -> bool:
        return self._value < other

    def __le__(self, other: Union[DatetimeMS, int]) -> bool:
        return self._value <= other

    def __gt__(self, other: Union[DatetimeMS, int]) -> bool:
        return self._value > other

    def __ge__(self, other: Union[DatetimeMS, int]) -> bool:
        return self._value >= other

    def __eq__(self, other: Any) -> bool:
        return self._value == other._value if isinstance(other, DatetimeMS) else False

    def __ne__(self, other: Any) -> bool:
        return self._value != other._value if isinstance(other, DatetimeMS) else True

    def __int__(self) -> int:
        return self._value

    def as_datetime(
        self, codec_options: CodecOptions[Any] = DEFAULT_CODEC_OPTIONS
    ) -> datetime.datetime:
        """Create a Python :class:`~datetime.datetime` from this DatetimeMS object.

        :param codec_options: A CodecOptions instance for specifying how the
            resulting DatetimeMS object will be formatted using ``tz_aware``
            and ``tz_info``. Defaults to
            :const:`~bson.codec_options.DEFAULT_CODEC_OPTIONS`.
        """
        return cast(datetime.datetime, _millis_to_datetime(self._value, codec_options))
def _datetime_to_millis(dtm: datetime.datetime) -> int:
"""Convert datetime to milliseconds since epoch UTC."""
if dtm.utcoffset() is not None:
dtm = dtm - dtm.utcoffset() # type: ignore
return int(calendar.timegm(dtm.timetuple()) * 1000 + dtm.microsecond // 1000)
# Boundaries of datetime.datetime's representable range, as aware UTC
# datetimes and as milliseconds since the Unix epoch.
_MIN_UTC = datetime.datetime.min.replace(tzinfo=utc)
_MAX_UTC = datetime.datetime.max.replace(tzinfo=utc)
_MIN_UTC_MS = _datetime_to_millis(_MIN_UTC)
_MAX_UTC_MS = _datetime_to_millis(_MAX_UTC)
# Inclusive min and max for timezones.
def _min_datetime_ms(tz: datetime.tzinfo = utc) -> int:
    """Inclusive lower bound, in milliseconds since the epoch, that
    datetime.datetime can represent in timezone `tz`.
    """
    delta = tz.utcoffset(_MIN_UTC)
    offset_millis = (
        0
        if delta is None
        else (delta.days * 86400 + delta.seconds) * 1000 + delta.microseconds // 1000
    )
    # Never report a bound below the UTC minimum itself.
    return max(_MIN_UTC_MS, _MIN_UTC_MS - offset_millis)
def _max_datetime_ms(tz: datetime.tzinfo = utc) -> int:
    """Inclusive upper bound, in milliseconds since the epoch, that
    datetime.datetime can represent in timezone `tz`.
    """
    delta = tz.utcoffset(_MAX_UTC)
    offset_millis = (
        0
        if delta is None
        else (delta.days * 86400 + delta.seconds) * 1000 + delta.microseconds // 1000
    )
    # Never report a bound above the UTC maximum itself.
    return min(_MAX_UTC_MS, _MAX_UTC_MS - offset_millis)
def _millis_to_datetime(
    millis: int, opts: CodecOptions[Any]
) -> Union[datetime.datetime, DatetimeMS]:
    """Convert milliseconds since epoch UTC to datetime."""
    if (
        opts.datetime_conversion == DatetimeConversion.DATETIME
        or opts.datetime_conversion == DatetimeConversion.DATETIME_CLAMP
        or opts.datetime_conversion == DatetimeConversion.DATETIME_AUTO
    ):
        tz = opts.tzinfo or utc
        if opts.datetime_conversion == DatetimeConversion.DATETIME_CLAMP:
            # Clamp into the range datetime.datetime can represent in tz.
            millis = max(_min_datetime_ms(tz), min(millis, _max_datetime_ms(tz)))
        elif opts.datetime_conversion == DatetimeConversion.DATETIME_AUTO:
            # Out-of-range values fall back to the lossless DatetimeMS wrapper.
            if not (_min_datetime_ms(tz) <= millis <= _max_datetime_ms(tz)):
                return DatetimeMS(millis)
        # Split millis into whole seconds and leftover microseconds; the
        # double-modulo keeps diff non-negative even for negative millis.
        diff = ((millis % 1000) + 1000) % 1000
        seconds = (millis - diff) // 1000
        micros = diff * 1000
        try:
            if opts.tz_aware:
                dt = EPOCH_AWARE + datetime.timedelta(seconds=seconds, microseconds=micros)
                if opts.tzinfo:
                    dt = dt.astimezone(tz)
                return dt
            else:
                return EPOCH_NAIVE + datetime.timedelta(seconds=seconds, microseconds=micros)
        except ArithmeticError as err:
            # Overflow past datetime.min/max; point the user at DATETIME_AUTO.
            raise InvalidBSON(f"{err} {_DATETIME_ERROR_SUGGESTION}") from err
    elif opts.datetime_conversion == DatetimeConversion.DATETIME_MS:
        return DatetimeMS(millis)
    else:
        raise ValueError("datetime_conversion must be an element of DatetimeConversion")

View File

@@ -0,0 +1,133 @@
# Copyright 2009-2015 MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for manipulating DBRefs (references to MongoDB documents)."""
from __future__ import annotations
from copy import deepcopy
from typing import Any, Mapping, Optional
from bson._helpers import _getstate_slots, _setstate_slots
from bson.son import SON
class DBRef:
    """A reference to a document stored in MongoDB."""

    __slots__ = "__collection", "__id", "__database", "__kwargs"
    __getstate__ = _getstate_slots
    __setstate__ = _setstate_slots
    # DBRef isn't actually a BSON "type" so this number was arbitrarily chosen.
    _type_marker = 100

    def __init__(
        self,
        collection: str,
        id: Any,
        database: Optional[str] = None,
        _extra: Optional[Mapping[str, Any]] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize a new :class:`DBRef`.

        Raises :class:`TypeError` if `collection` or `database` is not an
        instance of :class:`str`. `database` is optional and allows references
        to documents to work across databases. Any additional keyword
        arguments will create additional fields in the resultant embedded
        document.

        :param collection: name of the collection the document is stored in
        :param id: the value of the document's ``"_id"`` field
        :param database: name of the database to reference
        :param kwargs: additional keyword arguments will
            create additional, custom fields

        .. seealso:: The MongoDB documentation on `dbrefs <https://dochub.mongodb.org/core/dbrefs>`_.
        """
        if not isinstance(collection, str):
            raise TypeError(f"collection must be an instance of str, not {type(collection)}")
        if not (database is None or isinstance(database, str)):
            raise TypeError(f"database must be an instance of str, not {type(database)}")
        self.__collection = collection
        self.__id = id
        self.__database = database
        # Fold the private _extra mapping into the custom fields.
        fields = dict(kwargs)
        fields.update(_extra or {})
        self.__kwargs = fields

    @property
    def collection(self) -> str:
        """Get the name of this DBRef's collection."""
        return self.__collection

    @property
    def id(self) -> Any:
        """Get this DBRef's _id."""
        return self.__id

    @property
    def database(self) -> Optional[str]:
        """Get the name of this DBRef's database.

        Returns None if this DBRef doesn't specify a database.
        """
        return self.__database

    def __getattr__(self, key: Any) -> Any:
        # Expose the custom fields as attributes.
        try:
            return self.__kwargs[key]
        except KeyError:
            raise AttributeError(key) from None

    def as_doc(self) -> SON[str, Any]:
        """Get the SON document representation of this DBRef.

        Generally not needed by application developers.
        """
        doc = SON([("$ref", self.collection), ("$id", self.id)])
        if self.database is not None:
            doc["$db"] = self.database
        doc.update(self.__kwargs)
        return doc

    def __repr__(self) -> str:
        extra = "".join(f", {key}={val!r}" for key, val in self.__kwargs.items())
        head = f"DBRef({self.collection!r}, {self.id!r}"
        if self.database is not None:
            head += f", {self.database!r}"
        return head + extra + ")"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, DBRef):
            return NotImplemented
        return (self.__database, self.__collection, self.__id, self.__kwargs) == (
            other.__database,
            other.__collection,
            other.__id,
            other.__kwargs,
        )

    def __ne__(self, other: Any) -> bool:
        return not self == other

    def __hash__(self) -> int:
        """Get a hash value for this :class:`DBRef`."""
        return hash(
            (self.__collection, self.__id, self.__database, tuple(sorted(self.__kwargs.items())))
        )

    def __deepcopy__(self, memo: Any) -> DBRef:
        """Support function for `copy.deepcopy()`."""
        return DBRef(
            deepcopy(self.__collection, memo),
            deepcopy(self.__id, memo),
            deepcopy(self.__database, memo),
            deepcopy(self.__kwargs, memo),
        )

View File

@@ -0,0 +1,351 @@
# Copyright 2016-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for working with the BSON decimal128 type.
.. versionadded:: 3.4
"""
from __future__ import annotations
import decimal
import struct
from decimal import Decimal
from typing import Any, Sequence, Tuple, Type, Union
from bson.codec_options import TypeDecoder, TypeEncoder
# Little-endian 64-bit packers for the two halves of a BID value.
_PACK_64 = struct.Struct("<Q").pack
_UNPACK_64 = struct.Struct("<Q").unpack
# Decimal128 (IEEE 754-2008) encoding constants for the high 64-bit word.
_EXPONENT_MASK = 3 << 61
_EXPONENT_BIAS = 6176
_EXPONENT_MAX = 6144
_EXPONENT_MIN = -6143
_MAX_DIGITS = 34
# Special-value bit patterns: infinity, quiet NaN, signaling NaN, sign bit.
_INF = 0x7800000000000000
_NAN = 0x7C00000000000000
_SNAN = 0x7E00000000000000
_SIGN = 0x8000000000000000
# Precomputed (high, low) pairs for the signed/unsigned special values.
_NINF = (_INF + _SIGN, 0)
_PINF = (_INF, 0)
_NNAN = (_NAN + _SIGN, 0)
_PNAN = (_NAN, 0)
_NSNAN = (_SNAN + _SIGN, 0)
_PSNAN = (_SNAN, 0)
# decimal.Context options matching IEEE-754 Decimal128 arithmetic.
_CTX_OPTIONS = {
    "prec": _MAX_DIGITS,
    "rounding": decimal.ROUND_HALF_EVEN,
    "Emin": _EXPONENT_MIN,
    "Emax": _EXPONENT_MAX,
    "capitals": 1,
    "flags": [],
    "traps": [decimal.InvalidOperation, decimal.Overflow, decimal.Inexact],
    "clamp": 1,
}
# Internal context: traps enabled so invalid conversions raise.
_DEC128_CTX = decimal.Context(**_CTX_OPTIONS.copy())  # type: ignore
_VALUE_OPTIONS = Union[decimal.Decimal, float, str, Tuple[int, Sequence[int], int]]
class DecimalEncoder(TypeEncoder):
    """Converts Python :class:`decimal.Decimal` to BSON :class:`Decimal128`.

    For example::

        opts = CodecOptions(type_registry=TypeRegistry([DecimalEncoder()]))
        bson.encode({"d": decimal.Decimal('1.0')}, codec_options=opts)

    .. versionadded:: 4.15
    """

    @property
    def python_type(self) -> Type[Decimal]:
        # The Python type this encoder is registered for.
        return Decimal

    def transform_python(self, value: Any) -> Decimal128:
        # Wrap the decimal.Decimal in the BSON wrapper type.
        return Decimal128(value)
class DecimalDecoder(TypeDecoder):
    """Converts BSON :class:`Decimal128` to Python :class:`decimal.Decimal`.

    For example::

        opts = CodecOptions(type_registry=TypeRegistry([DecimalDecoder()]))
        bson.decode(data, codec_options=opts)

    .. versionadded:: 4.15
    """

    @property
    def bson_type(self) -> Type[Decimal128]:
        # The BSON wrapper type this decoder is registered for.
        return Decimal128

    def transform_bson(self, value: Any) -> decimal.Decimal:
        # Unwrap back to a plain decimal.Decimal.
        return value.to_decimal()
def create_decimal128_context() -> decimal.Context:
    """Returns an instance of :class:`decimal.Context` appropriate
    for working with IEEE-754 128-bit decimal floating point values.
    """
    # Same options as the internal context, but with all traps disabled so
    # out-of-range results round or clamp instead of raising.
    return decimal.Context(**dict(_CTX_OPTIONS, traps=[]))  # type: ignore
def _decimal_to_128(value: _VALUE_OPTIONS) -> Tuple[int, int]:
    """Converts a decimal.Decimal to BID (high bits, low bits).

    :param value: An instance of decimal.Decimal
    """
    with decimal.localcontext(_DEC128_CTX) as ctx:
        value = ctx.create_decimal(value)
    if value.is_infinite():
        return _NINF if value.is_signed() else _PINF
    sign, digits, exponent = value.as_tuple()
    if value.is_nan():
        # BSON Decimal128 cannot carry a NaN diagnostic payload.
        if digits:
            raise ValueError("NaN with debug payload is not supported")
        if value.is_snan():
            return _NSNAN if value.is_signed() else _PSNAN
        return _NNAN if value.is_signed() else _PNAN
    # Reassemble the coefficient digits into a single integer significand.
    significand = int("".join([str(digit) for digit in digits]))
    bit_length = significand.bit_length()
    # Split the significand across the low and high 64-bit words.
    high = 0
    low = 0
    for i in range(min(64, bit_length)):
        if significand & (1 << i):
            low |= 1 << i
    for i in range(64, bit_length):
        if significand & (1 << i):
            high |= 1 << (i - 64)
    biased_exponent = exponent + _EXPONENT_BIAS  # type: ignore[operator]
    if high >> 49 == 1:
        # Large significand: use the extended combination-field encoding,
        # which stores the exponent shifted down by two bits.
        high = high & 0x7FFFFFFFFFFF
        high |= _EXPONENT_MASK
        high |= (biased_exponent & 0x3FFF) << 47
    else:
        high |= biased_exponent << 49
    if sign:
        high |= _SIGN
    return high, low
class Decimal128:
    """BSON Decimal128 type::
    >>> Decimal128(Decimal("0.0005"))
    Decimal128('0.0005')
    >>> Decimal128("0.0005")
    Decimal128('0.0005')
    >>> Decimal128((3474527112516337664, 5))
    Decimal128('0.0005')
    :param value: An instance of :class:`decimal.Decimal`, string, or tuple of
        (high bits, low bits) from Binary Integer Decimal (BID) format.
    .. note:: :class:`~Decimal128` uses an instance of :class:`decimal.Context`
        configured for IEEE-754 Decimal128 when validating parameters.
        Signals like :class:`decimal.InvalidOperation`, :class:`decimal.Inexact`,
        and :class:`decimal.Overflow` are trapped and raised as exceptions::
    >>> Decimal128(".13.1")
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    ...
    decimal.InvalidOperation: [<class 'decimal.ConversionSyntax'>]
    >>>
    >>> Decimal128("1E-6177")
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    ...
    decimal.Inexact: [<class 'decimal.Inexact'>]
    >>>
    >>> Decimal128("1E6145")
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    ...
    decimal.Overflow: [<class 'decimal.Overflow'>, <class 'decimal.Rounded'>]
    To ensure the result of a calculation can always be stored as BSON
    Decimal128 use the context returned by
    :func:`create_decimal128_context`::
    >>> import decimal
    >>> decimal128_ctx = create_decimal128_context()
    >>> with decimal.localcontext(decimal128_ctx) as ctx:
    ...     Decimal128(ctx.create_decimal(".13.3"))
    ...
    Decimal128('NaN')
    >>>
    >>> with decimal.localcontext(decimal128_ctx) as ctx:
    ...     Decimal128(ctx.create_decimal("1E-6177"))
    ...
    Decimal128('0E-6176')
    >>>
    >>> with decimal.localcontext(decimal128_ctx) as ctx:
    ...     Decimal128(ctx.create_decimal("1E6145"))
    ...
    Decimal128('Infinity')
    To match the behavior of MongoDB's Decimal128 implementation
    str(Decimal(value)) may not match str(Decimal128(value)) for NaN values::
    >>> Decimal128(Decimal('NaN'))
    Decimal128('NaN')
    >>> Decimal128(Decimal('-NaN'))
    Decimal128('NaN')
    >>> Decimal128(Decimal('sNaN'))
    Decimal128('NaN')
    >>> Decimal128(Decimal('-sNaN'))
    Decimal128('NaN')
    However, :meth:`~Decimal128.to_decimal` will return the exact value::
    >>> Decimal128(Decimal('NaN')).to_decimal()
    Decimal('NaN')
    >>> Decimal128(Decimal('-NaN')).to_decimal()
    Decimal('-NaN')
    >>> Decimal128(Decimal('sNaN')).to_decimal()
    Decimal('sNaN')
    >>> Decimal128(Decimal('-sNaN')).to_decimal()
    Decimal('-sNaN')
    Two instances of :class:`Decimal128` compare equal if their Binary
    Integer Decimal encodings are equal::
    >>> Decimal128('NaN') == Decimal128('NaN')
    True
    >>> Decimal128('NaN').bid == Decimal128('NaN').bid
    True
    This differs from :class:`decimal.Decimal` comparisons for NaN::
    >>> Decimal('NaN') == Decimal('NaN')
    False
    """
    __slots__ = ("__high", "__low")
    _type_marker = 19
    def __init__(self, value: _VALUE_OPTIONS) -> None:
        # Strings and Decimals are converted through the trapping IEEE-754
        # context; a 2-tuple/list is taken as raw (high, low) BID words.
        if isinstance(value, (str, decimal.Decimal)):
            self.__high, self.__low = _decimal_to_128(value)
        elif isinstance(value, (list, tuple)):
            if len(value) != 2:
                raise ValueError(
                    "Invalid size for creation of Decimal128 "
                    "from list or tuple. Must have exactly 2 "
                    "elements."
                )
            self.__high, self.__low = value
        else:
            raise TypeError(f"Cannot convert {value!r} to Decimal128")
    def to_decimal(self) -> decimal.Decimal:
        """Returns an instance of :class:`decimal.Decimal` for this
        :class:`Decimal128`.
        """
        high = self.__high
        low = self.__low
        sign = 1 if (high & _SIGN) else 0
        # Special values first: signaling NaN, quiet NaN, infinity.
        if (high & _SNAN) == _SNAN:
            return decimal.Decimal((sign, (), "N"))  # type: ignore
        elif (high & _NAN) == _NAN:
            return decimal.Decimal((sign, (), "n"))  # type: ignore
        elif (high & _INF) == _INF:
            return decimal.Decimal((sign, (), "F"))  # type: ignore
        if (high & _EXPONENT_MASK) == _EXPONENT_MASK:
            # Extended combination field: per the spec this encodes a
            # non-canonical value whose significand is treated as zero.
            exponent = ((high & 0x1FFFE00000000000) >> 47) - _EXPONENT_BIAS
            return decimal.Decimal((sign, (0,), exponent))
        else:
            exponent = ((high & 0x7FFF800000000000) >> 49) - _EXPONENT_BIAS
            # Reassemble the 113-bit significand into a big-endian byte array.
            arr = bytearray(15)
            mask = 0x00000000000000FF
            for i in range(14, 6, -1):
                arr[i] = (low & mask) >> ((14 - i) << 3)
                mask = mask << 8
            mask = 0x00000000000000FF
            for i in range(6, 0, -1):
                arr[i] = (high & mask) >> ((6 - i) << 3)
                mask = mask << 8
            mask = 0x0001000000000000
            arr[0] = (high & mask) >> 48
            # cdecimal only accepts a tuple for digits.
            digits = tuple(int(digit) for digit in str(int.from_bytes(arr, "big")))
            with decimal.localcontext(_DEC128_CTX) as ctx:
                return ctx.create_decimal((sign, digits, exponent))
    @classmethod
    def from_bid(cls: Type[Decimal128], value: bytes) -> Decimal128:
        """Create an instance of :class:`Decimal128` from Binary Integer
        Decimal string.
        :param value: 16 byte string (128-bit IEEE 754-2008 decimal floating
            point in Binary Integer Decimal (BID) format).
        """
        if not isinstance(value, bytes):
            raise TypeError(f"value must be an instance of bytes, not {type(value)}")
        if len(value) != 16:
            raise ValueError("value must be exactly 16 bytes")
        # The wire format is little-endian: low word first, high word second.
        return cls((_UNPACK_64(value[8:])[0], _UNPACK_64(value[:8])[0]))  # type: ignore
    @property
    def bid(self) -> bytes:
        """The Binary Integer Decimal (BID) encoding of this instance."""
        return _PACK_64(self.__low) + _PACK_64(self.__high)
    def __str__(self) -> str:
        dec = self.to_decimal()
        if dec.is_nan():
            # Required by the drivers spec to match MongoDB behavior.
            return "NaN"
        return str(dec)
    def __repr__(self) -> str:
        return f"Decimal128('{self!s}')"
    def __setstate__(self, value: Tuple[int, int]) -> None:
        # Needed explicitly because __slots__ is defined.
        self.__high, self.__low = value
    def __getstate__(self) -> Tuple[int, int]:
        return self.__high, self.__low
    def __eq__(self, other: Any) -> bool:
        # Equality compares the raw BID encodings (so NaN == NaN here).
        if isinstance(other, Decimal128):
            return self.bid == other.bid
        return NotImplemented
    def __ne__(self, other: Any) -> bool:
        return not self == other

View File

@@ -0,0 +1,49 @@
# Copyright 2009-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Exceptions raised by the BSON package."""
from __future__ import annotations
from typing import Any, Optional
class BSONError(Exception):
    """Base class for all BSON exceptions."""


class InvalidBSON(BSONError):
    """Raised when trying to create a BSON object from invalid data."""


class InvalidStringData(BSONError):
    """Raised when trying to encode a string containing non-UTF8 data."""


class InvalidDocument(BSONError):
    """Raised when trying to create a BSON object from an invalid document."""

    def __init__(self, message: str, document: Optional[Any] = None) -> None:
        """Initialize with an error message and, optionally, the offending document.

        :param message: description of why the document is invalid
        :param document: the document that failed to encode, if available
        """
        super().__init__(message)
        self._document = document

    @property
    def document(self) -> Any:
        """The invalid document that caused the error, or None if unavailable.

        .. versionadded:: 4.16
        """
        return self._document


class InvalidId(BSONError):
    """Raised when trying to create an ObjectId from invalid data."""

View File

@@ -0,0 +1,39 @@
# Copyright 2014-2015 MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A BSON wrapper for long (int in python3)"""
from __future__ import annotations
from typing import Any
class Int64(int):
    """Representation of the BSON int64 type.

    This is necessary because every integral number is an :class:`int` in
    Python 3. Small integral numbers are encoded to BSON int32 by default,
    but Int64 numbers will always be encoded to BSON int64.

    :param value: the numeric value to represent
    """

    __slots__ = ()

    _type_marker = 18

    def __getstate__(self) -> Any:
        # No per-instance state beyond the int value, which int pickles itself.
        return {}

    def __setstate__(self, state: Any) -> None:
        # Nothing to restore; see __getstate__.
        pass

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,56 @@
# Copyright 2010-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Representation for the MongoDB internal MaxKey type."""
from __future__ import annotations
from typing import Any
class MaxKey:
    """MongoDB internal MaxKey type.

    Compares greater than every other value; all instances compare equal
    to one another.
    """

    __slots__ = ()

    _type_marker = 127

    def __getstate__(self) -> Any:
        # Stateless; nothing to pickle.
        return {}

    def __setstate__(self, state: Any) -> None:
        pass

    def __eq__(self, other: Any) -> bool:
        # All MaxKey instances are interchangeable.
        return isinstance(other, MaxKey)

    def __ne__(self, other: Any) -> bool:
        return not self == other

    def __hash__(self) -> int:
        return hash(self._type_marker)

    def __le__(self, other: Any) -> bool:
        # MaxKey is <= only to another MaxKey.
        return isinstance(other, MaxKey)

    def __lt__(self, dummy: Any) -> bool:
        # Nothing sorts after MaxKey.
        return False

    def __ge__(self, dummy: Any) -> bool:
        return True

    def __gt__(self, other: Any) -> bool:
        return not self == other

    def __repr__(self) -> str:
        return "MaxKey()"

View File

@@ -0,0 +1,56 @@
# Copyright 2010-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Representation for the MongoDB internal MinKey type."""
from __future__ import annotations
from typing import Any
class MinKey:
    """MongoDB internal MinKey type.

    Compares less than every other value; all instances compare equal
    to one another.
    """

    __slots__ = ()

    _type_marker = 255

    def __getstate__(self) -> Any:
        # Stateless; nothing to pickle.
        return {}

    def __setstate__(self, state: Any) -> None:
        pass

    def __eq__(self, other: Any) -> bool:
        # All MinKey instances are interchangeable.
        return isinstance(other, MinKey)

    def __ne__(self, other: Any) -> bool:
        return not self == other

    def __hash__(self) -> int:
        return hash(self._type_marker)

    def __le__(self, dummy: Any) -> bool:
        return True

    def __lt__(self, other: Any) -> bool:
        return not self == other

    def __ge__(self, other: Any) -> bool:
        # MinKey is >= only to another MinKey.
        return isinstance(other, MinKey)

    def __gt__(self, dummy: Any) -> bool:
        # Nothing sorts before MinKey.
        return False

    def __repr__(self) -> str:
        return "MinKey()"

View File

@@ -0,0 +1,258 @@
# Copyright 2009-2015 MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for working with MongoDB ObjectIds."""
from __future__ import annotations
import datetime
import os
import struct
import threading
import time
from random import SystemRandom
from typing import Any, NoReturn, Optional, Type, Union
from bson.datetime_ms import _datetime_to_millis
from bson.errors import InvalidId
from bson.tz_util import utc
# The 3-byte counter field wraps at 0xFFFFFF.
_MAX_COUNTER_VALUE = 0xFFFFFF
# Struct helpers: big-endian 4-byte int, and 4-byte int + 5 raw bytes.
_PACK_INT = struct.Struct(">I").pack
_PACK_INT_RANDOM = struct.Struct(">I5s").pack
_UNPACK_INT = struct.Struct(">I").unpack
def _raise_invalid_id(oid: str) -> NoReturn:
    # Shared error path for malformed ObjectId inputs.
    raise InvalidId(
        f"{oid!r} is not a valid ObjectId, it must be a 12-byte input"
        " or a 24-character hex string"
    )
def _random_bytes() -> bytes:
"""Get the 5-byte random field of an ObjectId."""
return os.urandom(5)
class ObjectId:
    """A MongoDB ObjectId."""
    # Cached process id; compared in _random() to detect forks so the
    # random field is regenerated in the child process.
    _pid = os.getpid()
    # 3-byte counter, seeded randomly per process and guarded by _inc_lock.
    _inc = SystemRandom().randint(0, _MAX_COUNTER_VALUE)
    _inc_lock = threading.Lock()
    # 5-byte random field, generated once per process.
    __random = _random_bytes()
    __slots__ = ("__id",)
    _type_marker = 7
    def __init__(self, oid: Optional[Union[str, ObjectId, bytes]] = None) -> None:
        """Initialize a new ObjectId.
        An ObjectId is a 12-byte unique identifier consisting of:
        - a 4-byte value representing the seconds since the Unix epoch,
        - a 5-byte random value,
        - a 3-byte counter, starting with a random value.
        By default, ``ObjectId()`` creates a new unique identifier. The
        optional parameter `oid` can be an :class:`ObjectId`, or any 12
        :class:`bytes`.
        For example, the 12 bytes b'foo-bar-quux' do not follow the ObjectId
        specification but they are acceptable input::
          >>> ObjectId(b'foo-bar-quux')
          ObjectId('666f6f2d6261722d71757578')
        `oid` can also be a :class:`str` of 24 hex digits::
          >>> ObjectId('0123456789ab0123456789ab')
          ObjectId('0123456789ab0123456789ab')
        Raises :class:`~bson.errors.InvalidId` if `oid` is not 12 bytes nor
        24 hex digits, or :class:`TypeError` if `oid` is not an accepted type.
        :param oid: a valid ObjectId.
        .. seealso:: The MongoDB documentation on `ObjectIds <http://dochub.mongodb.org/core/objectids>`_.
        .. versionchanged:: 3.8
           :class:`~bson.objectid.ObjectId` now implements the `ObjectID
           specification version 0.2
           <https://github.com/mongodb/specifications/blob/master/source/
           objectid.rst>`_.
        """
        if oid is None:
            # Generate a new value for this ObjectId.
            with ObjectId._inc_lock:
                inc = ObjectId._inc
                ObjectId._inc = (inc + 1) % (_MAX_COUNTER_VALUE + 1)
            # 4 bytes current time, 5 bytes random, 3 bytes inc.
            self.__id = _PACK_INT_RANDOM(int(time.time()), ObjectId._random()) + _PACK_INT(inc)[1:4]
        elif isinstance(oid, bytes) and len(oid) == 12:
            self.__id = oid
        elif isinstance(oid, str):
            if len(oid) == 24:
                try:
                    self.__id = bytes.fromhex(oid)
                except (TypeError, ValueError):
                    _raise_invalid_id(oid)
            else:
                _raise_invalid_id(oid)
        elif isinstance(oid, ObjectId):
            self.__id = oid.binary
        else:
            raise TypeError(f"id must be an instance of (bytes, str, ObjectId), not {type(oid)}")
    @classmethod
    def from_datetime(cls: Type[ObjectId], generation_time: datetime.datetime) -> ObjectId:
        """Create a dummy ObjectId instance with a specific generation time.
        This method is useful for doing range queries on a field
        containing :class:`ObjectId` instances.
        .. warning::
           It is not safe to insert a document containing an ObjectId
           generated using this method. This method deliberately
           eliminates the uniqueness guarantee that ObjectIds
           generally provide. ObjectIds generated with this method
           should be used exclusively in queries.
        `generation_time` will be converted to UTC. Naive datetime
        instances will be treated as though they already contain UTC.
        An example using this helper to get documents where ``"_id"``
        was generated before January 1, 2010 would be:
        >>> gen_time = datetime.datetime(2010, 1, 1)
        >>> dummy_id = ObjectId.from_datetime(gen_time)
        >>> result = collection.find({"_id": {"$lt": dummy_id}})
        :param generation_time: :class:`~datetime.datetime` to be used
            as the generation time for the resulting ObjectId.
        """
        # Timestamp in the first 4 bytes; random and counter fields zeroed.
        oid = (
            _PACK_INT(_datetime_to_millis(generation_time) // 1000)
            + b"\x00\x00\x00\x00\x00\x00\x00\x00"
        )
        return cls(oid)
    @classmethod
    def is_valid(cls: Type[ObjectId], oid: Any) -> bool:
        """Checks if a `oid` string is valid or not.
        :param oid: the object id to validate
        .. versionadded:: 2.3
        """
        if not oid:
            return False
        try:
            ObjectId(oid)
            return True
        except (InvalidId, TypeError):
            return False
    @classmethod
    def _random(cls) -> bytes:
        """Generate a 5-byte random number once per process."""
        # Regenerate after a fork so parent and child do not share the field.
        pid = os.getpid()
        if pid != cls._pid:
            cls._pid = pid
            cls.__random = _random_bytes()
        return cls.__random
    @property
    def binary(self) -> bytes:
        """12-byte binary representation of this ObjectId."""
        return self.__id
    @property
    def generation_time(self) -> datetime.datetime:
        """A :class:`datetime.datetime` instance representing the time of
        generation for this :class:`ObjectId`.
        The :class:`datetime.datetime` is timezone aware, and
        represents the generation time in UTC. It is precise to the
        second.
        """
        timestamp = _UNPACK_INT(self.__id[0:4])[0]
        return datetime.datetime.fromtimestamp(timestamp, utc)
    def __getstate__(self) -> bytes:
        """Return value of object for pickling.
        needed explicitly because __slots__() defined.
        """
        return self.__id
    def __setstate__(self, value: Any) -> None:
        """Explicit state set from pickling"""
        # Provide backwards compatibility with OIDs
        # pickled with pymongo-1.9 or older.
        if isinstance(value, dict):
            oid = value["_ObjectId__id"]
        else:
            oid = value
        # ObjectIds pickled in python 2.x used `str` for __id.
        # In python 3.x this has to be converted to `bytes`
        # by encoding latin-1.
        if isinstance(oid, str):
            self.__id = oid.encode("latin-1")
        else:
            self.__id = oid
    def __str__(self) -> str:
        return self.__id.hex()
    def __repr__(self) -> str:
        return f"ObjectId('{self!s}')"
    def __eq__(self, other: Any) -> bool:
        if isinstance(other, ObjectId):
            return self.__id == other.binary
        return NotImplemented
    def __ne__(self, other: Any) -> bool:
        if isinstance(other, ObjectId):
            return self.__id != other.binary
        return NotImplemented
    # Ordering compares the raw 12 bytes, so ObjectIds sort roughly by
    # generation time (the big-endian timestamp occupies the first 4 bytes).
    def __lt__(self, other: Any) -> bool:
        if isinstance(other, ObjectId):
            return self.__id < other.binary
        return NotImplemented
    def __le__(self, other: Any) -> bool:
        if isinstance(other, ObjectId):
            return self.__id <= other.binary
        return NotImplemented
    def __gt__(self, other: Any) -> bool:
        if isinstance(other, ObjectId):
            return self.__id > other.binary
        return NotImplemented
    def __ge__(self, other: Any) -> bool:
        if isinstance(other, ObjectId):
            return self.__id >= other.binary
        return NotImplemented
    def __hash__(self) -> int:
        """Get a hash value for this :class:`ObjectId`."""
        return hash(self.__id)

View File

@@ -0,0 +1,2 @@
# PEP-561 Support File.
# "Package maintainers who wish to support type checking of their code MUST add a marker file named py.typed to their package supporting typing".

View File

@@ -0,0 +1,200 @@
# Copyright 2015-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for representing raw BSON documents.
Inserting and Retrieving RawBSONDocuments
=========================================
Example: Moving a document between different databases/collections
.. doctest::
>>> import bson
>>> from pymongo import MongoClient
>>> from bson.raw_bson import RawBSONDocument
>>> client = MongoClient(document_class=RawBSONDocument)
>>> client.drop_database("db")
>>> client.drop_database("replica_db")
>>> db = client.db
>>> result = db.test.insert_many(
... [{"_id": 1, "a": 1}, {"_id": 2, "b": 1}, {"_id": 3, "c": 1}, {"_id": 4, "d": 1}]
... )
>>> replica_db = client.replica_db
>>> for doc in db.test.find():
... print(f"raw document: {doc.raw}")
... print(f"decoded document: {bson.decode(doc.raw)}")
... result = replica_db.test.insert_one(doc)
...
raw document: b'...'
decoded document: {'_id': 1, 'a': 1}
raw document: b'...'
decoded document: {'_id': 2, 'b': 1}
raw document: b'...'
decoded document: {'_id': 3, 'c': 1}
raw document: b'...'
decoded document: {'_id': 4, 'd': 1}
For use cases like moving documents across different databases or writing binary
blobs to disk, using raw BSON documents provides better speed and avoids the
overhead of decoding or encoding BSON.
"""
from __future__ import annotations
from typing import Any, ItemsView, Iterator, Mapping, Optional
from bson import _get_object_size, _raw_to_dict
from bson.codec_options import _RAW_BSON_DOCUMENT_MARKER, CodecOptions
from bson.codec_options import DEFAULT_CODEC_OPTIONS as DEFAULT
def _inflate_bson(
    bson_bytes: bytes | memoryview,
    codec_options: CodecOptions[RawBSONDocument],
    raw_array: bool = False,
) -> dict[str, Any]:
    """Inflates the top level fields of a BSON document.

    :param bson_bytes: the BSON bytes that compose this document
    :param codec_options: An instance of
        :class:`~bson.codec_options.CodecOptions` whose ``document_class``
        must be :class:`RawBSONDocument`.
    :param raw_array: forwarded to ``_raw_to_dict``; when ``True``,
        sub-arrays are left as raw BSON (see ``_RawArrayBSONDocument``).
    """
    # Offset 4 skips the document's 4-byte length prefix; len - 1 stops
    # before the trailing NUL terminator. Only top-level fields decode here.
    return _raw_to_dict(bson_bytes, 4, len(bson_bytes) - 1, codec_options, {}, raw_array=raw_array)
class RawBSONDocument(Mapping[str, Any]):
    """Representation for a MongoDB document that provides access to the raw
    BSON bytes that compose it.

    Only when a field is accessed or modified within the document does
    RawBSONDocument decode its bytes.
    """

    __slots__ = ("__raw", "__inflated_doc", "__codec_options")
    _type_marker = _RAW_BSON_DOCUMENT_MARKER
    __codec_options: CodecOptions[RawBSONDocument]

    def __init__(
        self,
        bson_bytes: bytes | memoryview,
        codec_options: Optional[CodecOptions[RawBSONDocument]] = None,
    ) -> None:
        """Create a new :class:`RawBSONDocument`

        :class:`RawBSONDocument` is a representation of a BSON document that
        provides access to the underlying raw BSON bytes. Only when a field is
        accessed or modified within the document does RawBSONDocument decode
        its bytes.

        :class:`RawBSONDocument` implements the ``Mapping`` abstract base
        class from the standard library so it can be used like a read-only
        ``dict``::

            >>> from bson import encode
            >>> raw_doc = RawBSONDocument(encode({'_id': 'my_doc'}))
            >>> raw_doc.raw
            b'...'
            >>> raw_doc['_id']
            'my_doc'

        :param bson_bytes: the BSON bytes that compose this document
        :param codec_options: An instance of
            :class:`~bson.codec_options.CodecOptions` whose ``document_class``
            must be :class:`RawBSONDocument`. The default is
            :attr:`DEFAULT_RAW_BSON_OPTIONS`.

        :raises TypeError: if ``codec_options.document_class`` is not
            :class:`RawBSONDocument` or a subclass of it.

        .. versionchanged:: 3.8
           :class:`RawBSONDocument` now validates that the ``bson_bytes``
           passed in represent a single bson document.

        .. versionchanged:: 3.5
           If a :class:`~bson.codec_options.CodecOptions` is passed in, its
           `document_class` must be :class:`RawBSONDocument`.
        """
        self.__raw = bson_bytes
        # Decoded view of the document; populated lazily on first access.
        self.__inflated_doc: Optional[Mapping[str, Any]] = None
        # Can't default codec_options to DEFAULT_RAW_BSON_OPTIONS in signature,
        # it refers to this class RawBSONDocument.
        if codec_options is None:
            codec_options = DEFAULT_RAW_BSON_OPTIONS
        elif not issubclass(codec_options.document_class, RawBSONDocument):
            raise TypeError(
                "RawBSONDocument cannot use CodecOptions with document "
                f"class {codec_options.document_class}"
            )
        self.__codec_options = codec_options
        # Validate the bson object size.
        _get_object_size(bson_bytes, 0, len(bson_bytes))

    @property
    def raw(self) -> bytes | memoryview:
        """The raw BSON bytes composing this document."""
        return self.__raw

    def items(self) -> ItemsView[str, Any]:
        """Lazily decode and iterate elements in this document."""
        return self.__inflated.items()

    @property
    def __inflated(self) -> Mapping[str, Any]:
        # Decode on first use and cache the result for subsequent lookups.
        if self.__inflated_doc is None:
            # We already validated the object's size when this document was
            # created, so no need to do that again.
            self.__inflated_doc = self._inflate_bson(self.__raw, self.__codec_options)
        return self.__inflated_doc

    @staticmethod
    def _inflate_bson(
        bson_bytes: bytes | memoryview, codec_options: CodecOptions[RawBSONDocument]
    ) -> Mapping[str, Any]:
        # Hook overridden by _RawArrayBSONDocument to keep sub-arrays raw.
        return _inflate_bson(bson_bytes, codec_options)

    def __getitem__(self, item: str) -> Any:
        return self.__inflated[item]

    def __iter__(self) -> Iterator[str]:
        return iter(self.__inflated)

    def __len__(self) -> int:
        return len(self.__inflated)

    def __eq__(self, other: Any) -> bool:
        # Equality is byte-level: two documents compare equal iff their raw
        # BSON payloads are identical (no decoding is performed).
        if isinstance(other, RawBSONDocument):
            return self.__raw == other.raw
        return NotImplemented

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.raw!r}, codec_options={self.__codec_options!r})"
class _RawArrayBSONDocument(RawBSONDocument):
    """A RawBSONDocument that only expands sub-documents and arrays when accessed."""

    @staticmethod
    def _inflate_bson(
        bson_bytes: bytes | memoryview, codec_options: CodecOptions[RawBSONDocument]
    ) -> Mapping[str, Any]:
        # raw_array=True leaves nested arrays as undecoded BSON bytes.
        return _inflate_bson(bson_bytes, codec_options, raw_array=True)
DEFAULT_RAW_BSON_OPTIONS: CodecOptions[RawBSONDocument] = DEFAULT.with_options(
    document_class=RawBSONDocument
)
"""The default :class:`~bson.codec_options.CodecOptions` for
:class:`RawBSONDocument`.
"""

# Internal codec options that keep sub-documents and arrays raw
# (used via _RawArrayBSONDocument); not part of the public API.
_RAW_ARRAY_BSON_OPTIONS: CodecOptions[_RawArrayBSONDocument] = DEFAULT.with_options(
    document_class=_RawArrayBSONDocument
)

View File

@@ -0,0 +1,133 @@
# Copyright 2013-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for representing MongoDB regular expressions."""
from __future__ import annotations
import re
from typing import Any, Generic, Pattern, Type, TypeVar, Union
from bson._helpers import _getstate_slots, _setstate_slots
from bson.son import RE_TYPE
def str_flags_to_int(str_flags: str) -> int:
    """Translate a MongoDB regex flag string (e.g. ``"im"``) into the
    equivalent :mod:`re` bitmask. Unrecognized characters are ignored.
    """
    char_to_flag = {
        "i": re.IGNORECASE,
        "l": re.LOCALE,
        "m": re.MULTILINE,
        "s": re.DOTALL,
        "u": re.UNICODE,
        "x": re.VERBOSE,
    }
    flags = 0
    for char, flag in char_to_flag.items():
        if char in str_flags:
            flags |= flag
    return flags
_T = TypeVar("_T", str, bytes)
class Regex(Generic[_T]):
    """BSON regular expression data.

    Stores a pattern/flags pair verbatim, without compiling it, so regular
    expressions that are not valid in Python's dialect can still be stored
    and retrieved from MongoDB.
    """

    __slots__ = ("pattern", "flags")
    __getstate__ = _getstate_slots
    __setstate__ = _setstate_slots
    _type_marker = 11

    @classmethod
    def from_native(cls: Type[Regex[Any]], regex: Pattern[_T]) -> Regex[_T]:
        """Convert a Python regular expression into a ``Regex`` instance.

        Note that in Python 3, a regular expression compiled from a
        :class:`str` has the ``re.UNICODE`` flag set. If it is undesirable
        to store this flag in a BSON regular expression, unset it first::

          >>> pattern = re.compile('.*')
          >>> regex = Regex.from_native(pattern)
          >>> regex.flags ^= re.UNICODE
          >>> db.collection.insert_one({'pattern': regex})

        :param regex: A regular expression object from ``re.compile()``.

        .. warning::
           Python regular expressions use a different syntax and different
           set of flags than MongoDB, which uses `PCRE`_. A regular
           expression retrieved from the server may not compile in
           Python, or may match a different set of strings in Python than
           when used in a MongoDB query.

        .. _PCRE: http://www.pcre.org/
        """
        if isinstance(regex, RE_TYPE):
            return Regex(regex.pattern, regex.flags)
        raise TypeError("regex must be a compiled regular expression, not %s" % type(regex))

    def __init__(self, pattern: _T, flags: Union[str, int] = 0) -> None:
        """BSON regular expression data.

        This class is useful to store and retrieve regular expressions that are
        incompatible with Python's regular expression dialect.

        :param pattern: string
        :param flags: an integer bitmask, or a string of flag
            characters like "im" for IGNORECASE and MULTILINE
        """
        if not isinstance(pattern, (str, bytes)):
            raise TypeError("pattern must be a string, not %s" % type(pattern))
        self.pattern: _T = pattern
        if isinstance(flags, str):
            # Translate MongoDB-style flag characters into an re bitmask.
            self.flags = str_flags_to_int(flags)
            return
        if not isinstance(flags, int):
            raise TypeError("flags must be a string or int, not %s" % type(flags))
        self.flags = flags

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, Regex):
            return NotImplemented
        return self.pattern == other.pattern and self.flags == other.flags

    # Instances are mutable (pattern/flags are plain attributes), so they
    # are deliberately unhashable.
    __hash__ = None  # type: ignore

    def __ne__(self, other: Any) -> bool:
        return not self == other

    def __repr__(self) -> str:
        return f"Regex({self.pattern!r}, {self.flags!r})"

    def try_compile(self) -> Pattern[_T]:
        """Compile this :class:`Regex` as a Python regular expression.

        .. warning::
           Python regular expressions use a different syntax and different
           set of flags than MongoDB, which uses `PCRE`_. A regular
           expression retrieved from the server may not compile in
           Python, or may match a different set of strings in Python than
           when used in a MongoDB query. :meth:`try_compile()` may raise
           :exc:`re.error`.

        .. _PCRE: http://www.pcre.org/
        """
        return re.compile(self.pattern, self.flags)

View File

@@ -0,0 +1,211 @@
# Copyright 2009-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for creating and manipulating SON, the Serialized Ocument Notation.
Regular dictionaries can be used instead of SON objects, but not when the order
of keys is important. A SON object can be used just like a normal Python
dictionary.
"""
from __future__ import annotations
import copy
import re
from collections.abc import Mapping as _Mapping
from typing import (
Any,
Dict,
Iterable,
Iterator,
Mapping,
Optional,
Pattern,
Tuple,
Type,
TypeVar,
Union,
cast,
)
# This sort of sucks, but seems to be as good as it gets...
# This is essentially the same as re._pattern_type
RE_TYPE: Type[Pattern[Any]] = type(re.compile(""))

# Type variables for SON's generic dict interface.
_Key = TypeVar("_Key")
_Value = TypeVar("_Value")
_T = TypeVar("_T")
class SON(Dict[_Key, _Value]):
    """SON data.

    A subclass of dict that maintains ordering of keys and provides a
    few extra niceties for dealing with SON. SON provides an API
    similar to collections.OrderedDict.
    """

    # Insertion-ordered list of keys; the underlying dict stores the values.
    __keys: list[Any]

    def __init__(
        self,
        data: Optional[Union[Mapping[_Key, _Value], Iterable[Tuple[_Key, _Value]]]] = None,
        **kwargs: Any,
    ) -> None:
        self.__keys = []
        dict.__init__(self)
        self.update(data)
        self.update(kwargs)

    def __new__(cls: Type[SON[_Key, _Value]], *args: Any, **kwargs: Any) -> SON[_Key, _Value]:
        # Also initialize __keys here so instances created without running
        # __init__ (e.g. during deserialization) are still usable.
        instance = super().__new__(cls, *args, **kwargs)
        instance.__keys = []
        return instance

    def __repr__(self) -> str:
        result = []
        for key in self.__keys:
            result.append(f"({key!r}, {self[key]!r})")
        return "SON([%s])" % ", ".join(result)

    def __setitem__(self, key: _Key, value: _Value) -> None:
        # Record first-insertion order; overwriting keeps the original slot.
        if key not in self.__keys:
            self.__keys.append(key)
        dict.__setitem__(self, key, value)

    def __delitem__(self, key: _Key) -> None:
        self.__keys.remove(key)
        dict.__delitem__(self, key)

    def copy(self) -> SON[_Key, _Value]:
        # Shallow copy that preserves key order.
        other: SON[_Key, _Value] = SON()
        other.update(self)
        return other

    # TODO this is all from UserDict.DictMixin. it could probably be made more
    # efficient.
    # second level definitions support higher levels
    def __iter__(self) -> Iterator[_Key]:
        yield from self.__keys

    def has_key(self, key: _Key) -> bool:
        # Retained for backwards compatibility with the Python 2-era dict API.
        return key in self.__keys

    def iterkeys(self) -> Iterator[_Key]:
        return self.__iter__()

    # fourth level uses definitions from lower levels
    def itervalues(self) -> Iterator[_Value]:
        for _, v in self.items():
            yield v

    def values(self) -> list[_Value]:  # type: ignore[override]
        # Returns a list (not a view) in key-insertion order.
        return [v for _, v in self.items()]

    def clear(self) -> None:
        self.__keys = []
        super().clear()

    def setdefault(self, key: _Key, default: _Value) -> _Value:
        try:
            return self[key]
        except KeyError:
            self[key] = default
        return default

    def pop(self, key: _Key, *args: Union[_Value, _T]) -> Union[_Value, _T]:
        # args, if present, holds the single optional default value.
        if len(args) > 1:
            raise TypeError("pop expected at most 2 arguments, got " + repr(1 + len(args)))
        try:
            value = self[key]
        except KeyError:
            if args:
                return args[0]
            raise
        del self[key]
        return value

    def popitem(self) -> Tuple[_Key, _Value]:
        # Removes and returns the FIRST item (oldest insertion), unlike
        # dict.popitem() which removes the last.
        try:
            k, v = next(iter(self.items()))
        except StopIteration:
            raise KeyError("container is empty") from None
        del self[k]
        return (k, v)

    def update(self, other: Optional[Any] = None, **kwargs: _Value) -> None:
        # Make progressively weaker assumptions about "other"
        if other is None:
            pass
        elif hasattr(other, "items"):
            for k, v in other.items():
                self[k] = v
        elif hasattr(other, "keys"):
            for k in other:
                self[k] = other[k]
        else:
            for k, v in other:
                self[k] = v
        if kwargs:
            self.update(kwargs)

    def get(  # type: ignore[override]
        self, key: _Key, default: Optional[Union[_Value, _T]] = None
    ) -> Union[_Value, _T, None]:
        try:
            return self[key]
        except KeyError:
            return default

    def __eq__(self, other: Any) -> bool:
        """Comparison to another SON is order-sensitive while comparison to a
        regular dictionary is order-insensitive.
        """
        if isinstance(other, SON):
            return len(self) == len(other) and list(self.items()) == list(other.items())
        return cast(bool, self.to_dict() == other)

    def __ne__(self, other: Any) -> bool:
        return not self == other

    def __len__(self) -> int:
        return len(self.__keys)

    def to_dict(self) -> dict[_Key, _Value]:
        """Convert a SON document to a normal Python dictionary instance.

        This is trickier than just *dict(...)* because it needs to be
        recursive.
        """

        def transform_value(value: Any) -> Any:
            # Recurse into lists and mappings; leave everything else as-is.
            if isinstance(value, list):
                return [transform_value(v) for v in value]
            elif isinstance(value, _Mapping):
                return {k: transform_value(v) for k, v in value.items()}
            else:
                return value

        return cast("dict[_Key, _Value]", transform_value(dict(self)))

    def __deepcopy__(self, memo: dict[int, SON[_Key, _Value]]) -> SON[_Key, _Value]:
        out: SON[_Key, _Value] = SON()
        val_id = id(self)
        # Honor the memo dict so shared/self-referential structures are
        # copied exactly once.
        if val_id in memo:
            return memo[val_id]
        memo[val_id] = out
        for k, v in self.items():
            if not isinstance(v, RE_TYPE):
                # Compiled regex objects are not deep-copied.
                v = copy.deepcopy(v, memo)  # noqa: PLW2901
            out[k] = v
        return out

View File

@@ -0,0 +1,781 @@
/*
Copyright (c) 2007-2010 Michael G Schwern
This software originally derived from Paul Sheer's pivotal_gmtime_r.c.
The MIT License:
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Programmers who have available to them 64-bit time values as a 'long
long' type can use cbson_localtime64_r() and cbson_gmtime64_r() which correctly
converts the time even on 32-bit systems. Whether you have 64-bit time
values will depend on the operating system.
cbson_localtime64_r() is a 64-bit equivalent of localtime_r().
cbson_gmtime64_r() is a 64-bit equivalent of gmtime_r().
*/
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#endif
/* Including Python.h fixes issues with interpreters built with -std=c99. */
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include <time.h>
#include "time64.h"
#include "time64_limits.h"
/* Spec says except for stftime() and the _r() functions, these
   all return static memory.  Stabbings! */
static struct TM   Static_Return_Date;

/* Days per month, indexed by [is_leap][month]. */
static const int days_in_month[2][12] = {
    {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
    {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
};

/* Day-of-year of the 1st of each month, indexed by [is_leap][month]. */
static const int julian_days_by_month[2][12] = {
    {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334},
    {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335},
};

static const int length_of_year[2] = { 365, 366 };

/* Some numbers relating to the gregorian cycle */
static const Year     years_in_gregorian_cycle   = 400;
#define               days_in_gregorian_cycle      ((365 * 400) + 100 - 4 + 1)
static const Time64_T seconds_in_gregorian_cycle = days_in_gregorian_cycle * 60LL * 60LL * 24LL;

/* Year range we can trust the time functions with */
#define MAX_SAFE_YEAR 2037
#define MIN_SAFE_YEAR 1971

/* 28 year Julian calendar cycle */
#define SOLAR_CYCLE_LENGTH 28

/* Year cycle from MAX_SAFE_YEAR down. */
static const int safe_years_high[SOLAR_CYCLE_LENGTH] = {
    2016, 2017, 2018, 2019,
    2020, 2021, 2022, 2023,
    2024, 2025, 2026, 2027,
    2028, 2029, 2030, 2031,
    2032, 2033, 2034, 2035,
    2036, 2037, 2010, 2011,
    2012, 2013, 2014, 2015
};

/* Year cycle from MIN_SAFE_YEAR up */
static const int safe_years_low[SOLAR_CYCLE_LENGTH] = {
    1996, 1997, 1998, 1971,
    1972, 1973, 1974, 1975,
    1976, 1977, 1978, 1979,
    1980, 1981, 1982, 1983,
    1984, 1985, 1986, 1987,
    1988, 1989, 1990, 1991,
    1992, 1993, 1994, 1995,
};

/* Let's assume people are going to be looking for dates in the future.
   Let's provide some cheats so you can skip ahead.
   This has a 4x speed boost when near 2008.
*/
/* Number of days since epoch on Jan 1st, 2008 GMT */
#define CHEAT_DAYS  (1199145600 / 24 / 60 / 60)
#define CHEAT_YEARS 108

/* n is a tm_year value, i.e. offset from 1900. */
#define IS_LEAP(n)  ((!(((n) + 1900) % 400) || (!(((n) + 1900) % 4) && (((n) + 1900) % 100))) != 0)
/* After a modulus split, borrow one unit from b when a went negative. */
#define _TIME64_WRAP(a,b,m) ((a) = ((a) <  0  ) ? ((b)--, (a) + (m)) : (a))

#ifdef USE_SYSTEM_LOCALTIME
#    define SHOULD_USE_SYSTEM_LOCALTIME(a)  (       \
    (a) <= SYSTEM_LOCALTIME_MAX                &&   \
    (a) >= SYSTEM_LOCALTIME_MIN                     \
)
#else
#    define SHOULD_USE_SYSTEM_LOCALTIME(a)      (0)
#endif

#ifdef USE_SYSTEM_GMTIME
#    define SHOULD_USE_SYSTEM_GMTIME(a)     (       \
    (a) <= SYSTEM_GMTIME_MAX                   &&   \
    (a) >= SYSTEM_GMTIME_MIN                        \
)
#else
#    define SHOULD_USE_SYSTEM_GMTIME(a)         (0)
#endif

/* Multi varadic macros are a C99 thing, alas */
#ifdef TIME_64_DEBUG
#    define TIME64_TRACE(format) (fprintf(stderr, format))
#    define TIME64_TRACE1(format, var1)    (fprintf(stderr, format, var1))
#    define TIME64_TRACE2(format, var1, var2)    (fprintf(stderr, format, var1, var2))
#    define TIME64_TRACE3(format, var1, var2, var3)    (fprintf(stderr, format, var1, var2, var3))
#else
#    define TIME64_TRACE(format) ((void)0)
#    define TIME64_TRACE1(format, var1) ((void)0)
#    define TIME64_TRACE2(format, var1, var2) ((void)0)
#    define TIME64_TRACE3(format, var1, var2, var3) ((void)0)
#endif
/* A century year (xx00) is a leap year only when divisible by 400;
   return non-zero for the "exception" centuries that are NOT leap. */
static int is_exception_century(Year year)
{
    const int is_exception = (year % 100 == 0) && (year % 400 != 0);
    TIME64_TRACE1("# is_exception_century: %s\n", is_exception ? "yes" : "no");
    return is_exception;
}
/* Compare two dates.
   The result is like cmp.
   Ignores things like gmtoffset and dst
*/
int cbson_cmp_date( const struct TM* left, const struct tm* right ) {
    /* Compare the broken-down fields from most to least significant;
       the first differing field decides the ordering. */
#define CBSON_CMP_FIELD(field)              \
    do {                                    \
        if( left->field > right->field )    \
            return 1;                       \
        if( left->field < right->field )    \
            return -1;                      \
    } while (0)

    CBSON_CMP_FIELD(tm_year);
    CBSON_CMP_FIELD(tm_mon);
    CBSON_CMP_FIELD(tm_mday);
    CBSON_CMP_FIELD(tm_hour);
    CBSON_CMP_FIELD(tm_min);
    CBSON_CMP_FIELD(tm_sec);

#undef CBSON_CMP_FIELD

    return 0;
}
/* Check if a date is safely inside a range.
   The intention is to check if its a few days inside.
*/
int cbson_date_in_safe_range( const struct TM* date, const struct tm* min, const struct tm* max ) {
    /* In range iff not before min and not after max. */
    return cbson_cmp_date(date, min) != -1 &&
           cbson_cmp_date(date, max) !=  1;
}
/* timegm() is not in the C or POSIX spec, but it is such a useful
   extension I would be remiss in leaving it out.  Also I need it
   for cbson_localtime64()
*/
Time64_T cbson_timegm64(const struct TM *date) {
    Time64_T days    = 0;
    Time64_T seconds = 0;
    Year     year;
    Year     orig_year = (Year)date->tm_year;
    int      cycles  = 0;

    /* Collapse whole 400-year Gregorian cycles first; each cycle has the
       same number of days, so distant years reduce to a nearby one. */
    if( orig_year > 100 ) {
        cycles = (int)((orig_year - 100) / 400);
        orig_year -= cycles * 400;
        days += (Time64_T)cycles * days_in_gregorian_cycle;
    }
    else if( orig_year < -300 ) {
        cycles = (int)((orig_year - 100) / 400);
        orig_year -= cycles * 400;
        days += (Time64_T)cycles * days_in_gregorian_cycle;
    }
    TIME64_TRACE3("# timegm/ cycles: %d, days: %lld, orig_year: %lld\n", cycles, days, orig_year);

    /* Walk year by year from 1970 (tm_year 70) to the target year,
       accumulating (or subtracting, for pre-1970 dates) whole years. */
    if( orig_year > 70 ) {
        year = 70;
        while( year < orig_year ) {
            days += length_of_year[IS_LEAP(year)];
            year++;
        }
    }
    else if ( orig_year < 70 ) {
        year = 69;
        do {
            days -= length_of_year[IS_LEAP(year)];
            year--;
        } while( year >= orig_year );
    }

    /* Days within the target year, then convert everything to seconds. */
    days += julian_days_by_month[IS_LEAP(orig_year)][date->tm_mon];
    days += date->tm_mday - 1;

    seconds = days * 60 * 60 * 24;

    seconds += date->tm_hour * 60 * 60;
    seconds += date->tm_min * 60;
    seconds += date->tm_sec;

    return(seconds);
}
#ifndef NDEBUG
/* Sanity-check every field of a struct TM; compiled only in debug builds
   and used inside assert() calls, so it always returns 1. */
static int check_tm(struct TM *tm)
{
    /* Don't forget leap seconds */
    assert(tm->tm_sec >= 0);
    assert(tm->tm_sec <= 61);

    assert(tm->tm_min >= 0);
    assert(tm->tm_min <= 59);

    assert(tm->tm_hour >= 0);
    assert(tm->tm_hour <= 23);

    assert(tm->tm_mday >= 1);
    assert(tm->tm_mday <= days_in_month[IS_LEAP(tm->tm_year)][tm->tm_mon]);

    assert(tm->tm_mon >= 0);
    assert(tm->tm_mon <= 11);

    assert(tm->tm_wday >= 0);
    assert(tm->tm_wday <= 6);

    assert(tm->tm_yday >= 0);
    assert(tm->tm_yday <= length_of_year[IS_LEAP(tm->tm_year)]);

#ifdef HAS_TM_TM_GMTOFF
    assert(tm->tm_gmtoff >= -24 * 60 * 60);
    assert(tm->tm_gmtoff <= 24 * 60 * 60);
#endif

    return 1;
}
#endif
/* The exceptional centuries without leap years cause the cycle to
   shift by 16
*/
static Year cycle_offset(Year year)
{
    const Year start_year = 2000;
    Year year_diff = year - start_year;
    Year exceptions;

    if( year > start_year )
        year_diff--;

    /* Century years minus those divisible by 400 are the exceptions. */
    exceptions = year_diff / 100 - year_diff / 400;

    TIME64_TRACE3("# year: %lld, exceptions: %lld, year_diff: %lld\n",
          year, exceptions, year_diff);

    return exceptions * 16;
}
/* For a given year after 2038, pick the latest possible matching
   year in the 28 year calendar cycle.

   A matching year...
   1) Starts on the same day of the week.
   2) Has the same leap year status.

   This is so the calendars match up.

   Also the previous year must match.  When doing Jan 1st you might
   wind up on Dec 31st the previous year when doing a -UTC time zone.

   Finally, the next year must have the same start day of week.  This
   is for Dec 31st with a +UTC time zone.
   It doesn't need the same leap year status since we only care about
   January 1st.
*/
static int safe_year(const Year year)
{
    int safe_year = 0;
    Year year_cycle;

    /* Years the system time functions already handle pass through. */
    if( year >= MIN_SAFE_YEAR && year <= MAX_SAFE_YEAR ) {
        return (int)year;
    }

    year_cycle = year + cycle_offset(year);

    /* safe_years_low is off from safe_years_high by 8 years */
    if( year < MIN_SAFE_YEAR )
        year_cycle -= 8;

    /* Change non-leap xx00 years to an equivalent */
    if( is_exception_century(year) )
        year_cycle += 11;

    /* Also xx01 years, since the previous year will be wrong */
    if( is_exception_century(year - 1) )
        year_cycle += 17;

    /* Normalize into [0, SOLAR_CYCLE_LENGTH); C's % may be negative. */
    year_cycle %= SOLAR_CYCLE_LENGTH;
    if( year_cycle < 0 )
        year_cycle = SOLAR_CYCLE_LENGTH + year_cycle;

    assert( year_cycle >= 0 );
    assert( year_cycle < SOLAR_CYCLE_LENGTH );
    if( year < MIN_SAFE_YEAR )
        safe_year = safe_years_low[year_cycle];
    else if( year > MAX_SAFE_YEAR )
        safe_year = safe_years_high[year_cycle];
    else
        assert(0);

    TIME64_TRACE3("# year: %lld, year_cycle: %lld, safe_year: %d\n",
          year, year_cycle, safe_year);

    assert(safe_year <= MAX_SAFE_YEAR && safe_year >= MIN_SAFE_YEAR);

    return safe_year;
}
/* Widen a system struct tm into our struct TM (64-bit year).
   A NULL src zeroes dest. */
void pymongo_copy_tm_to_TM64(const struct tm *src, struct TM *dest) {
    if( src == NULL ) {
        memset(dest, 0, sizeof(*dest));
    }
    else {
#       ifdef USE_TM64
        /* Field-by-field copy because the two layouts differ. */
        dest->tm_sec        = src->tm_sec;
        dest->tm_min        = src->tm_min;
        dest->tm_hour       = src->tm_hour;
        dest->tm_mday       = src->tm_mday;
        dest->tm_mon        = src->tm_mon;
        dest->tm_year       = (Year)src->tm_year;
        dest->tm_wday       = src->tm_wday;
        dest->tm_yday       = src->tm_yday;
        dest->tm_isdst      = src->tm_isdst;

#       ifdef HAS_TM_TM_GMTOFF
        dest->tm_gmtoff  = src->tm_gmtoff;
#       endif

#       ifdef HAS_TM_TM_ZONE
        dest->tm_zone    = src->tm_zone;
#       endif
#       else
        /* They're the same type */
        memcpy(dest, src, sizeof(*dest));
#       endif
    }
}
/* Narrow our struct TM back into a system struct tm.
   NOTE: the (int) cast of tm_year can truncate for far-away years;
   callers are expected to pass only "safe" years.  A NULL src zeroes dest. */
void cbson_copy_TM64_to_tm(const struct TM *src, struct tm *dest) {
    if( src == NULL ) {
        memset(dest, 0, sizeof(*dest));
    }
    else {
#       ifdef USE_TM64
        dest->tm_sec        = src->tm_sec;
        dest->tm_min        = src->tm_min;
        dest->tm_hour       = src->tm_hour;
        dest->tm_mday       = src->tm_mday;
        dest->tm_mon        = src->tm_mon;
        dest->tm_year       = (int)src->tm_year;
        dest->tm_wday       = src->tm_wday;
        dest->tm_yday       = src->tm_yday;
        dest->tm_isdst      = src->tm_isdst;

#       ifdef HAS_TM_TM_GMTOFF
        dest->tm_gmtoff  = src->tm_gmtoff;
#       endif

#       ifdef HAS_TM_TM_ZONE
        dest->tm_zone    = src->tm_zone;
#       endif
#       else
        /* They're the same type */
        memcpy(dest, src, sizeof(*dest));
#       endif
    }
}
/* Simulate localtime_r() to the best of our ability */
struct tm * cbson_fake_localtime_r(const time_t *time, struct tm *result) {
    const struct tm *shared = localtime(time);

    assert(result != NULL);

    if( shared == NULL ) {
        memset(result, 0, sizeof(*result));
        return NULL;
    }

    /* Copy the library's static buffer into caller-owned storage. */
    *result = *shared;
    return result;
}
/* Simulate gmtime_r() to the best of our ability */
struct tm * cbson_fake_gmtime_r(const time_t *time, struct tm *result) {
    const struct tm *shared = gmtime(time);

    assert(result != NULL);

    if( shared == NULL ) {
        memset(result, 0, sizeof(*result));
        return NULL;
    }

    /* Copy the library's static buffer into caller-owned storage. */
    *result = *shared;
    return result;
}
/* Seconds between Jan 1st of right_year and Jan 1st of left_year;
   the sign of the result follows the direction of travel. */
static Time64_T seconds_between_years(Year left_year, Year right_year) {
    int increment = (left_year > right_year) ? 1 : -1;
    Time64_T seconds = 0;
    int cycles;

    /* Collapse whole 400-year Gregorian cycles for distant years. */
    if( left_year > 2400 ) {
        cycles = (int)((left_year - 2400) / 400);
        left_year -= cycles * 400;
        seconds += cycles * seconds_in_gregorian_cycle;
    }
    else if( left_year < 1600 ) {
        cycles = (int)((left_year - 1600) / 400);
        left_year += cycles * 400;
        seconds += cycles * seconds_in_gregorian_cycle;
    }

    /* Walk the remaining years one at a time. */
    while( left_year != right_year ) {
        seconds += length_of_year[IS_LEAP(right_year - 1900)] * 60 * 60 * 24;
        right_year += increment;
    }

    return seconds * increment;
}
/* 64-bit safe mktime() replacement: convert via a "safe" equivalent year
   the system mktime() can handle, then add back the difference. */
Time64_T cbson_mktime64(const struct TM *input_date) {
    struct tm safe_date;
    struct TM date;
    Time64_T  time;
    Year      year = input_date->tm_year + 1900;

    /* Fast path: let the system mktime() handle dates it supports. */
    if( cbson_date_in_safe_range(input_date, &SYSTEM_MKTIME_MIN, &SYSTEM_MKTIME_MAX) )
    {
        cbson_copy_TM64_to_tm(input_date, &safe_date);
        return (Time64_T)mktime(&safe_date);
    }

    /* Have to make the year safe in date else it won't fit in safe_date */
    date = *input_date;
    date.tm_year = safe_year(year) - 1900;
    cbson_copy_TM64_to_tm(&date, &safe_date);

    time = (Time64_T)mktime(&safe_date);

    /* Compensate for the year substitution above. */
    time += seconds_between_years(year, (Year)(safe_date.tm_year + 1900));

    return time;
}
/* Because I think mktime() is a crappy name */
Time64_T timelocal64(const struct TM *date) {
    /* Thin alias for cbson_mktime64(). */
    return cbson_mktime64(date);
}
/* 64-bit safe gmtime_r() replacement: breaks *in_time (seconds since the
   epoch, UTC) down into *p.  Returns p, or NULL when the year does not
   fit in an int tm_year (errno set to EOVERFLOW where available). */
struct TM *cbson_gmtime64_r (const Time64_T *in_time, struct TM *p)
{
    int v_tm_sec, v_tm_min, v_tm_hour, v_tm_mon, v_tm_wday;
    Time64_T v_tm_tday;
    int leap;
    Time64_T m;
    Time64_T time = *in_time;
    Year year = 70;
    int cycles = 0;

    assert(p != NULL);

#ifdef USE_SYSTEM_GMTIME
    /* Use the system gmtime() if time_t is small enough */
    if( SHOULD_USE_SYSTEM_GMTIME(*in_time) ) {
        time_t safe_time = (time_t)*in_time;
        struct tm safe_date;
        GMTIME_R(&safe_time, &safe_date);

        pymongo_copy_tm_to_TM64(&safe_date, p);
        assert(check_tm(p));

        return p;
    }
#endif

#ifdef HAS_TM_TM_GMTOFF
    p->tm_gmtoff = 0;
#endif
    p->tm_isdst = 0;

#ifdef HAS_TM_TM_ZONE
    p->tm_zone = "UTC";
#endif

    /* Split off seconds, minutes and hours; what remains is whole days. */
    v_tm_sec = (int)(time % 60);
    time /= 60;
    v_tm_min = (int)(time % 60);
    time /= 60;
    v_tm_hour = (int)(time % 24);
    time /= 24;
    v_tm_tday = time;

    /* C's % can yield negative remainders; borrow from the next unit up. */
    _TIME64_WRAP (v_tm_sec, v_tm_min, 60);
    _TIME64_WRAP (v_tm_min, v_tm_hour, 60);
    _TIME64_WRAP (v_tm_hour, v_tm_tday, 24);

    /* Day of week: day 0 (the epoch, 1970-01-01) was a Thursday (4). */
    v_tm_wday = (int)((v_tm_tday + 4) % 7);
    if (v_tm_wday < 0)
        v_tm_wday += 7;
    m = v_tm_tday;

    /* Skip ahead to 2008 when possible to speed up nearby dates. */
    if (m >= CHEAT_DAYS) {
        year = CHEAT_YEARS;
        m -= CHEAT_DAYS;
    }

    if (m >= 0) {
        /* Gregorian cycles, this is huge optimization for distant times */
        cycles = (int)(m / (Time64_T) days_in_gregorian_cycle);
        if( cycles ) {
            m -= (cycles * (Time64_T) days_in_gregorian_cycle);
            year += (cycles * years_in_gregorian_cycle);
        }

        /* Years */
        leap = IS_LEAP (year);
        while (m >= (Time64_T) length_of_year[leap]) {
            m -= (Time64_T) length_of_year[leap];
            year++;
            leap = IS_LEAP (year);
        }

        /* Months */
        v_tm_mon = 0;
        while (m >= (Time64_T) days_in_month[leap][v_tm_mon]) {
            m -= (Time64_T) days_in_month[leap][v_tm_mon];
            v_tm_mon++;
        }
    } else {
        /* Negative remaining days: walk backwards from 1970. */
        year--;

        /* Gregorian cycles */
        cycles = (int)((m / (Time64_T) days_in_gregorian_cycle) + 1);
        if( cycles ) {
            m -= (cycles * (Time64_T) days_in_gregorian_cycle);
            year += (cycles * years_in_gregorian_cycle);
        }

        /* Years */
        leap = IS_LEAP (year);
        while (m < (Time64_T) -length_of_year[leap]) {
            m += (Time64_T) length_of_year[leap];
            year--;
            leap = IS_LEAP (year);
        }

        /* Months */
        v_tm_mon = 11;
        while (m < (Time64_T) -days_in_month[leap][v_tm_mon]) {
            m += (Time64_T) days_in_month[leap][v_tm_mon];
            v_tm_mon--;
        }
        m += (Time64_T) days_in_month[leap][v_tm_mon];
    }

    /* tm_year is only an int; detect truncation for extreme years. */
    p->tm_year = (int)year;
    if( p->tm_year != year ) {
#ifdef EOVERFLOW
        errno = EOVERFLOW;
#endif
        return NULL;
    }

    /* At this point m is less than a year so casting to an int is safe */
    p->tm_mday = (int) m + 1;
    p->tm_yday = julian_days_by_month[leap][v_tm_mon] + (int)m;
    p->tm_sec = v_tm_sec;
    p->tm_min = v_tm_min;
    p->tm_hour = v_tm_hour;
    p->tm_mon = v_tm_mon;
    p->tm_wday = v_tm_wday;

    assert(check_tm(p));

    return p;
}
/* 64-bit safe localtime_r() replacement: converts via GMT, maps the year
   into the system-safe range, applies the system's local-time rules, then
   restores the original year.  Returns NULL on conversion failure or
   tm_year overflow. */
struct TM *cbson_localtime64_r (const Time64_T *time, struct TM *local_tm)
{
    time_t safe_time;
    struct tm safe_date;
    struct TM gm_tm;
    Year orig_year;
    int month_diff;

    assert(local_tm != NULL);

#ifdef USE_SYSTEM_LOCALTIME
    /* Use the system localtime() if time_t is small enough */
    if( SHOULD_USE_SYSTEM_LOCALTIME(*time) ) {
        safe_time = (time_t)*time;

        TIME64_TRACE1("Using system localtime for %lld\n", *time);

        LOCALTIME_R(&safe_time, &safe_date);

        pymongo_copy_tm_to_TM64(&safe_date, local_tm);
        assert(check_tm(local_tm));

        return local_tm;
    }
#endif

    if( cbson_gmtime64_r(time, &gm_tm) == NULL ) {
        TIME64_TRACE1("cbson_gmtime64_r returned null for %lld\n", *time);
        return NULL;
    }

    orig_year = gm_tm.tm_year;

    /* Map out-of-range years onto an equivalent year in the safe range. */
    if (gm_tm.tm_year > (2037 - 1900) ||
        gm_tm.tm_year < (1970 - 1900)
       )
    {
        TIME64_TRACE1("Mapping tm_year %lld to safe_year\n", (Year)gm_tm.tm_year);
        gm_tm.tm_year = safe_year((Year)(gm_tm.tm_year + 1900)) - 1900;
    }

    safe_time = (time_t)cbson_timegm64(&gm_tm);
    if( LOCALTIME_R(&safe_time, &safe_date) == NULL ) {
        TIME64_TRACE1("localtime_r(%d) returned NULL\n", (int)safe_time);
        return NULL;
    }

    pymongo_copy_tm_to_TM64(&safe_date, local_tm);

    /* Restore the real year, checking it still fits in an int. */
    local_tm->tm_year = (int)orig_year;
    if( local_tm->tm_year != orig_year ) {
        TIME64_TRACE2("tm_year overflow: tm_year %lld, orig_year %lld\n",
              (Year)local_tm->tm_year, (Year)orig_year);

#ifdef EOVERFLOW
        errno = EOVERFLOW;
#endif
        return NULL;
    }

    month_diff = local_tm->tm_mon - gm_tm.tm_mon;

    /*  When localtime is Dec 31st previous year and
        gmtime is Jan 1st next year.
    */
    if( month_diff == 11 ) {
        local_tm->tm_year--;
    }

    /*  When localtime is Jan 1st, next year and
        gmtime is Dec 31st, previous year.
    */
    if( month_diff == -11 ) {
        local_tm->tm_year++;
    }

    /* GMT is Jan 1st, xx01 year, but localtime is still Dec 31st
       in a non-leap xx00.  There is one point in the cycle
       we can't account for which the safe xx00 year is a leap
       year.  So we need to correct for Dec 31st coming out as
       the 366th day of the year.
    */
    if( !IS_LEAP(local_tm->tm_year) && local_tm->tm_yday == 365 )
        local_tm->tm_yday--;

    assert(check_tm(local_tm));

    return local_tm;
}
/* tm_wday is valid when it names one of the seven weekdays (0-6). */
int cbson_valid_tm_wday( const struct TM* date ) {
    return (date->tm_wday >= 0 && date->tm_wday <= 6) ? 1 : 0;
}
/* tm_mon is valid when it names one of the twelve months (0-11). */
int cbson_valid_tm_mon( const struct TM* date ) {
    return (date->tm_mon >= 0 && date->tm_mon <= 11) ? 1 : 0;
}
/* Non-thread safe versions of the above */
struct TM *cbson_localtime64(const Time64_T *time) {
    /* Refresh timezone state before converting, as localtime() would. */
#ifdef _MSC_VER
    _tzset();
#else
    tzset();
#endif
    /* Result lives in shared static storage; NOT thread safe. */
    return cbson_localtime64_r(time, &Static_Return_Date);
}
/* Convert a 64 bit time to broken-down UTC time.
   Like cbson_localtime64(), the result lives in the shared static
   buffer (Static_Return_Date) and is overwritten by the next call —
   not thread safe.  Use cbson_gmtime64_r() for reentrant behavior. */
struct TM *cbson_gmtime64(const Time64_T *time) {
    return cbson_gmtime64_r(time, &Static_Return_Date);
}

View File

@@ -0,0 +1,67 @@
#ifndef TIME64_H
# define TIME64_H

#include <time.h>
#include "time64_config.h"

/* Set our custom types.
   INT_64_T is supplied by time64_config.h and must be a 64 bit
   integer type. */
typedef INT_64_T Int64;
typedef Int64 Time64_T;   /* 64 bit stand-in for time_t */
typedef Int64 Year;       /* 64 bit year, counted from 1900 like struct tm */

/* A copy of the tm struct but with a 64 bit year */
struct TM64 {
    int tm_sec;
    int tm_min;
    int tm_hour;
    int tm_mday;
    int tm_mon;
    Year tm_year;   /* the only widened field: 64 bit year offset from 1900 */
    int tm_wday;
    int tm_yday;
    int tm_isdst;
#ifdef HAS_TM_TM_GMTOFF
    long tm_gmtoff;   /* BSD extension: seconds east of UTC */
#endif
#ifdef HAS_TM_TM_ZONE
    char *tm_zone;    /* BSD extension: timezone abbreviation */
#endif
};

/* Decide which tm struct to use: the 64 bit safe TM64, or the
   native tm (binary compatible with the system, but year-limited). */
#ifdef USE_TM64
#define TM TM64
#else
#define TM tm
#endif

/* Declare public functions.
   The *_r variants are reentrant (caller supplies the result struct);
   the plain variants return a pointer to shared static storage. */
struct TM *cbson_gmtime64_r (const Time64_T *, struct TM *);
struct TM *cbson_localtime64_r (const Time64_T *, struct TM *);
struct TM *cbson_gmtime64 (const Time64_T *);
struct TM *cbson_localtime64 (const Time64_T *);

Time64_T cbson_timegm64 (const struct TM *);
Time64_T cbson_mktime64 (const struct TM *);
/* NOTE(review): presumably an alias for cbson_mktime64 — confirm in the .c file. */
Time64_T timelocal64 (const struct TM *);

/* Not everyone has gm/localtime_r(), provide a replacement */
#ifdef HAS_LOCALTIME_R
# define LOCALTIME_R(clock, result) localtime_r(clock, result)
#else
# define LOCALTIME_R(clock, result) cbson_fake_localtime_r(clock, result)
#endif

#ifdef HAS_GMTIME_R
# define GMTIME_R(clock, result) gmtime_r(clock, result)
#else
# define GMTIME_R(clock, result) cbson_fake_gmtime_r(clock, result)
#endif

#endif /* TIME64_H */

View File

@@ -0,0 +1,78 @@
/* Configuration
   -------------
   Define as appropriate for your system.
   Sensible defaults provided.
*/

#ifndef TIME64_CONFIG_H
# define TIME64_CONFIG_H

/* Debugging
   TIME_64_DEBUG
   Define if you want debugging messages
*/
/* #define TIME_64_DEBUG */

/* INT_64_T
   A 64 bit integer type to use to store time and others.
   Must be defined.
*/
#define INT_64_T long long

/* USE_TM64
   Should we use a 64 bit safe replacement for tm? This will
   let you go past year 2 billion but the struct will be incompatible
   with tm. Conversion functions will be provided.
*/
/* #define USE_TM64 */

/* Availability of system functions.

   HAS_GMTIME_R
   Define if your system has gmtime_r()

   HAS_LOCALTIME_R
   Define if your system has localtime_r()

   HAS_TIMEGM
   Define if your system has timegm(), a GNU extension.
*/
/* The reentrant variants are assumed to be available on every
   non-Windows platform. */
#if !defined(WIN32) && !defined(_MSC_VER)
#define HAS_GMTIME_R
#define HAS_LOCALTIME_R
#endif

/* #define HAS_TIMEGM */

/* Details of non-standard tm struct elements.

   HAS_TM_TM_GMTOFF
   True if your tm struct has a "tm_gmtoff" element.
   A BSD extension.

   HAS_TM_TM_ZONE
   True if your tm struct has a "tm_zone" element.
   A BSD extension.
*/
/* #define HAS_TM_TM_GMTOFF */
/* #define HAS_TM_TM_ZONE */

/* USE_SYSTEM_LOCALTIME
   USE_SYSTEM_GMTIME
   USE_SYSTEM_MKTIME
   USE_SYSTEM_TIMEGM
   Should we use the system functions if the time is inside their range?
   Your system localtime() is probably more accurate, but our gmtime() is
   fast and safe.
*/
#define USE_SYSTEM_LOCALTIME
/* #define USE_SYSTEM_GMTIME */
#define USE_SYSTEM_MKTIME
/* #define USE_SYSTEM_TIMEGM */

#endif /* TIME64_CONFIG_H */

View File

@@ -0,0 +1,95 @@
/*
   Maximum and minimum inputs your system's respective time functions
   can correctly handle.  time64.h will use your system functions if
   the input falls inside these ranges and corresponding USE_SYSTEM_*
   constant is defined.
*/

#ifndef TIME64_LIMITS_H
#define TIME64_LIMITS_H

/* Max/min for localtime(): the span of a signed 32 bit time_t.
   The MIN expansions are parenthesized so each macro stays a single
   value inside any surrounding expression (an unparenthesized
   -2147483647-1 would bind wrongly in e.g. `x * SYSTEM_GMTIME_MIN`). */
#define SYSTEM_LOCALTIME_MAX 2147483647
#define SYSTEM_LOCALTIME_MIN (-2147483647-1)

/* Max/min for gmtime() */
#define SYSTEM_GMTIME_MAX 2147483647
#define SYSTEM_GMTIME_MIN (-2147483647-1)

/* Max/min inputs for mktime(): the 32 bit time_t extremes expressed as
   local broken-down time for US Pacific (UTC-8, tm_gmtoff -28800):
     MAX: 2038-01-18 19:14:07 PST  (time_t  2147483647)
     MIN: 1901-12-13 12:45:52 PST  (time_t -2147483648)
   Designated initializers are used because ISO C specifies which
   members struct tm must contain, but not their order within the
   struct — positional initialization is not portable. */
static const struct tm SYSTEM_MKTIME_MAX = {
    .tm_sec   = 7,
    .tm_min   = 14,
    .tm_hour  = 19,
    .tm_mday  = 18,
    .tm_mon   = 0,      /* January */
    .tm_year  = 138,    /* 2038 */
    .tm_wday  = 1,
    .tm_yday  = 17,
    .tm_isdst = 0
#ifdef HAS_TM_TM_GMTOFF
    , .tm_gmtoff = -28800
#endif
#ifdef HAS_TM_TM_ZONE
    , .tm_zone = "PST"
#endif
};

static const struct tm SYSTEM_MKTIME_MIN = {
    .tm_sec   = 52,
    .tm_min   = 45,
    .tm_hour  = 12,
    .tm_mday  = 13,
    .tm_mon   = 11,     /* December */
    .tm_year  = 1,      /* 1901 */
    .tm_wday  = 5,
    .tm_yday  = 346,
    .tm_isdst = 0
#ifdef HAS_TM_TM_GMTOFF
    , .tm_gmtoff = -28800
#endif
#ifdef HAS_TM_TM_ZONE
    , .tm_zone = "PST"
#endif
};

/* Max/min inputs for timegm(): the same time_t extremes in UTC:
     MAX: 2038-01-19 03:14:07 UTC
     MIN: 1901-12-13 20:45:52 UTC */
#ifdef HAS_TIMEGM
static const struct tm SYSTEM_TIMEGM_MAX = {
    .tm_sec   = 7,
    .tm_min   = 14,
    .tm_hour  = 3,
    .tm_mday  = 19,
    .tm_mon   = 0,      /* January */
    .tm_year  = 138,    /* 2038 */
    .tm_wday  = 2,
    .tm_yday  = 18,
    .tm_isdst = 0
#ifdef HAS_TM_TM_GMTOFF
    , .tm_gmtoff = 0
#endif
#ifdef HAS_TM_TM_ZONE
    , .tm_zone = "UTC"
#endif
};

static const struct tm SYSTEM_TIMEGM_MIN = {
    .tm_sec   = 52,
    .tm_min   = 45,
    .tm_hour  = 20,
    .tm_mday  = 13,
    .tm_mon   = 11,     /* December */
    .tm_year  = 1,      /* 1901 */
    .tm_wday  = 5,
    .tm_yday  = 346,
    .tm_isdst = 0
#ifdef HAS_TM_TM_GMTOFF
    , .tm_gmtoff = 0
#endif
#ifdef HAS_TM_TM_ZONE
    , .tm_zone = "UTC"
#endif
};
#endif /* HAS_TIMEGM */

#endif /* TIME64_LIMITS_H */

View File

@@ -0,0 +1,123 @@
# Copyright 2010-2015 MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for representing MongoDB internal Timestamps."""
from __future__ import annotations
import calendar
import datetime
from typing import Any, Union
from bson._helpers import _getstate_slots, _setstate_slots
from bson.tz_util import utc
UPPERBOUND = 4294967296  # 2**32: exclusive upper bound for both `time` and `inc`
class Timestamp:
    """MongoDB internal timestamps used in the opLog."""

    __slots__ = ("__time", "__inc")

    __getstate__ = _getstate_slots
    __setstate__ = _setstate_slots

    # BSON type marker for the internal timestamp type.
    _type_marker = 17

    def __init__(self, time: Union[datetime.datetime, int], inc: int) -> None:
        """Create a new :class:`Timestamp`.

        This class is only for use with the MongoDB opLog. If you need
        to store a regular timestamp, please use a
        :class:`~datetime.datetime`.

        Raises :class:`TypeError` if `time` is not an instance of
        :class: `int` or :class:`~datetime.datetime`, or `inc` is not
        an instance of :class:`int`. Raises :class:`ValueError` if
        `time` or `inc` is not in [0, 2**32).

        :param time: time in seconds since epoch UTC, or a naive UTC
            :class:`~datetime.datetime`, or an aware
            :class:`~datetime.datetime`
        :param inc: the incrementing counter
        """
        if isinstance(time, datetime.datetime):
            # Normalize an aware datetime to UTC, then collapse it to
            # whole epoch seconds (microseconds are discarded).
            offset = time.utcoffset()
            if offset is not None:
                time = time - offset
            time = int(calendar.timegm(time.timetuple()))
        if not isinstance(time, int):
            raise TypeError(f"time must be an instance of int, not {type(time)}")
        if not isinstance(inc, int):
            raise TypeError(f"inc must be an instance of int, not {type(inc)}")
        if time < 0 or time >= UPPERBOUND:
            raise ValueError("time must be contained in [0, 2**32)")
        if inc < 0 or inc >= UPPERBOUND:
            raise ValueError("inc must be contained in [0, 2**32)")
        self.__time = time
        self.__inc = inc

    @property
    def time(self) -> int:
        """Get the time portion of this :class:`Timestamp`."""
        return self.__time

    @property
    def inc(self) -> int:
        """Get the inc portion of this :class:`Timestamp`."""
        return self.__inc

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, Timestamp):
            return NotImplemented
        return self.__time == other.time and self.__inc == other.inc

    def __hash__(self) -> int:
        return hash(self.__time) ^ hash(self.__inc)

    def __ne__(self, other: Any) -> bool:
        return not (self == other)

    def __lt__(self, other: Any) -> bool:
        if not isinstance(other, Timestamp):
            return NotImplemented
        return (self.__time, self.__inc) < (other.time, other.inc)

    def __le__(self, other: Any) -> bool:
        if not isinstance(other, Timestamp):
            return NotImplemented
        return (self.__time, self.__inc) <= (other.time, other.inc)

    def __gt__(self, other: Any) -> bool:
        if not isinstance(other, Timestamp):
            return NotImplemented
        return (self.__time, self.__inc) > (other.time, other.inc)

    def __ge__(self, other: Any) -> bool:
        if not isinstance(other, Timestamp):
            return NotImplemented
        return (self.__time, self.__inc) >= (other.time, other.inc)

    def __repr__(self) -> str:
        return "Timestamp({}, {})".format(self.__time, self.__inc)

    def as_datetime(self) -> datetime.datetime:
        """Return a :class:`~datetime.datetime` instance corresponding
        to the time portion of this :class:`Timestamp`.

        The returned datetime's timezone is UTC.
        """
        return datetime.datetime.fromtimestamp(self.__time, utc)

View File

@@ -0,0 +1,31 @@
# Copyright 2023-Present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Type aliases used by bson"""
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Mapping, MutableMapping, TypeVar, Union
if TYPE_CHECKING:
from array import array
from mmap import mmap
from bson.raw_bson import RawBSONDocument
# Common Shared Types.
# A document as produced by the driver: any mutable mapping, or a RawBSONDocument.
_DocumentOut = Union[MutableMapping[str, Any], "RawBSONDocument"]
# TypeVar for the caller-chosen document class, bound to a read-only mapping.
_DocumentType = TypeVar("_DocumentType", bound=Mapping[str, Any])
# Same bound as _DocumentType; a second, independent TypeVar for signatures
# that need two document type parameters.
_DocumentTypeArg = TypeVar("_DocumentTypeArg", bound=Mapping[str, Any])
# Buffer-like sources of raw bytes accepted as input.
_ReadableBuffer = Union[bytes, memoryview, bytearray, "mmap", "array"]  # type: ignore[type-arg]

View File

@@ -0,0 +1,56 @@
# Copyright 2010-2015 MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Timezone related utilities for BSON."""
from __future__ import annotations
from datetime import datetime, timedelta, tzinfo
from typing import Optional, Tuple, Union
ZERO: timedelta = timedelta(0)


class FixedOffset(tzinfo):
    """A timezone at a fixed offset from UTC, given in minutes east.

    Follows the fixed-offset example from the Python standard library
    documentation for :class:`datetime.tzinfo` objects.

    ``__getinitargs__`` is defined so instances can be pickled and
    copied.
    """

    def __init__(self, offset: Union[float, timedelta], name: str) -> None:
        # Accept either a ready-made timedelta or a count of minutes.
        delta = offset if isinstance(offset, timedelta) else timedelta(minutes=offset)
        self.__offset = delta
        self.__name = name

    def __getinitargs__(self) -> Tuple[timedelta, str]:
        return self.__offset, self.__name

    def __repr__(self) -> str:
        return "{}({!r}, {!r})".format(type(self).__name__, self.__offset, self.__name)

    def utcoffset(self, dt: Optional[datetime]) -> timedelta:
        return self.__offset

    def tzname(self, dt: Optional[datetime]) -> str:
        return self.__name

    def dst(self, dt: Optional[datetime]) -> timedelta:
        # A fixed offset has no daylight-saving component.
        return ZERO


utc: FixedOffset = FixedOffset(0, "UTC")
"""Fixed offset timezone representing UTC."""