Skip to content

API Reference: CrossContract

BaseContract

BaseContract

Bases: BaseMetaData

The BaseContract class is the most basic representation of a data contract. It combines the minimum required metadata with the contract structure given by Schema.

It serves as the foundational blueprint for defining data contracts. Any custom contract implementation MUST inherit from this class to ensure structural consistency and compatibility with the system.

Attributes:

Name Type Description
name str

A unique identifier for the data contract. Must contain only alphanumeric characters, underscores, or hyphens. Maximum length is 100 characters.

tableschema TableSchema

The schema defining the structure of the contract (fields, primary keys, foreign keys, field descriptors).

Example

To implement a custom contract with additional metadata:

from pydantic import Field
from crosscontract.contracts import BaseContract

class MyCustomContract(BaseContract):
    # Add custom metadata fields
    owner: str = Field(description="The owner of this dataset")
    version: str = Field(description="Semantic version of the contract")

    # The 'tableschema' field is already inherited from BaseContract!
Source code in src/crosscontract/contracts/contracts/base_contract.py
class BaseContract(BaseMetaData):
    """
    Minimal data contract: the metadata from BaseMetaData plus a table schema.

    BaseContract is the blueprint every contract builds on. Custom contract
    implementations MUST inherit from this class so that they stay structurally
    consistent and compatible with the rest of the system.

    Attributes:
        name (str): A unique identifier for the data contract.
            Must contain only alphanumeric characters, underscores, or hyphens.
            Maximum length is 100 characters.
        tableschema (TableSchema): The schema defining the structure of the contract
            (fields, primary keys, foreign keys, field descriptors).

    Example:
        A custom contract that carries extra metadata:

        ```python
        from pydantic import Field
        from crosscontract.contracts import BaseContract

        class MyCustomContract(BaseContract):
            # Add custom metadata fields
            owner: str = Field(description="The owner of this dataset")
            version: str = Field(description="Semantic version of the contract")

            # The 'tableschema' field is already inherited from BaseContract!
        ```
    """

    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    tableschema: TableSchema = Field(
        description="The Frictionless Table Schema definition."
    )

    @classmethod
    def from_file(cls, file_path: str | Path) -> Self:
        """
        Build a contract from the content of a YAML or JSON file.

        Args:
            file_path (str | Path): Location of the YAML or JSON file.

        Returns:
            Self: An instance of the calling class, validated from the file
                content.

        Raises:
            FileNotFoundError: If the specified file does not exist.
            ValueError: If the file format is not supported (not .json, .yaml, or .yml).
        """
        return cls.model_validate(read_yaml_or_json_file(file_path))

    @model_validator(mode="after")
    def _validate_self_reference(self) -> Self:
        """Reject foreign keys that name this contract as their resource.

        A self-reference has to be expressed with ``None`` in the resource
        field, so a reference whose resource equals the contract name raises.
        """
        for foreign_key in self.tableschema.foreignKeys:
            resource = foreign_key.reference.resource
            if resource != self.name:
                continue
            raise ValueError(
                f"Foreign key reference resource '{resource}' "
                "cannot be the same as the contract name. Self-references must "
                "use None for the resource field."
            )
        return self

    def validate_references(
        self,
        resolver: ContractResolver,
        enforce_star_schema: bool = False,
    ) -> None:
        """Check that each external foreign key points at a known contract with
        matching fields.

        By default the check does not care about topology: it only confirms
        that the referenced contract can be resolved and that the referenced
        fields line up. Subclasses that model a specific topology (e.g. a star
        schema) may change the default of `enforce_star_schema` to True; see
        `CrossContract.validate_references`.

        Args:
            resolver: Lookup for referenced contracts by name.
            enforce_star_schema: If True, an external reference must also point
                to a contract whose tableschema is a BaseDimensionSchema. The
                schema type is inspected, not the contract type — contract
                types such as Dimension or FlexibleDimension are what enforce
                the schema constraint.

        Raises:
            ValueError: If at least one reference fails validation. Every
                failure is collected first and all of them are reported in one
                exception.
        """
        # imported lazily to avoid a circular import
        from crosscontract.contracts.schema.subschemas import BaseDimensionSchema

        problems: list[str] = []
        for foreign_key in self.tableschema.foreignKeys:
            resource = foreign_key.reference.resource
            # self-references (None or own name) are not external, skip them
            if resource is None or resource == self.name:
                continue

            contract = resolver.resolve(resource)
            if contract is None:
                problems.append(
                    f"Foreign key references unknown contract '{resource}'."
                )
            elif enforce_star_schema and not isinstance(
                contract.tableschema, BaseDimensionSchema
            ):
                problems.append(
                    f"Foreign key references contract '{resource}' with invalid schema "
                    f"type '{type(contract.tableschema).__name__}'. Expected a "
                    "dimension schema."
                )
            else:
                try:
                    foreign_key.validate_referenced_fields(
                        contract.tableschema.field_names
                    )
                except ValueError as exc:
                    problems.append(f"Foreign key to '{resource}': {exc}")

        if not problems:
            return

        header = f"Reference validation failed for '{self.name}':\n  - "
        raise ValueError(header + "\n  - ".join(problems))

from_file(file_path) classmethod

Load a BaseContract from a YAML or JSON file.

Parameters:

Name Type Description Default
file_path str | Path

The path to the YAML or JSON file.

required

Returns:

Name Type Description
Self Self

An instance of BaseContract loaded from the file.

Raises:

Type Description
FileNotFoundError

If the specified file does not exist.

ValueError

If the file format is not supported (not .json, .yaml, or .yml).

Source code in src/crosscontract/contracts/contracts/base_contract.py
@classmethod
def from_file(cls, file_path: str | Path) -> Self:
    """
    Load a contract from a YAML or JSON file.

    The file content is read with ``read_yaml_or_json_file`` and then validated
    through ``cls.model_validate``, so calling this on a subclass yields an
    instance of that subclass.

    Args:
        file_path (str | Path): The path to the YAML or JSON file.

    Returns:
        Self: An instance of the calling class loaded from the file.

    Raises:
        FileNotFoundError: If the specified file does not exist.
        ValueError: If the file format is not supported (not .json, .yaml, or .yml).
    """
    # NOTE(review): validation failures raised by model_validate propagate
    # unchanged; confirm whether callers expect them documented under Raises.
    data = read_yaml_or_json_file(file_path)
    return cls.model_validate(data)

validate_references(resolver, enforce_star_schema=False)

Validate that every external foreign key resolves to a contract whose fields match the reference.

This check is topology-agnostic by default — it only verifies that referenced contracts exist and their fields line up. Subclasses that enforce a particular topology (e.g. star schema) may flip the default of enforce_star_schema to True; see CrossContract.validate_references.

Parameters:

Name Type Description Default
resolver ContractResolver

Lookup for referenced contracts by name.

required
enforce_star_schema bool

If True, additionally require that every external reference points to a contract whose tableschema is a BaseDimensionSchema. The check is on the schema type, not the contract type — users pick contract types (e.g. Dimension, FlexibleDimension) that in turn enforce the schema constraint.

False

Raises:

Type Description
ValueError

If any reference validation checks fail, with details on the specific errors. All failures are collected and reported in a single exception.

Source code in src/crosscontract/contracts/contracts/base_contract.py
def validate_references(
    self,
    resolver: ContractResolver,
    enforce_star_schema: bool = False,
) -> None:
    """Check that each external foreign key points at a known contract with
    matching fields.

    By default the check does not care about topology: it only confirms that
    the referenced contract can be resolved and that the referenced fields
    line up. Subclasses that model a specific topology (e.g. a star schema)
    may change the default of `enforce_star_schema` to True; see
    `CrossContract.validate_references`.

    Args:
        resolver: Lookup for referenced contracts by name.
        enforce_star_schema: If True, an external reference must also point to
            a contract whose tableschema is a BaseDimensionSchema. The schema
            type is inspected, not the contract type — contract types such as
            Dimension or FlexibleDimension are what enforce the schema
            constraint.

    Raises:
        ValueError: If at least one reference fails validation. Every failure
            is collected first and all of them are reported in one exception.
    """
    # imported lazily to avoid a circular import
    from crosscontract.contracts.schema.subschemas import BaseDimensionSchema

    problems: list[str] = []
    for foreign_key in self.tableschema.foreignKeys:
        resource = foreign_key.reference.resource
        # self-references (None or own name) are not external, skip them
        if resource is None or resource == self.name:
            continue

        contract = resolver.resolve(resource)
        if contract is None:
            problems.append(f"Foreign key references unknown contract '{resource}'.")
        elif enforce_star_schema and not isinstance(
            contract.tableschema, BaseDimensionSchema
        ):
            problems.append(
                f"Foreign key references contract '{resource}' with invalid schema "
                f"type '{type(contract.tableschema).__name__}'. Expected a "
                "dimension schema."
            )
        else:
            try:
                foreign_key.validate_referenced_fields(
                    contract.tableschema.field_names
                )
            except ValueError as exc:
                problems.append(f"Foreign key to '{resource}': {exc}")

    if not problems:
        return

    header = f"Reference validation failed for '{self.name}':\n  - "
    raise ValueError(header + "\n  - ".join(problems))

BaseMetaData

Bases: BaseModel

The BaseMetaData class encapsulates the essential metadata attributes required for defining a data contract. Every data contract MUST include these metadata fields to ensure proper identification and description. To extend the metadata for specific use cases, inherit from this class and add additional fields as necessary. Then use the extended metadata class as a base for your custom contract together with BaseContract.

Attributes:

Name Type Description
name str

A unique identifier for the data contract. Must contain only alphanumeric characters, underscores, or hyphens. Maximum length is 100 characters.

Source code in src/crosscontract/contracts/contracts/base_contract.py
class BaseMetaData(BaseModel):
    """
    Essential metadata that every data contract MUST carry.

    BaseMetaData holds the attributes needed to identify and describe a data
    contract. For use-case specific metadata, subclass it and add the extra
    fields, then combine the extended metadata class with BaseContract as the
    bases of your custom contract.

    Attributes:
        name (str): A unique identifier for the data contract.
            Must contain only alphanumeric characters, underscores, or hyphens.
            Maximum length is 100 characters.
    """

    # unknown keys are rejected instead of being silently ignored
    model_config = ConfigDict(extra="forbid")

    name: str = Field(
        description="A unique identifier for the data contract.",
        max_length=100,
        pattern="^[a-zA-Z0-9_-]+$",
    )

CrossContract

CrossContract

Bases: BaseContract, CrossMetaData

A concrete implementation of a data contract for the CrossContract system.

This class extends BaseContract with the CrossMetaData fields (title, description, tags) and a contract type. It serves as the standard contract definition for resources within the CrossContract ecosystem.

Attributes:

Name Type Description
name str

A unique identifier for the data contract. Must contain only alphanumeric characters, underscores, or hyphens. Inherited from BaseContract.

title str

A human-readable title for the data.

description str

A human-readable description of the data.

tags list[str]

A list of tags used for categorization and filtering.

tableschema Schema

The Frictionless Table Schema definition, available as the tableschema attribute. This is the core schema definition that describes the structure of the data, including fields, types, and constraints. Its concrete schema type depends on the contract type (e.g., Table, Dimension, ValueVariable, FlexibleDimension).

Source code in src/crosscontract/contracts/contracts/cross_contract.py
class CrossContract(BaseContract, CrossMetaData):
    """
    The standard contract definition used inside the CrossContract system.

    It combines `BaseContract` with the `CrossMetaData` fields and adds a
    `contract_type` that selects the shape of the table schema.

    Attributes:
        name (str): A unique identifier for the data contract.
            Must contain only alphanumeric characters, underscores, or hyphens.
            Inherited from BaseContract.
        title (str): A human-readable title for the data.
        description (str): A human-readable description of the data.
        tags (list[str]): A list of tags used for categorization and filtering.
        contract_type (ContractType): The type of the contract; defaults to
            "General" and determines the structure of the tableschema.
        tableschema (AnyTableSchema): The Frictionless Table Schema definition.
            This is the core schema definition that describes the structure of the data,
            including fields, types, and constraints. It changes based on the contract
            type (e.g., Table, Dimension, ValueVariable, FlexibleDimension).
    """

    model_config = ConfigDict(
        extra="forbid",
        populate_by_name=True,
        serialize_by_alias=True,
    )

    contract_type: ContractType = Field(
        default="General",
        description=(
            "The type of the contract, which determines the structure of the "
            "tableschema."
        ),
    )
    tableschema: AnyTableSchema = Field(
        description="The Frictionless Table Schema definition."
    )

    @classmethod
    def from_server(cls, data: dict[str, Any]) -> "CrossContract":
        """
        Build a CrossContract from a server-side payload.

        Server responses carry the materialized `tableschema` for every
        contract type so consumers can work with it. For Dimension contracts
        the schema is derived from a fixed template and the public validator
        forbids it as input, so it is dropped here before validation and the
        template injection rebuilds it.

        Use this for any dict coming from the server or from stored server
        payloads (e.g. a DB row). For user-authored dicts, use the standard
        constructor.

        Args:
            data (dict[str, Any]): A dictionary containing the data for the
                CrossContract. It is not modified.

        Returns:
            CrossContract: An instance of CrossContract initialized with the
                provided data.
        """
        payload = data
        if data.get("contract_type") == "Dimension":
            # dimensions must not receive a tableschema, so leave it out of
            # a filtered copy rather than touching the caller's dict
            payload = {
                key: value for key, value in data.items() if key != "tableschema"
            }
        return cls.model_validate(payload)

    def to_server(self) -> dict[str, Any]:
        """Dump the contract into the dictionary shape sent to the server.

        The instance is dumped in JSON mode, so the result can be serialized
        to JSON for API requests. For Dimension contracts the `tableschema`
        key is left out of the payload.

        Returns:
            dict[str, Any]: A dictionary representation of the CrossContract instance.
        """
        payload = self.model_dump(mode="json")
        if self.contract_type != "Dimension":
            return payload
        # the server creates the tableschema for dimensions itself
        payload.pop("tableschema", None)
        return payload

    @model_validator(mode="before")
    @classmethod
    def _inject_table_type(cls, data: Any) -> Any:
        """Copy the contract_type into the tableschema as its table_type.

        The tableschema uses table_type as the discriminator that selects the
        schema class, and users should not have to spell it out themselves.
        Usually the schema name equals the contract type, but the two are kept
        separate so several contract types can share one schema.
        """
        # An already built CrossContract passes through untouched. Pydantic
        # bypasses before-validators for validated instances anyway; the
        # branch is kept for clarity and as a safety net.
        if isinstance(data, cls):
            return data  # pragma: no cover - this is just a safety check

        if isinstance(data, dict):
            return cls._inject_table_type_to_schema(data)

        # Anything else (for example an ORM model) is rejected right away.
        raise TypeError(
            f"CrossContract must be initialized with a dictionary or keyword "
            f"arguments, got {type(data).__name__}."
        )

    @staticmethod
    def _inject_table_type_to_schema(data: dict[str, Any]) -> dict[str, Any]:
        """Return the input with the table_type written into its tableschema.

        Args:
            data (dict[str, Any]): The input data dictionary to be processed.

        Returns:
            dict[str, Any]: The processed data dictionary with the table_type
                injected. The input is returned unchanged when the tableschema
                already is a TableSchema instance; otherwise shallow copies
                are returned and the input is left untouched.

        Raises:
            ValueError: If a TableSchema instance has a table_type different
                from the contract_type, or if the tableschema dictionary
                already contains a 'table_type' key.
            TypeError: If the tableschema is neither a TableSchema nor a
                dictionary.
        """
        contract_type = data.get("contract_type", "General")
        raw_schema = data.get("tableschema")

        # if no schema is provided, start from an empty one
        if raw_schema is None:
            raw_schema = {}

        if isinstance(raw_schema, TableSchema):
            # an existing instance needs no injection, only a consistency check
            if raw_schema.table_type == contract_type:
                return data
            raise ValueError(
                f"Mismatch between contract_type '{contract_type}' and "
                f"tableschema.table_type '{raw_schema.table_type}'."
            )

        if not isinstance(raw_schema, dict):
            raise TypeError(
                f"Expected 'tableschema' to be a dictionary, got "
                f"{type(raw_schema).__name__}."
            )

        # the table_type is owned by the contract level, never by the user
        if "table_type" in raw_schema:
            raise ValueError(
                "Do not define 'table_type' inside the tableschema. "
                "It is automatically inferred from the root contract level."
            )

        # build fresh dictionaries so the caller's input stays unmodified
        return {**data, "tableschema": {**raw_schema, "table_type": contract_type}}

    def validate_references(
        self,
        resolver: ContractResolver,
        enforce_star_schema: bool = True,
    ) -> None:
        """Validate references, enforcing the star schema unless told otherwise.

        CrossContract models a star schema: external foreign keys must point to
        contracts whose tableschema is a BaseDimensionSchema. Users get there
        by picking a dimension-flavored contract_type (Dimension or
        FlexibleDimension), which binds the matching schema subclass through
        the discriminator. ``enforce_star_schema=True`` is the default for
        that reason, so a call that passes only ``resolver`` runs the
        canonical check. The work itself is delegated to
        BaseContract.validate_references.

        Args:
            resolver: Lookup for referenced contracts by name.
            enforce_star_schema: If True (default), require that every external
                reference points to a contract whose tableschema is a
                BaseDimensionSchema. The schema type is inspected, not the
                contract type. Pass False to run only the existence + field
                integrity check — see BaseContract.validate_references for
                that topology-agnostic mode.

        Raises:
            ValueError: If at least one reference fails validation. Every
                failure is collected and reported in a single exception.
        """
        super().validate_references(resolver, enforce_star_schema=enforce_star_schema)

from_server(data) classmethod

Server responses include the materialized tableschema for all contract types so consumers can work with it. For Dimension contracts the schema is derived from a fixed template and the public validator forbids it as input; this method strips it before validation so the template injection can rebuild it cleanly.

Use this for any dict coming from the server or from stored server payloads (e.g. a DB row). For user-authored dicts, use the standard constructor.

Parameters:

Name Type Description Default
data dict[str, Any]

A dictionary containing the data for the CrossContract.

required

Returns:

Name Type Description
CrossContract CrossContract

An instance of CrossContract initialized with the provided data.

Source code in src/crosscontract/contracts/contracts/cross_contract.py
@classmethod
def from_server(cls, data: dict[str, Any]) -> "CrossContract":
    """
    Build a CrossContract from a server-side payload.

    Server responses carry the materialized `tableschema` for every contract
    type so consumers can work with it. For Dimension contracts the schema is
    derived from a fixed template and the public validator forbids it as
    input, so it is dropped here before validation and the template injection
    rebuilds it.

    Use this for any dict coming from the server or from stored server
    payloads (e.g. a DB row). For user-authored dicts, use the standard
    constructor.

    Args:
        data (dict[str, Any]): A dictionary containing the data for the
            CrossContract. It is not modified.

    Returns:
        CrossContract: An instance of CrossContract initialized with the
            provided data.
    """
    payload = data
    if data.get("contract_type") == "Dimension":
        # dimensions must not receive a tableschema, so leave it out of a
        # filtered copy rather than touching the caller's dict
        payload = {key: value for key, value in data.items() if key != "tableschema"}
    return cls.model_validate(payload)

to_server()

Serializes the CrossContract instance into a dictionary format suitable for server communication.

This method converts the CrossContract instance into a dictionary that can be easily serialized to JSON for API requests. It ensures that all necessary fields are included and properly formatted according to the server's expectations.

Returns:

Type Description
dict[str, Any]

dict[str, Any]: A dictionary representation of the CrossContract instance.

Source code in src/crosscontract/contracts/contracts/cross_contract.py
def to_server(self) -> dict[str, Any]:
    """Dump the contract into the dictionary shape sent to the server.

    The instance is dumped in JSON mode, so the result can be serialized to
    JSON for API requests. For Dimension contracts the `tableschema` key is
    left out of the payload.

    Returns:
        dict[str, Any]: A dictionary representation of the CrossContract instance.
    """
    payload = self.model_dump(mode="json")
    if self.contract_type != "Dimension":
        return payload
    # the server creates the tableschema for dimensions itself
    payload.pop("tableschema", None)
    return payload

validate_references(resolver, enforce_star_schema=True)

Validate references with star-schema enforcement on by default.

CrossContract models a star schema: external foreign keys must point to contracts whose tableschema is a BaseDimensionSchema. Users achieve this by choosing a dimension-flavored contract_type (Dimension or FlexibleDimension), which binds the corresponding schema subclass via the discriminator. The default of enforce_star_schema=True reflects this invariant, so callers get the canonical check when they omit that optional argument and provide only the required resolver. Delegates to BaseContract.validate_references; see there for implementation details.

Parameters:

Name Type Description Default
resolver ContractResolver

Lookup for referenced contracts by name.

required
enforce_star_schema bool

If True (default), require that every external reference points to a contract whose tableschema is a BaseDimensionSchema. The check is on the schema type, not the contract type — users pick contract types (e.g. Dimension, FlexibleDimension) that in turn enforce the schema constraint. Pass False to run only the existence + field integrity check — see BaseContract.validate_references for that topology-agnostic mode.

True

Raises:

Type Description
ValueError

If any reference validation checks fail, with details on the specific errors. All failures are collected and reported in a single exception.

Source code in src/crosscontract/contracts/contracts/cross_contract.py
def validate_references(
    self,
    resolver: ContractResolver,
    enforce_star_schema: bool = True,
) -> None:
    """Validate references with star-schema enforcement on by default.

    CrossContract models a star schema: external foreign keys must point to
    contracts whose tableschema is a BaseDimensionSchema. Users achieve this
    by choosing a dimension-flavored contract_type (Dimension or
    FlexibleDimension), which binds the corresponding schema subclass via
    the discriminator. The default of ``enforce_star_schema=True`` reflects
    this invariant, so callers get the canonical check when they omit that
    optional argument and provide only the required ``resolver``.

    This override only changes the default of ``enforce_star_schema``; the
    checks themselves run in BaseContract.validate_references.

    Args:
        resolver: Lookup for referenced contracts by name.
        enforce_star_schema: If True (default), require that every external
            reference points to a contract whose tableschema is a
            BaseDimensionSchema. The check is on the schema type, not the
            contract type — users pick contract types (e.g. Dimension,
            FlexibleDimension) that in turn enforce the schema constraint.
            Pass False to run only the existence + field integrity check —
            see BaseContract.validate_references for that topology-agnostic
            mode.

    Raises:
        ValueError: If any reference validation checks fail, with details on
            the specific errors. All failures are collected and reported in
            a single exception.
    """
    # forward the flag explicitly so the parent's default (False) is not used
    super().validate_references(resolver, enforce_star_schema=enforce_star_schema)

CrossMetaData

Bases: BaseMetaData

Metadata specific to the CrossContract system, extending the base metadata requirements.

Attributes:

Name Type Description
title str

A human-readable title for the data.

description str

A human-readable description of the data.

tags list[str]

A list of tags for categorization and filtering.

Source code in src/crosscontract/contracts/contracts/cross_contract.py
class CrossMetaData(BaseMetaData):
    """
    Metadata specific to the CrossContract system,
    extending the base metadata requirements.

    Attributes:
        title (str): A human-readable title for the data.
        description (str): A human-readable description of the data.
        tags (list[str]): A list of tags for categorization and filtering.
            Defaults to an empty list.
    """

    model_config = ConfigDict(str_strip_whitespace=True)

    # Adjacent string literals are concatenated verbatim, so each fragment
    # must carry its own separating space (exactly one) at the seam.
    title: str = Field(
        description=(
            "A human-readable title for the data. "
            "Think of this as the label that will be used in graphs and tables."
        ),
    )

    description: str = Field(
        description=(
            "A human-readable description of the data. This should explain what "
            "the data is about."
        )
    )

    tags: list[str] = Field(
        default_factory=list,
        description=(
            "A list of tags that can be used to categorize the table. "
            "This can be used to filter tables in the UI."
        ),
    )

Schemas

MandatoryField

Bases: BaseModel

A helper class to define mandatory fields in the schema. This is used for validation purposes to ensure that certain fields are always present in the schema.

Source code in src/crosscontract/contracts/schema/schema.py
class MandatoryField(BaseModel):
    """
    Describes one field that a schema is required to contain.

    Instances are used during validation to make sure that certain fields are
    always present in the schema.
    """

    name: str = Field(
        description="The name of the mandatory field.",
    )
    # None is the default when no type is given for the mandatory field
    type: Literal["integer", "number", "string", "datetime", "list"] | None = Field(
        description="The type of the mandatory field.",
        default=None,
    )
    description: str = Field(
        description="A description of the mandatory field and its purpose.",
    )

TableSchema

Bases: BaseModel

A Frictionless Table Schema compatible schema definition. Includes fields, primary keys, foreign keys, and field descriptors.

Source code in src/crosscontract/contracts/schema/schema.py
class TableSchema(BaseModel):
    """
    A Frictionless Table Schema compatible schema definition.
    Includes fields, primary keys, foreign keys, and field descriptors.
    """

    _mandatory_fields: ClassVar[list[MandatoryField]] = []
    """Fields that a schema subclass is required to declare.

    Override in subclasses to enforce domain-specific invariants.
    """

    table_type: Literal["General"] = Field(
        default="General",
        description="Type of the table determines the structure of the schema.",
        exclude=True,
        repr=False,
    )

    model_config = ConfigDict(
        title="TableSchema", ignored_types=(cached_property,), str_strip_whitespace=True
    )

    fields: list[FieldUnion] = Field(
        default_factory=list,
        description="An `array` of Table Schema Field objects.",
        min_length=1,
    )
    primaryKey: PrimaryKey = Field(
        default_factory=PrimaryKey,
        description=(
            "The primary key definition. Primary keys are used to uniquely "
            "identify records in the data."
        ),
    )
    foreignKeys: ForeignKeys = Field(
        default_factory=ForeignKeys,
        description=(
            "The foreign key definitions. Foreign keys are used to establish "
            "relationships between tables."
        ),
    )
    fieldDescriptors: FieldDescriptors | None = None

    def field_iterator(self) -> Iterator[FieldUnion]:
        """Returns an iterator over the fields in the schema."""
        return iter(self.fields)

    def __getitem__(self, key: int | str) -> FieldUnion:
        if isinstance(key, int):
            return self.fields[key]
        try:
            return self._name_index[key]
        except KeyError as e:
            raise KeyError(f"Field '{key}' not found in Schema.") from e

    def __len__(self) -> int:
        return len(self.fields)

    @cached_property
    def _name_index(self) -> dict[str, FieldUnion]:
        """
        Creates a dictionary mapping field names to field objects.
        This runs only once when accessed, providing O(1) lookups thereafter.
        """
        return {field.name: field for field in self.fields}

    @property
    def field_names(self) -> list[str]:
        """Returns a list of all field names."""
        return list(self._name_index)

    def get(self, name: str) -> FieldUnion | None:
        """Returns the field by name, or None if it doesn't exist."""
        return self._name_index.get(name)

    def has_fields(self, field_names: str | list[str]) -> bool:
        """Check if a field with the given name exists in the data contract."""
        if isinstance(field_names, str):
            return field_names in self.field_names
        else:
            return all(name in self.field_names for name in field_names)

    @model_validator(mode="after")
    def _validate_mandatory_fields(self) -> Self:
        """Validate that all mandatory fields are present and of the correct type."""
        errors: list[str] = []
        for spec in self._mandatory_fields:
            field = self.get(spec.name)
            if field is None:
                errors.append(f"missing field '{spec.name}' — {spec.description}")
            elif spec.type is not None and field.type != spec.type:
                errors.append(
                    f"field '{spec.name}' must be of type '{spec.type}', "
                    f"got '{field.type}' — {spec.description}"
                )
        if errors:
            raise ValueError(
                f"Mandatory field validation failed for "
                f"'{type(self).__name__}':\n  - " + "\n  - ".join(errors)
            )
        return self

    @model_validator(mode="after")
    def validate_structural_integrity(self) -> "TableSchema":
        """
        Validates that all key definitions refer to fields that actually
        exist in the schema.
        """
        valid_fields = self.field_names

        if self.primaryKey:
            self.primaryKey.validate_fields(valid_fields)

        if self.foreignKeys:
            for fk in self.foreignKeys:
                fk.validate_fields(valid_fields)
                if fk.reference.resource is None:
                    fk.validate_referenced_fields(valid_fields)

        if self.fieldDescriptors is not None:
            self.fieldDescriptors.validate_all_exist(valid_fields)
        return self

    @classmethod
    def from_file(cls, file_path: str | Path) -> Self:
        data = read_yaml_or_json_file(file_path)
        return cls.model_validate(data)

    def to_sa_table(
        self, metadata: MetaData | None = None, table_name: str | None = None
    ) -> Table:
        from .adapters import SQLAlchemyPostgresAdapter

        if metadata is None:
            metadata = MetaData()
        if table_name is None:
            table_name = f"dct_{getattr(self, 'name', 'contract_table')}"
        return SQLAlchemyPostgresAdapter.convert_schema(
            self, metadata=metadata, table_name=table_name
        )

    def to_pandera_schema(
        self,
        name: str = "ConvertedSchema",
        primary_key_values: list[tuple[Any, ...]] | None = None,
        foreign_key_values: dict[tuple[str, ...], list[tuple[Any, ...]]] | None = None,
        skip_primary_key_validation: bool = False,
        skip_foreign_key_validation: bool = False,
        backend: Literal["pandas"] = "pandas",
    ) -> pa.DataFrameSchema:
        """Convert the TableSchema to a Pandera DataFrameSchema. This is used for
        validating DataFrames against the TableSchema. It allows to provide existing
        primary key and foreign key values for validation. If provided, the primary key
        uniqueness is checked against the union of the existing and the DataFrame
        values. Similarly, foreign key integrity is checked against the union of
        the existing and the DataFrame values.

        Args:
            name (str): The name of the schema. Defaults to "ConvertedSchema".
            primary_key_values (list[tuple[Any, ...]] | None): Existing primary key
                values to check for uniqueness.
                Note: The uniqueness of the primary key is validated is checked against
                    the union of the provided values and the values in the DataFrame.
            foreign_key_values (dict[tuple[str, ...], list[tuple[Any, ...]]] | None):
                Existing foreign key values to check against. This is provided as a
                dictionary where the keys are the tuples of fields that refer to the
                referenced values, and the values are lists of tuples representing the
                existing referenced values.
                Note: In the case of self-referencing foreign keys, the values in the
                    DataFrame are considered automatically, i.e., the referring fields
                    are validated against the union of the provided values and the
                    values in the DataFrame.
            skip_primary_key_validation (bool): Whether to skip primary key validation.
            skip_foreign_key_validation (bool): Whether to skip foreign key validation.
            backend (Literal["pandas"]): The backend to use for validation.
                Currently, only "pandas" is supported.
        """
        match backend:
            case "pandas":
                from .adapters import PanderaPandasAdapter
            case _:
                raise ValueError(
                    f"Unsupported backend '{backend}' for schema conversion."
                    "Currently, only 'pandas' is supported."
                )

        pandera_schema: pa.DataFrameSchema = PanderaPandasAdapter.convert_schema(
            self,
            name=name,
            skip_primary_key_validation=skip_primary_key_validation,
            skip_foreign_key_validation=skip_foreign_key_validation,
            primary_key_values=primary_key_values,
            foreign_key_values=foreign_key_values,
        )

        return pandera_schema

    def to_pydantic_model(
        self, model_name: str | None = None, base_class: type[BaseModel] = BaseModel
    ) -> type[BaseModel]:
        from .adapters import PydanticAdapter

        if model_name is None:
            model_name = getattr(self, "name", "ContractModel")
        return PydanticAdapter.convert_schema(
            self, name=model_name, base_class=base_class
        )

    def validate_dataframe(
        self,
        df: Any,
        primary_key_values: list[tuple[Any, ...]] | None = None,
        foreign_key_values: dict[tuple[str, ...], list[tuple[Any, ...]]] | None = None,
        skip_primary_key_validation: bool = False,
        skip_foreign_key_validation: bool = False,
        lazy: bool = True,
        backend: Literal["pandas"] = "pandas",
    ) -> pd.DataFrame:
        """Validate a DataFrame against the schema.
        It allows to provide existing primary key and foreign key values for validation.
        If provided, the primary key uniqueness is checked against the union of the
        existing and the DataFrame values. Similarly, foreign key integrity is checked
        against the union of existing and DataFrame values in case of self-referencing
        foreign keys.

        Args:
            df (Any): The DataFrame to validate.
            primary_key_values (list[tuple[Any, ...]] | None): Existing primary key
                values to check for uniqueness.
                Note: The uniqueness of the primary key is validated is checked against
                    the union of the provided values and the values in the DataFrame.
            foreign_key_values (dict[tuple[str, ...], list[tuple[Any, ...]]] | None):
                Existing foreign key values to check against. This is provided as a
                dictionary where the keys are the tuples of fields that refer to the
                referenced values, and the values are lists of tuples representing the
                existing referenced values.
                Note: In the case of self-referencing foreign keys, the values in the
                    DataFrame are considered automatically, i.e., the referring fields
                    are validated against the union of the provided values and the
                    values in the DataFrame.
            skip_primary_key_validation (bool): Whether to skip primary key validation.
            skip_foreign_key_validation (bool): Whether to skip foreign key validation.
            lazy (bool): Whether to perform lazy validation, collecting all errors.
                Defaults to True.
            backend (Literal["pandas"]): The backend to use for validation.
                Currently, only "pandas" is supported.
        Raises:
            SchemaValidationError: If the DataFrame does not conform to the
                schema. This exception wraps underlying ``pandera`` validation
                errors raised during DataFrame validation.

        Returns:
            pd.DataFrame: The validated DataFrame. If validation fails, an exception
                is raised and this return value is not reached.
        """
        return validate_dataframe(
            schema=self,
            df=df,
            primary_key_values=primary_key_values,
            foreign_key_values=foreign_key_values,
            skip_primary_key_validation=skip_primary_key_validation,
            skip_foreign_key_validation=skip_foreign_key_validation,
            lazy=lazy,
            backend=backend,
        )

field_names property

Returns a list of all field names.

field_iterator()

Returns an iterator over the fields in the schema.

Source code in src/crosscontract/contracts/schema/schema.py
def field_iterator(self) -> Iterator[FieldUnion]:
    """Provide a fresh iterator that walks the schema's fields in order."""
    schema_fields = self.fields
    return iter(schema_fields)

get(name)

Returns the field by name, or None if it doesn't exist.

Source code in src/crosscontract/contracts/schema/schema.py
def get(self, name: str) -> FieldUnion | None:
    """Look up a field by its name; the result is ``None`` when no field has it."""
    index = self._name_index
    try:
        return index[name]
    except KeyError:
        return None

has_fields(field_names)

Check if a field with the given name exists in the data contract.

Source code in src/crosscontract/contracts/schema/schema.py
def has_fields(self, field_names: str | list[str]) -> bool:
    """Check whether the schema declares the given field name(s).

    Args:
        field_names (str | list[str]): A single field name or a list of
            field names.

    Returns:
        bool: True if the name, or every name in the list, is declared in
            the schema. An empty list yields True.
    """
    requested = [field_names] if isinstance(field_names, str) else field_names
    # Test membership on the cached name index (a dict) instead of
    # rebuilding the ``field_names`` list for every requested name.
    name_index = self._name_index
    return all(name in name_index for name in requested)

to_pandera_schema(name='ConvertedSchema', primary_key_values=None, foreign_key_values=None, skip_primary_key_validation=False, skip_foreign_key_validation=False, backend='pandas')

Convert the TableSchema to a Pandera DataFrameSchema. This is used for validating DataFrames against the TableSchema. It allows to provide existing primary key and foreign key values for validation. If provided, the primary key uniqueness is checked against the union of the existing and the DataFrame values. Similarly, foreign key integrity is checked against the union of the existing and the DataFrame values.

Parameters:

Name Type Description Default
name str

The name of the schema. Defaults to "ConvertedSchema".

'ConvertedSchema'
primary_key_values list[tuple[Any, ...]] | None

Existing primary key values to check for uniqueness. Note: The uniqueness of the primary key is checked against the union of the provided values and the values in the DataFrame.

None
foreign_key_values dict[tuple[str, ...], list[tuple[Any, ...]]] | None

Existing foreign key values to check against. This is provided as a dictionary where the keys are the tuples of fields that refer to the referenced values, and the values are lists of tuples representing the existing referenced values. Note: In the case of self-referencing foreign keys, the values in the DataFrame are considered automatically, i.e., the referring fields are validated against the union of the provided values and the values in the DataFrame.

None
skip_primary_key_validation bool

Whether to skip primary key validation.

False
skip_foreign_key_validation bool

Whether to skip foreign key validation.

False
backend Literal['pandas']

The backend to use for validation. Currently, only "pandas" is supported.

'pandas'
Source code in src/crosscontract/contracts/schema/schema.py
def to_pandera_schema(
    self,
    name: str = "ConvertedSchema",
    primary_key_values: list[tuple[Any, ...]] | None = None,
    foreign_key_values: dict[tuple[str, ...], list[tuple[Any, ...]]] | None = None,
    skip_primary_key_validation: bool = False,
    skip_foreign_key_validation: bool = False,
    backend: Literal["pandas"] = "pandas",
) -> pa.DataFrameSchema:
    """Convert the TableSchema to a Pandera DataFrameSchema. This is used for
    validating DataFrames against the TableSchema. It allows to provide existing
    primary key and foreign key values for validation. If provided, the primary key
    uniqueness is checked against the union of the existing and the DataFrame
    values. Similarly, foreign key integrity is checked against the union of
    the existing and the DataFrame values.

    Args:
        name (str): The name of the schema. Defaults to "ConvertedSchema".
        primary_key_values (list[tuple[Any, ...]] | None): Existing primary key
            values to check for uniqueness.
            Note: The uniqueness of the primary key is validated is checked against
                the union of the provided values and the values in the DataFrame.
        foreign_key_values (dict[tuple[str, ...], list[tuple[Any, ...]]] | None):
            Existing foreign key values to check against. This is provided as a
            dictionary where the keys are the tuples of fields that refer to the
            referenced values, and the values are lists of tuples representing the
            existing referenced values.
            Note: In the case of self-referencing foreign keys, the values in the
                DataFrame are considered automatically, i.e., the referring fields
                are validated against the union of the provided values and the
                values in the DataFrame.
        skip_primary_key_validation (bool): Whether to skip primary key validation.
        skip_foreign_key_validation (bool): Whether to skip foreign key validation.
        backend (Literal["pandas"]): The backend to use for validation.
            Currently, only "pandas" is supported.
    """
    match backend:
        case "pandas":
            from .adapters import PanderaPandasAdapter
        case _:
            raise ValueError(
                f"Unsupported backend '{backend}' for schema conversion."
                "Currently, only 'pandas' is supported."
            )

    pandera_schema: pa.DataFrameSchema = PanderaPandasAdapter.convert_schema(
        self,
        name=name,
        skip_primary_key_validation=skip_primary_key_validation,
        skip_foreign_key_validation=skip_foreign_key_validation,
        primary_key_values=primary_key_values,
        foreign_key_values=foreign_key_values,
    )

    return pandera_schema

validate_dataframe(df, primary_key_values=None, foreign_key_values=None, skip_primary_key_validation=False, skip_foreign_key_validation=False, lazy=True, backend='pandas')

Validate a DataFrame against the schema. It allows to provide existing primary key and foreign key values for validation. If provided, the primary key uniqueness is checked against the union of the existing and the DataFrame values. Similarly, foreign key integrity is checked against the union of existing and DataFrame values in case of self-referencing foreign keys.

Parameters:

Name Type Description Default
df Any

The DataFrame to validate.

required
primary_key_values list[tuple[Any, ...]] | None

Existing primary key values to check for uniqueness. Note: The uniqueness of the primary key is checked against the union of the provided values and the values in the DataFrame.

None
foreign_key_values dict[tuple[str, ...], list[tuple[Any, ...]]] | None

Existing foreign key values to check against. This is provided as a dictionary where the keys are the tuples of fields that refer to the referenced values, and the values are lists of tuples representing the existing referenced values. Note: In the case of self-referencing foreign keys, the values in the DataFrame are considered automatically, i.e., the referring fields are validated against the union of the provided values and the values in the DataFrame.

None
skip_primary_key_validation bool

Whether to skip primary key validation.

False
skip_foreign_key_validation bool

Whether to skip foreign key validation.

False
lazy bool

Whether to perform lazy validation, collecting all errors. Defaults to True.

True
backend Literal['pandas']

The backend to use for validation. Currently, only "pandas" is supported.

'pandas'

Raises: SchemaValidationError: If the DataFrame does not conform to the schema. This exception wraps underlying pandera validation errors raised during DataFrame validation.

Returns:

Type Description
DataFrame

pd.DataFrame: The validated DataFrame. If validation fails, an exception is raised and this return value is not reached.

Source code in src/crosscontract/contracts/schema/schema.py
def validate_dataframe(
    self,
    df: Any,
    primary_key_values: list[tuple[Any, ...]] | None = None,
    foreign_key_values: dict[tuple[str, ...], list[tuple[Any, ...]]] | None = None,
    skip_primary_key_validation: bool = False,
    skip_foreign_key_validation: bool = False,
    lazy: bool = True,
    backend: Literal["pandas"] = "pandas",
) -> pd.DataFrame:
    """Validate a DataFrame against the schema.
    It allows providing existing primary key and foreign key values for validation.
    If provided, the primary key uniqueness is checked against the union of the
    existing and the DataFrame values. Similarly, foreign key integrity is checked
    against the union of existing and DataFrame values in case of self-referencing
    foreign keys.

    Args:
        df (Any): The DataFrame to validate.
        primary_key_values (list[tuple[Any, ...]] | None): Existing primary key
            values to check for uniqueness.
            Note: The uniqueness of the primary key is checked against
                the union of the provided values and the values in the DataFrame.
        foreign_key_values (dict[tuple[str, ...], list[tuple[Any, ...]]] | None):
            Existing foreign key values to check against. This is provided as a
            dictionary where the keys are the tuples of fields that refer to the
            referenced values, and the values are lists of tuples representing the
            existing referenced values.
            Note: In the case of self-referencing foreign keys, the values in the
                DataFrame are considered automatically, i.e., the referring fields
                are validated against the union of the provided values and the
                values in the DataFrame.
        skip_primary_key_validation (bool): Whether to skip primary key validation.
        skip_foreign_key_validation (bool): Whether to skip foreign key validation.
        lazy (bool): Whether to perform lazy validation, collecting all errors.
            Defaults to True.
        backend (Literal["pandas"]): The backend to use for validation.
            Currently, only "pandas" is supported.

    Raises:
        SchemaValidationError: If the DataFrame does not conform to the
            schema. This exception wraps underlying ``pandera`` validation
            errors raised during DataFrame validation.

    Returns:
        pd.DataFrame: The validated DataFrame. If validation fails, an exception
            is raised and this return value is not reached.
    """
    # All arguments are forwarded unchanged, with this schema passed as
    # ``schema``. In the source module this is a method, so the name below
    # resolves to the module-level ``validate_dataframe`` function.
    return validate_dataframe(
        schema=self,
        df=df,
        primary_key_values=primary_key_values,
        foreign_key_values=foreign_key_values,
        skip_primary_key_validation=skip_primary_key_validation,
        skip_foreign_key_validation=skip_foreign_key_validation,
        lazy=lazy,
        backend=backend,
    )

validate_structural_integrity()

Validates that all key definitions refer to fields that actually exist in the schema.

Source code in src/crosscontract/contracts/schema/schema.py
@model_validator(mode="after")
def validate_structural_integrity(self) -> "TableSchema":
    """
    Cross-check the primary key, the foreign keys and the field descriptors
    against the field names declared in this schema.
    """
    known_names = self.field_names

    primary_key = self.primaryKey
    if primary_key:
        primary_key.validate_fields(known_names)

    foreign_keys = self.foreignKeys
    if foreign_keys:
        for foreign_key in foreign_keys:
            foreign_key.validate_fields(known_names)
            # Referenced fields are only checked against this schema when the
            # key names no other resource.
            targets_this_schema = foreign_key.reference.resource is None
            if targets_this_schema:
                foreign_key.validate_referenced_fields(known_names)

    descriptors = self.fieldDescriptors
    if descriptors is not None:
        descriptors.validate_all_exist(known_names)
    return self

BaseDimensionSchema

Bases: TableSchema

(Abstract) Base class for dimension schemas. This class is not meant to be instantiated directly but serves as a base for specific dimension schemas.

Used to enforce the star-schema structure of dimensions, which requires an explicitly defined primary key. Foreign keys are optional, but if provided, they must all be self-referencing. Multiple self-referencing foreign keys are allowed, and no external foreign keys are permitted.

The primary key can be a single field or a composite key, but it must be explicitly defined by the user.

Source code in src/crosscontract/contracts/schema/subschemas/base_dimension.py
class BaseDimensionSchema(TableSchema):
    """
    (Abstract) Base class for dimension schemas. This class is not meant to be
    instantiated directly but serves as a base for specific dimension schemas.

    Used to enforce the star-schema structure of dimensions, which requires an
    explicitly defined primary key. Foreign keys are optional, but if provided,
    they must all be self-referencing. Multiple self-referencing foreign keys
    are allowed, and no external foreign keys are permitted.

    The primary key can be a single field or a composite key, but it must be
    explicitly defined by the user.
    """

    @model_validator(mode="after")
    def _reject_abstract_instantiation(self) -> Self:
        """Refuse to build the abstract base itself; subclasses pass through."""
        # Exact type comparison on purpose: subclasses must remain instantiable.
        is_concrete = type(self) is not BaseDimensionSchema
        if is_concrete:
            return self
        raise TypeError(
            "BaseDimensionSchema is abstract; use a concrete subclass "
            "like FlexibleDimensionSchema."
        )

    @model_validator(mode="after")
    def _validate_primary_key_defined(self) -> Self:
        """Require a primary key that lists at least one field."""
        primary_key = self.primaryKey
        if primary_key and primary_key.fields:
            return self
        raise ValueError(
            f"{type(self).__name__} requires an explicitly defined primary key."
        )

    @model_validator(mode="after")
    def _validate_foreign_keys_self_only(self) -> Self:
        """Reject the first foreign key that names an external resource."""
        for foreign_key in self.foreignKeys:
            target = foreign_key.reference.resource
            if target is None:
                # No resource named: the key refers to this same table.
                continue
            raise ValueError(
                f"{type(self).__name__} only supports self-referencing foreign"
                f" keys: found external reference to '{target}'."
            )
        return self

DimensionSchema

Bases: BaseDimensionSchema

A specialized schema for dimension tables in the CrossContract system.

This schema extends the base TableSchema by adding specific constraints and conventions for dimension tables.

A Dimension is a hierarchical structure that organizes data into levels, where each level represents a different granularity of information. For example, a "Location" dimension might have levels for "Country", "State", and "City".

Dimensions have the following fields:

  • "id":
      - required
      - Type: string (max length 100 characters)
      - Description: A unique identifier for each entry in the dimension table.
      - Constraints: Must be unique across the entire table and serves as the primary key.
  • "parent_id":
      - optional (required for levels > 0)
      - Type: string (max length 100 characters)
      - Description: A reference to the "id" of the parent entry in the same table.
  • "level":
      - required
      - Type: integer (non-negative, >= 0)
      - Description: Indicates the hierarchy level of the dimension, starting at 0 for the top level.
  • "label":
      - optional
      - Type: string (max length 255 characters)
      - Description: A human-readable label for the dimension entry. This is the default fallback label for plotting etc. purposes if no other label is provided.
  • "description":
      - optional
      - Type: string
      - Description: A detailed description of the dimension entry.
  • "color":
      - optional
      - Type: string (must be a valid hex color code, e.g., "#RRGGBB")
      - Description: A color associated with the dimension entry, which can be used for visualization purposes.

At the data level, dimensions receive more checks to ensure the hierarchy is consistent and valid.

  1. At level 0, no parent_id can be provided
  2. A row at level N (N > 0) must reference a parent at level N-1
  3. Each row at level N (N > 0) must have a parent_id
  4. The root level of the dimension hierarchy should have an entry with id "other". Each sub-level should have a sibling entry with id "other_<parent_id>" to capture uncategorized entries at that level.
Source code in src/crosscontract/contracts/schema/subschemas/dimension.py
class DimensionSchema(BaseDimensionSchema):
    """
    A specialized schema for dimension tables in the CrossContract system.

    This schema extends the base `TableSchema` by adding specific constraints
    and conventions for dimension tables.

    A Dimension is a hierarchical structure that organizes data into levels,
    where each level represents a different granularity of information.
    For example, a "Location" dimension might have levels for "Country", "State",
    and "City".

    Dimensions have the following fields:
    - "id":
        - required
        - Type: string (max length 100 characters)
        - Description: A unique identifier for each entry in the dimension table.
        - Constraints: Must be unique across the entire table and serves as the
                       primary key.
    - "parent_id":
        - optional (required for levels > 0)
        - Type: string (max length 100 characters)
        - Description: A reference to the "id" of the parent entry in the same table
    - "level":
        - required
        - Type: integer (non-negative, >= 0)
        - Description: Indicates the hierarchy level of the dimension, starting at 0
                       for the top level.
    - "label":
        - optional
        - Type: string (max length 255 characters)
        - Description: A human-readable label for the dimension entry.
                       This is the default fallback label for plotting etc.
                       purposes if no other label is provided.
    - "description":
        - optional
        - Type: string
        - Description: A detailed description of the dimension entry.
    - "color":
        - optional
        - Type: string (must be a valid hex color code, e.g., "#RRGGBB")
        - Description: A color associated with the dimension entry, which can be
                       used for visualization purposes.

    At the data level, dimensions receive more checks to ensure the hierarchy is
    consistent and valid.

    1. At level 0, no parent_id can be provided
    2. A row at level N (N > 0) must reference a parent at level N-1
    3. Each row at level N (N > 0) must have a parent_id
    4. The root level of the dimension hierarchy should have an entry with id "other".
       Each sub-level should have a sibling entry with id "other_<parent_id>" to
       capture uncategorized entries at that level.
    """

    # ignore type error as we want to enforce the table_type for this schema
    # for the pydantic discriminator to work correctly
    table_type: Literal["Dimension"] = Field(  # type: ignore[assignment]
        default="Dimension",
        description="Type of the table determines the structure of the schema.",
        exclude=True,
        repr=False,
    )

    @model_validator(mode="before")
    @classmethod
    def _inject_dimension_template(cls, data: Any) -> Any:
        """
        Replace the raw input with a copy of the fixed dimension template.

        Only an empty dict, or a dict holding nothing but ``table_type``, is
        accepted; the supplied content is then discarded in favour of a deep
        copy of ``DIMENSION_SCHEMA_TEMPLATE``. Non-dict input is returned
        untouched.

        Raises:
            ValueError: If the dict contains any key other than ``table_type``.
        """
        # 1. If it's garbage (list, string, etc.), pass it down so Pydantic
        # can throw a standard ValidationError. (Pydantic already intercepted valid
        # instances).
        if not isinstance(data, dict):
            return data

        # custom data can only be the table_type, which is set to "Dimension"
        # by default, so we check for any other keys and reject them to avoid
        # confusion about where to put metadata
        user_provided_keys = set(data) - {"table_type"}
        if user_provided_keys:
            # Sets have no stable order, so sort for a reproducible message;
            # ``str`` keeps non-string keys from breaking ``join``.
            restricted = ", ".join(sorted(map(str, user_provided_keys)))
            raise ValueError(
                "DimensionSchema is rigidly defined and cannot accept custom data. "
                f"Found restricted keys: {restricted}. "
                "Please put all metadata (title, description, etc.) on the "
                "Contract level."
            )
        # Deep copy so no instance shares mutable state with the template.
        return deepcopy(DIMENSION_SCHEMA_TEMPLATE)

FlexibleDimensionSchema

Bases: BaseDimensionSchema

A flexible dimension schema that allows for user-defined fields while enforcing the presence of two mandatory fields:

  • label: A label that describes the dimension value, which can be used for display purposes.
  • description: A description of the item in the dimension.

  • ForeignKey references: As every other dimension, the flexible dimension can only have a self-reference but cannot enforce any other references.
  • PrimaryKey: The flexible dimension must have a primary key. The key can be a single field or a composite key, but it must be explicitly defined by the user.
Source code in src/crosscontract/contracts/schema/subschemas/flexible_dimension.py
class FlexibleDimensionSchema(BaseDimensionSchema):
    """
    A flexible dimension schema that allows for user-defined fields while enforcing
    the presence of two mandatory fields:
        - label: A label that describes the dimension value, which can be used
            for display purposes.
        - description: A description of the item in the dimension

    - ForeignKey references:
        As every other dimension, the flexible dimension can only have a self-reference
        but cannot enforce any other references.
    - PrimaryKey:
        The flexible dimension must have a primary key. The key can be a single
        field or a composite key, but it must be explicitly defined by the user.
    """

    # Declared as a ClassVar, so this list is class-level data rather than a
    # model field. It spells out the two string fields named in the docstring.
    # NOTE(review): how these entries are enforced is not visible in this
    # class; presumably BaseDimensionSchema consumes them -- confirm.
    _mandatory_fields: ClassVar[list[MandatoryField]] = [
        MandatoryField(
            name="label",
            type="string",
            description=(
                "A label that describes the dimension value, which can be used "
                "for display purposes."
            ),
        ),
        MandatoryField(
            name="description",
            type="string",
            description="A description of the item in the dimension.",
        ),
    ]

    # ignore type error as we want to enforce the table_type for this schema
    # for the pydantic discriminator to work correctly
    # The Literal pins the only accepted value; exclude=True and repr=False
    # keep the field out of serialized output and out of the model repr.
    table_type: Literal["FlexibleDimension"] = Field(  # type: ignore[assignment]
        default="FlexibleDimension",
        description="Type of the table determines the structure of the schema.",
        exclude=True,
        repr=False,
    )

ValueVariableSchema

Bases: TableSchema

A specialized schema for value variable tables in the CrossContract system.

This schema extends the base TableSchema by adding specific constraints and conventions for value variable tables, which are typically used for categorization and filtering in data models.

Source code in src/crosscontract/contracts/schema/subschemas/value_variable.py
class ValueVariableSchema(TableSchema):
    """
    A specialized schema for value variable tables in the CrossContract system.

    This schema extends the base `TableSchema` by adding specific constraints
    and conventions for value variable tables, which are typically used for
    categorization and filtering in data models.
    """

    # NOTE(review): apart from pinning `table_type`, this class adds nothing
    # to TableSchema yet (see the todo below); the "constraints and
    # conventions" mentioned in the docstring are not implemented here.
    # todo add value variable-specific fields or constraints
    # ignore type error as we want to enforce the table_type for this schema
    # for the pydantic discriminator to work correctly
    # exclude=True and repr=False keep the field out of serialized output and
    # out of the model repr.
    table_type: Literal["ValueVariable"] = Field(  # type: ignore[assignment]
        default="ValueVariable",
        description="Type of the table determines the structure of the schema.",
        exclude=True,
        repr=False,
    )

Fields

BaseConstraint

Bases: BaseModel, ABC

Base class for constraints. This class can be extended to define specific constraints.

Source code in src/crosscontract/contracts/schema/fields/base.py
class BaseConstraint(BaseModel, ABC):
    """
    Base class for constraints.
    This class can be extended to define specific constraints.
    """

    # extra="forbid": unknown constraint keys are rejected instead of ignored.
    # str_strip_whitespace=True: string values are stripped during validation.
    model_config = ConfigDict(
        extra="forbid",
        str_strip_whitespace=True,
    )

    # Optional by default: a field only has to carry a value when this is True.
    required: bool = Field(
        default=False,
        description="Indicates whether a property must have a value for each instance.",
    )

    # Unlike `required`, this also accepts an explicit None; the default is
    # still False.
    unique: bool | None = Field(
        default=False,
        description="When `true`, each value for the property `MUST` be unique.",
    )

BaseField

Bases: BaseModel, ABC

Base class for frictionless fields. This class can be extended to define specific frictionless fields.

Source code in src/crosscontract/contracts/schema/fields/base.py
class BaseField(BaseModel, ABC):
    """
    Base class for frictionless fields.
    This class can be extended to define specific frictionless fields.
    """

    # Unknown keys in a field definition are rejected instead of ignored.
    model_config = ConfigDict(
        extra="forbid",
    )

    # Required (no default). The naming rules are carried by ValidFieldName;
    # the description interpolates the pattern and length limit at class
    # creation time.
    name: ValidFieldName = Field(
        description=(
            "The name of the field, which must be unique within the schema."
            f" It must match the pattern {valid_field_name_pattern} and cannot"
            f" exceed {max_field_name_length} characters."
        ),
    )
    title: str | None = Field(
        default=None,
        description="A human-readable title for the field.",
    )
    description: str | None = Field(
        default=None, description="A human-readable description of the field."
    )

    # No default is given here, so the base declaration makes `constraints`
    # required; the concrete field classes redeclare it with a typed
    # constraint model and a default_factory.
    constraints: BaseConstraint = Field(
        description="Constraints for the field",
    )

IntegerField

Bases: BaseField

A class representing an integer field in a frictionless schema. This class can be extended to define specific integer fields.

Source code in src/crosscontract/contracts/schema/fields/numeric_field.py
class IntegerField(BaseField):
    """
    A class representing an integer field in a frictionless schema.
    This class can be extended to define specific integer fields.
    """

    # The Literal pins the only accepted value for `type` on this class.
    type: Literal["integer"] = Field(
        default="integer",
        description="The type of the field, which is 'integer' for this class.",
    )

    # Defaults to a fresh NumericConstraint[int] built with its own defaults.
    constraints: NumericConstraint[int] = Field(default_factory=NumericConstraint[int])

NumberField

Bases: BaseField

A class representing a number field in a frictionless schema. This class can be extended to define specific number fields.

Source code in src/crosscontract/contracts/schema/fields/numeric_field.py
class NumberField(BaseField):
    """
    A class representing a number field in a frictionless schema.
    This class can be extended to define specific number fields.
    """

    # The Literal pins the only accepted value for `type` on this class.
    type: Literal["number"] = Field(
        default="number",
        description="The type of the field, which is 'number' for this class.",
    )

    # Defaults to a fresh NumericConstraint[float] built with its own defaults.
    constraints: NumericConstraint[float] = Field(
        default_factory=NumericConstraint[float]
    )

StringConstraint

Bases: BaseConstraint

Constraint for string fields. This class can be extended to define specific string constraints.

Source code in src/crosscontract/contracts/schema/fields/string_field.py
class StringConstraint(BaseConstraint):
    """
    Constraint for string fields.
    This class can be extended to define specific string constraints.
    """

    # Stored as a plain string; it is not compiled or checked for regex
    # validity in this class.
    pattern: str | None = Field(
        default=None,
        description=(
            "A regular expression pattern to test each value of the property "
            "against, where a truthy response indicates validity."
        ),
    )

    minLength: int | None = Field(
        default=None,
        description="An integer that specifies the minimum length of a value.",
        ge=0,  # a length cannot be negative (same bound as ListConstraint)
    )

    maxLength: int | None = Field(
        default=None,
        description="An integer that specifies the maximum length of a value.",
        ge=0,  # a length cannot be negative (same bound as ListConstraint)
    )

    # When given, the list of allowed values must contain at least one entry.
    enum: list[str] | None = Field(default=None, min_length=1)

StringField

Bases: BaseField

A class representing a string field in a frictionless schema. This class can be extended to define specific string fields.

Source code in src/crosscontract/contracts/schema/fields/string_field.py
class StringField(BaseField):
    """
    A class representing a string field in a frictionless schema.
    This class can be extended to define specific string fields.
    """

    # The Literal pins the only accepted value for `type` on this class.
    type: Literal["string"] = Field(
        default="string",
        description="The type of the field, which is 'string' for this class.",
    )
    # Defaults to a fresh StringConstraint built with its own defaults.
    constraints: StringConstraint = Field(
        default_factory=StringConstraint,
        description="Constraints for the `string` field",
    )

ListConstraint

Bases: BaseConstraint

ListConstraint defines constraints for a list of items. The items must be of the same type. The default assumed type is "string".

Source code in src/crosscontract/contracts/schema/fields/list_field.py
class ListConstraint(BaseConstraint):
    """ListConstraint defines constraints for a list of items. The items must
    be of the same type. The default assumed type is "string"."""

    # Both bounds are optional. Each is validated on its own; this class does
    # not check that minLength <= maxLength.
    minLength: int | None = Field(
        default=None,
        description=(
            "Minimum length of the array, i.e., the minimum number of elements"
            " in the array"
        ),
        ge=0,  # Ensure minLength is non-negative
    )
    maxLength: int | None = Field(
        default=None,
        description=(
            "Maximum length of the array, i.e., the maximum number of elements"
            " in the array"
        ),
        ge=0,  # Ensure maxLength is non-negative
    )

ListField

Bases: BaseField

ListFields store items into a list-like structure. All items in the list must be of the same type. List fields can have constraints on the length of the list.

Source code in src/crosscontract/contracts/schema/fields/list_field.py
class ListField(BaseField):
    """ListFields store items into a list-like structure. All items in the list
    must be of the same type. List fields can have constraints on the length of
    the list.
    """

    # The Literal pins the only accepted value for `type` on this class.
    type: Literal["list"] = Field(
        default="list",
        description="The type of the field, which is 'list' for this class.",
    )

    # Element type of the list; restricted to these four scalar type names,
    # with "string" as the default.
    itemType: Literal["string", "integer", "number", "boolean"] = Field(
        default="string", description="The type of items in the array"
    )

    # Defaults to a fresh ListConstraint built with its own defaults.
    constraints: ListConstraint = Field(
        default_factory=ListConstraint,
        description="Constraints for the list field",
    )

Exceptions

SchemaValidationError

Bases: Exception

Source code in src/crosscontract/contracts/schema/exceptions/validation_error.py
class SchemaValidationError(Exception):
    """Exception carrying a message plus optional pandera validation failures.

    The pandera error passed to the constructor is stored as a ``SchemaErrors``
    object (a single ``SchemaError`` is wrapped). It is parsed into a list of
    dictionaries only when ``errors`` is first accessed; the result is cached.
    """

    def __init__(
        self,
        message: str,
        schema_errors: pa.errors.SchemaErrors | pa.errors.SchemaError | None = None,
    ):
        """Initialize SchemaValidationError with optional pandera schema errors.

        Args:
            message (str): The error message.
            schema_errors (pa.errors.SchemaErrors | pa.errors.SchemaError, optional):
                Pandera exception to parse for detailed error information. A
                single SchemaError is wrapped into a SchemaErrors object.
        """
        super().__init__(message)
        self.message = message
        self._schema_errors: pa.errors.SchemaErrors | None = (
            self._convert_schema_error(schema_errors) if schema_errors else None
        )
        # Filled on first access of `errors`.
        self._parsed_errors: list[dict[Hashable, Any]] | None = None

    @staticmethod
    def _convert_schema_error(
        error: pa.errors.SchemaError | pa.errors.SchemaErrors,
    ) -> pa.errors.SchemaErrors:
        """Convert a single pandera SchemaError to a SchemaErrors object
        for consistent parsing.

        Args:
            error: The pandera exception to normalize.

        Returns:
            pa.errors.SchemaErrors: ``error`` itself if it already is a
                SchemaErrors, otherwise a new SchemaErrors wrapping it.

        Raises:
            TypeError: If ``error`` is neither a SchemaError nor a SchemaErrors.
        """
        match error:
            case pa.errors.SchemaErrors():
                return error
            case pa.errors.SchemaError():
                # Create a SchemaErrors object with a single error case.
                # The wrapper gets an empty DataFrame as data, so the value
                # lookup in _parse_reference_errors is skipped for it.
                converted = pa.errors.SchemaErrors(
                    schema=error.schema, schema_errors=[error], data=pd.DataFrame()
                )
                return converted
            case _:
                raise TypeError(
                    f"Expected SchemaError or SchemaErrors, got {type(error)}"
                )

    @property
    def errors(self) -> list[dict[Hashable, Any]]:
        """Lazy-loads and parses the error details.

        The parsed list is cached, so the pandera errors are parsed only once.
        """
        if self._parsed_errors is None:  # pragma: no cover
            self._parsed_errors = self._parse_pandera_errors()
        return self._parsed_errors

    def to_list(self) -> list[dict[Hashable, Any]]:
        """Return the errors as a list of dictionaries.

        Useful for API responses (JSON serialization).
        Alias for .errors.
        """
        return self.errors

    def to_pandas(self) -> pd.DataFrame:  # pragma: no cover
        """Return the errors as a pandas DataFrame.

        Useful for client-side debugging in Jupyter Notebooks.
        """
        return pd.DataFrame(self.errors)

    def _parse_pandera_errors(self) -> list[dict[Hashable, Any]]:
        """Parse pandera SchemaErrors into a list of error details.

        Returns:
            list[dict[Hashable, Any]]: One dictionary per failure case, sorted
                by check and index. Empty if no pandera errors were supplied.
        """
        if self._schema_errors is None:
            return []
        e = self._schema_errors
        df_failures: pd.DataFrame = e.failure_cases

        # 1. CLEAN TYPE COERCION ERROR: We only keep the rows that failed coercion,
        # but delete the redundant dtype errors (for the whole column)
        coercion_mask = df_failures["check"].str.startswith("coerce_dtype")
        coercion_failed_cols = df_failures[coercion_mask]["column"].unique()
        is_redundant_dtype = (df_failures["check"].str.startswith("dtype")) & (
            df_failures["column"].isin(coercion_failed_cols)
        )
        df_failures = df_failures[~is_redundant_dtype].copy()

        # 2 CLEAN REFERENCE ERRORS
        df_failures = self._parse_reference_errors(df_failures, data=e.data)

        # 3. Format for Output (JSON safe)
        # NaN is replaced by None before the rows are turned into dictionaries.
        df_errors = (
            df_failures.replace({float("nan"): None})
            .sort_values(by=["check", "index"])
            .to_dict(orient="records")
        )
        return df_errors

    def _parse_reference_errors(
        self, df_failures: pd.DataFrame, data: pd.DataFrame | None
    ) -> pd.DataFrame:
        """Parse pandera SchemaErrors related to foreign key violations by combining
        the error messages for multiple rows into a single message per reference
        violation.

        Note: The function relies on the column names provided in the name of the
        check. They have to be given as:
        "ForeignKeyError: ['col1', 'col2']"
        "PrimaryKeyError: ['col1', 'col2']"

        Args:
            df_failures (pd.DataFrame): The DataFrame containing the pandera failure
                cases.
            data (pd.DataFrame | None): The original DataFrame that was validated.

        Returns:
            pd.DataFrame: DataFrame with combined reference error messages. If
                there are no reference errors, ``df_failures`` is returned
                unchanged.
        """
        reference_errors = ["ForeignKeyError", "PrimaryKeyError"]

        # 1. Identify reference errors
        is_ref_error = df_failures["check"].str.contains(
            "|".join(reference_errors), regex=True
        )
        df_refs = df_failures[is_ref_error].copy()
        df_others = df_failures[~is_ref_error]

        # relax the type constraints on the dataframe as we collect all failure cases
        df_refs["failure_case"] = df_refs["failure_case"].astype(object)
        df_others = df_others.astype(object)

        # Nothing to combine: hand back the input as received.
        if df_refs.empty:
            return df_failures

        # 2. Remove duplicate rows per check type. We only need one row per check
        # and index to report the failure cases
        df_refs = df_refs.drop_duplicates(subset=["check", "index"]).copy()

        # 3. Lookup values of the failure cases from the original data
        for check_name in df_refs["check"].unique():
            target_cols = self._extract_cols(check_name)
            if not target_cols:  # pragma: no cover
                continue

            # Identify rows belonging to the current check
            mask = df_refs["check"] == check_name
            error_indices = df_refs.loc[mask, "index"]

            # Fetch the failure cases from the original data
            # that is only possible if the data are available which is not the
            # case for non-lazy validation where the error is raised immediately
            # upon the first failure and the data are not attached to the error object.
            if data is not None and not data.empty:
                try:
                    # Different handling for pandas and other backends could be
                    # implemented here
                    actual_values = self._lookup_values_pandas(
                        data, error_indices, target_cols
                    )

                    # Assign back to the failure report
                    df_refs.loc[mask, "failure_case"] = pd.Series(
                        actual_values, index=df_refs.loc[mask].index
                    )
                    df_refs.loc[mask, "column"] = ", ".join(target_cols)
                except KeyError:  # pragma: no cover
                    # Fallback if indices/columns are missing (edge cases)
                    continue

        # 4. Recombine with non-reference errors and return
        df_out = pd.concat([df_others, df_refs], ignore_index=True)
        return df_out

    def _lookup_values_pandas(
        self, data: pd.DataFrame, indices: pd.Series, cols: list[str]
    ) -> list[Any]:
        """Fetch values from the original dataframe. Here it is assumed that the
        original dataframe is a pandas DataFrame.

        Note: Implementations for other backends (e.g., Polars) would need to
            provide their own version of this method.

        Args:
            data (pd.DataFrame): The original DataFrame.
            indices (pd.Series): The indices of the rows to fetch.
            cols (list[str]): The columns to fetch.

        Returns:
            list[Any]: One tuple per fetched row, holding the values of ``cols``.
        """
        subset = data.loc[indices, cols]

        # Handle potential index duplication in source data
        if len(subset) != len(indices):
            # is tested but coverage does not verify this branch
            subset = subset[~subset.index.duplicated(keep="first")]  # pragma: no cover

        # Return as a list of plain tuples (one per row, without the index)
        out_list = list(subset.itertuples(index=False, name=None))
        return out_list

    @staticmethod
    def _extract_cols(check_name: str) -> list[str]:
        """Helper to parse list string from check name.

        Args:
            check_name (str): The name of the check containing the list string.

        Returns:
            list[str]: The parsed list of column names, or an empty list if no
                bracketed list is found or it cannot be parsed.
        """
        # Non-greedy match of the first "[...]" group in the check name.
        match = re.search(r"(\[.*?\])", str(check_name))
        # note: code is tested but coverage does not verify this branch
        if match:
            try:
                return ast.literal_eval(match.group(1))
            except (ValueError, SyntaxError):  # pragma: no cover
                pass
        return []  # pragma: no cover

errors property

Lazy-loads and parses the error details.

__init__(message, schema_errors=None)

Initialize SchemaValidationError with optional pandera schema errors.

Parameters:

Name Type Description Default
message str

The error message.

required
schema_errors SchemaErrors | SchemaError | None

Pandera SchemaErrors exception (or a single SchemaError) to parse for detailed error information.

None
Source code in src/crosscontract/contracts/schema/exceptions/validation_error.py
def __init__(
    self,
    message: str,
    schema_errors: pa.errors.SchemaErrors | pa.errors.SchemaError | None = None,
):
    """Initialize SchemaValidationError with optional pandera schema errors.

    Args:
        message (str): The error message.
        schema_errors (pa.errors.SchemaErrors | pa.errors.SchemaError, optional):
            Pandera exception to parse for detailed error information. It is
            passed through ``_convert_schema_error`` before being stored.
    """
    super().__init__(message)
    self.message = message
    self._schema_errors: pa.errors.SchemaErrors | None = (
        self._convert_schema_error(schema_errors) if schema_errors else None
    )
    # Filled on first access of `errors`.
    self._parsed_errors: list[dict[Hashable, Any]] | None = None

to_list()

Return the errors as a list of dictionaries.

Useful for API responses (JSON serialization). Alias for .errors.

Source code in src/crosscontract/contracts/schema/exceptions/validation_error.py
def to_list(self) -> list[dict[Hashable, Any]]:
    """Return the errors as a list of dictionaries.

    Useful for API responses (JSON serialization).
    Alias for .errors.
    """
    # Delegates to the `errors` property and returns its value unchanged.
    return self.errors

to_pandas()

Return the errors as a pandas DataFrame.

Useful for client-side debugging in Jupyter Notebooks.

Source code in src/crosscontract/contracts/schema/exceptions/validation_error.py
def to_pandas(self) -> pd.DataFrame:  # pragma: no cover
    """Return the errors as a pandas DataFrame.

    Useful for client-side debugging in Jupyter Notebooks.
    """
    # Builds a new DataFrame from the list of error dictionaries on each call.
    return pd.DataFrame(self.errors)