Skip to content

Utilities

General helper functions and shared utilities.


csvw_eo.utils

Utility files.

ContributionLevel

Bases: IntEnum

Represents the level at which contribution bounds are applied in CSVW-EO metadata.

Levels (lowest to highest): - TABLE: global table-level contribution bounds - TABLE_WITH_KEYS: level ranked between TABLE and COLUMN - COLUMN: per-column contribution bounds - PARTITION: per-partition contribution bounds

Source code in csvw-eo-library/src/csvw_eo/utils.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
class ContributionLevel(IntEnum):
    """
    Represents the level at which contribution bounds are applied in CSVW-EO metadata.

    Members are ordered integers, so they can be compared and combined with
    ``min``/``max`` (TABLE < TABLE_WITH_KEYS < COLUMN < PARTITION).

    Levels:
    - TABLE: global table-level contribution bounds
    - TABLE_WITH_KEYS: level ranked between TABLE and COLUMN
      (NOTE(review): presumably table-level bounds combined with key
      columns; confirm against the CSVW-EO specification)
    - COLUMN: per-column contribution bounds
    - PARTITION: per-partition contribution bounds
    """

    TABLE = 0
    TABLE_WITH_KEYS = 1
    COLUMN = 2
    PARTITION = 3

    @classmethod
    def from_str(cls, value: str) -> "ContributionLevel":
        """
        Convert a string representation into a ContributionLevel enum.

        Parameters
        ----------
        value : str
            One of 'table', 'table_with_keys', 'column', 'partition'
            (case-insensitive).

        Returns
        -------
        ContributionLevel
            Corresponding enum value.

        Raises
        ------
        ValueError
            If the input string does not match any valid level. The message
            echoes the input as given and lists the accepted values.

        """
        wanted = value.lower()
        # Compare against the members themselves so the accepted strings can
        # never drift out of sync with the enum definition.
        for level in cls:
            if level.name.lower() == wanted:
                return level

        accepted = ", ".join(level.name.lower() for level in cls)
        raise ValueError(f"Invalid contribution level: {value}. Expected one of: {accepted}.")

from_str(value: str) -> ContributionLevel classmethod

Convert a string representation into a ContributionLevel enum.

Parameters:

Name Type Description Default
value str

One of 'table', 'table_with_keys', 'column', 'partition' (case-insensitive).

required

Returns:

Type Description
ContributionLevel

Corresponding enum value.

Raises:

Type Description
ValueError

If the input string does not match any valid level.

Source code in csvw-eo-library/src/csvw_eo/utils.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
@classmethod
def from_str(cls, value: str) -> "ContributionLevel":
    """
    Convert a string representation into a ContributionLevel enum.

    Parameters
    ----------
    value : str
        One of 'table', 'table_with_keys', 'column', 'partition'
        (case-insensitive).

    Returns
    -------
    ContributionLevel
        Corresponding enum value.

    Raises
    ------
    ValueError
        If the input string does not match any valid level. The message
        echoes the input as given and lists the accepted values.

    """
    wanted = value.lower()
    # Compare against the members themselves so the accepted strings can
    # never drift out of sync with the enum definition.
    for level in cls:
        if level.name.lower() == wanted:
            return level

    accepted = ", ".join(level.name.lower() for level in cls)
    raise ValueError(f"Invalid contribution level: {value}. Expected one of: {accepted}.")

get_effective_contrib_level(column_name: str, fine_contributions_level: dict[str, ContributionLevel], default_contributions_level: ContributionLevel) -> ContributionLevel

Determine effective contribution level for a column.

Logic: - Take the column-specific fine level if it exists, else fall back to TABLE (the lowest level). - Return the maximum of that level and the default (table < table_with_keys < column < partition).

Source code in csvw-eo-library/src/csvw_eo/utils.py
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def get_effective_contrib_level(
    column_name: str,
    fine_contributions_level: dict[str, ContributionLevel],
    default_contributions_level: ContributionLevel,
) -> ContributionLevel:
    """
    Resolve the contribution level that applies to a single column.

    The column's own entry in ``fine_contributions_level`` is looked up,
    falling back to TABLE (the lowest level) when the column has no entry.
    That level is then compared with the default and the higher of the two
    is returned (TABLE < TABLE_WITH_KEYS < COLUMN < PARTITION).
    """
    column_level = fine_contributions_level.get(column_name, ContributionLevel.TABLE)
    # The default only wins when it is strictly higher than the column level.
    if default_contributions_level > column_level:
        return default_contributions_level
    return column_level

get_group_contribution_level(col_group: list[str], fine_contributions_level: dict[str, ContributionLevel], default_contributions_level: ContributionLevel) -> ContributionLevel

Determine the effective contribution level for a column group.

Source code in csvw-eo-library/src/csvw_eo/utils.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def get_group_contribution_level(
    col_group: list[str],
    fine_contributions_level: dict[str, ContributionLevel],
    default_contributions_level: ContributionLevel,
) -> ContributionLevel:
    """
    Determine the effective contribution level for a column group.

    Each column's effective level is resolved with
    ``get_effective_contrib_level`` and the lowest of those levels is
    returned for the group.

    Parameters
    ----------
    col_group : list[str]
        Names of the columns forming the group.
    fine_contributions_level : dict[str, ContributionLevel]
        Per-column contribution level overrides.
    default_contributions_level : ContributionLevel
        Level applied to columns without an override.

    Returns
    -------
    ContributionLevel
        The minimum effective level across the group's columns.

    Raises
    ------
    ValueError
        If the group is empty, or if any column of the group resolves to
        TABLE level.

    """
    levels = [
        get_effective_contrib_level(col, fine_contributions_level, default_contributions_level)
        for col in col_group
    ]

    # Fail with an explicit message instead of the opaque error min() raises
    # on an empty sequence.
    if not levels:
        raise ValueError(
            f"Invalid ColumnGroup {col_group}: a column group must contain at least one column."
        )

    if any(level == ContributionLevel.TABLE for level in levels):
        raise ValueError(
            f"Invalid contribution level in ColumnGroup {col_group}: contains TABLE-level column."
        )

    # TABLE < TABLE_WITH_KEYS < COLUMN < PARTITION
    return min(levels)

prepare_metadata_inputs(default_contributions_level: str, fine_contributions_level: dict[str, str] | None, continuous_partitions: dict[str, list[Any]] | None, column_groups: list[list[str]] | None) -> tuple[ContributionLevel, dict[str, ContributionLevel], dict[str, list[Any]], list[list[str]]]

Normalize optional metadata configuration inputs.

This helper ensures that optional parameters are initialized with appropriate defaults and applies implicit rules required by the metadata generation process.

In particular: - Missing dictionaries/lists are replaced with empty structures. - Columns with numeric partitions are automatically treated at partition-level contribution granularity.

Parameters:

Name Type Description Default
default_contributions_level str

Default contribution level applied when no column-specific override exists.

required
fine_contributions_level dict[str, str] or None

Optional mapping specifying per-column contribution levels. Values must be one of {"table", "table_with_keys", "column", "partition"}.

required
continuous_partitions dict[str, list[Any]] or None

Mapping of numeric column names to bin boundaries used to generate partitions.

required
column_groups list[list[str]] or None

List of column groups used to create joint partitions.

required

Returns:

Type Description
tuple

A tuple containing normalized versions of: - default_level : ContributionLevel - fine_level : dict[str, ContributionLevel] - continuous_partitions : dict[str, list[Any]] - column_groups : list[list[str]]

Source code in csvw-eo-library/src/csvw_eo/utils.py
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
def prepare_metadata_inputs(
    default_contributions_level: str,
    fine_contributions_level: dict[str, str] | None,
    continuous_partitions: dict[str, list[Any]] | None,
    column_groups: list[list[str]] | None,
) -> tuple[
    ContributionLevel,
    dict[str, ContributionLevel],
    dict[str, list[Any]],
    list[list[str]],
]:
    """
    Normalize the optional metadata configuration inputs.

    String contribution levels are parsed into ``ContributionLevel`` members,
    ``None`` arguments are replaced by empty containers, and every column
    listed in ``continuous_partitions`` is forced to PARTITION level
    (overriding any per-column level supplied for it).

    Parameters
    ----------
    default_contributions_level : str
        Default contribution level applied when no column-specific override exists.
    fine_contributions_level : dict[str, str] or None
        Optional per-column contribution levels, given as strings accepted by
        ``ContributionLevel.from_str``.
    continuous_partitions : dict[str, list[Any]] or None
        Mapping of numeric column names to bin boundaries used
        to generate partitions.
    column_groups : list[list[str]] or None
        List of column groups used to create joint partitions.

    Returns
    -------
    tuple
        ``(default_level, fine_level, continuous_partitions, column_groups)``:
        - default_level : ContributionLevel
        - fine_level : dict[str, ContributionLevel] (always a new dict)
        - continuous_partitions : dict[str, list[Any]]
        - column_groups : list[list[str]]

    Raises
    ------
    ValueError
        Propagated from ``ContributionLevel.from_str`` when a level string
        is not recognised.

    """
    # Parse the default first so an invalid default is reported before any
    # per-column value is examined.
    resolved_default = ContributionLevel.from_str(default_contributions_level)

    partitions = {} if continuous_partitions is None else continuous_partitions
    groups = [] if column_groups is None else column_groups

    per_column: dict[str, ContributionLevel] = {}
    if fine_contributions_level is not None:
        for name, raw_level in fine_contributions_level.items():
            per_column[name] = ContributionLevel.from_str(raw_level)

    # Columns with continuous bounds are always handled at partition level.
    per_column.update(dict.fromkeys(partitions, ContributionLevel.PARTITION))

    return resolved_default, per_column, partitions, groups

sanitize(obj: dict[str, Any]) -> dict[str, Any]

Recursively convert objects into JSON/CSVW-EO serializable types.

  • NumPy scalars → Python scalars
  • NaN or Inf → rejected with ValueError
  • Other types remain unchanged
Source code in csvw-eo-library/src/csvw_eo/utils.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
def sanitize(obj: Any) -> Any:
    """
    Recursively convert objects into JSON/CSVW-EO serializable types.

    - dicts and lists are rebuilt with every contained value sanitized
    - NumPy scalars → Python scalars
    - NaN or Inf → rejected with ``ValueError``
    - Other types remain unchanged

    Parameters
    ----------
    obj : Any
        Value to sanitize; typically a (nested) metadata dictionary.

    Returns
    -------
    Any
        A sanitized copy for dicts and lists, the converted native value for
        NumPy scalars, and the input object itself otherwise.

    Raises
    ------
    ValueError
        If a float (native or NumPy) is NaN or infinite.

    """
    if isinstance(obj, dict):
        return {key: sanitize(item) for key, item in obj.items()}
    if isinstance(obj, list):
        return [sanitize(item) for item in obj]
    if isinstance(obj, np.generic):
        obj = obj.item()  # convert NumPy scalar to native Python

    # Non-finite floats have no valid JSON representation, so fail loudly.
    if isinstance(obj, float) and not math.isfinite(obj):
        raise ValueError(f"Value is NaN or infinite: {obj}")

    return obj