Skip to content

Metadata Models

Core metadata models and schema definitions.


Metadata Structure

csvw_eo.metadata_structure

Pydantic models for CSVW-EO metadata structure.

CategoricalPredicate

Bases: BaseModel

Predicate describing how a categorical partition is defined.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
45
46
47
48
49
50
51
52
53
54
55
56
57
class CategoricalPredicate(BaseModel):
    """Predicate that defines a categorical partition through a single value."""

    partition_value: Any | None

    def to_dict(self) -> dict[str, Any]:
        """Return the predicate as a one-entry CSVW-EO mapping."""
        serialized: dict[str, Any] = {}
        serialized[c.PARTITION_VALUE] = self.partition_value
        return serialized

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "CategoricalPredicate":
        """Build a predicate from its CSVW-EO mapping; the partition-value key must be present."""
        value = data[c.PARTITION_VALUE]
        return cls(partition_value=value)

from_dict(data: dict[str, Any]) -> CategoricalPredicate classmethod

Create a Predicate from CSVW-EO metadata.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
54
55
56
57
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "CategoricalPredicate":
    """Build a predicate from its CSVW-EO mapping; the partition-value key must be present."""
    value = data[c.PARTITION_VALUE]
    return cls(partition_value=value)

to_dict() -> dict[str, Any]

Convert the predicate into CSVW-EO JSON format.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
50
51
52
def to_dict(self) -> dict[str, Any]:
    """Return the predicate as a one-entry CSVW-EO mapping."""
    serialized: dict[str, Any] = {}
    serialized[c.PARTITION_VALUE] = self.partition_value
    return serialized

ColumnGroupMetadata

Bases: BaseModel

Metadata describing a group of columns that share partition definitions.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
class ColumnGroupMetadata(BaseModel):
    """
    Metadata describing a group of columns that share partition definitions.

    Only ``columns`` is mandatory. Every optional field left as ``None`` is
    omitted from the output of :meth:`to_dict`.
    """

    columns: list[str]

    # one of the two is necessary
    partitions: list[MultiColumnPartition] | None = None
    exhaustive_partitions: bool | None = None

    public_keys_values: list[MultiColumnKeys] | None = None
    exhaustive_keys: bool | None = None
    invariant_public_keys: bool | None = None

    max_num_partitions: int | None = None
    # NOTE(review): neither to_dict nor from_dict touches this field; it looks
    # like a leftover duplicate of ``invariant_public_keys`` -- confirm before removing.
    public_keys_invariant: bool | None = None

    max_length: int | None = None
    max_groups_per_unit: int | None = None
    max_contributions: int | None = None

    def to_dict(self) -> dict[str, Any]:
        """
        Serialize the column group metadata.

        Returns
        -------
        dict
            Mapping that always holds the ``@type`` marker and the column list,
            plus one entry for every optional field that is not ``None``.

        """
        result: dict[str, Any] = {
            "@type": c.COLUMN_GROUP,
            c.COLUMNS_IN_GROUP: self.columns,
        }

        if self.partitions is not None:
            result[c.PUBLIC_PARTITIONS] = [p.to_dict() for p in self.partitions]

        if self.exhaustive_partitions is not None:
            result[c.EXHAUSTIVE_PARTITIONS] = self.exhaustive_partitions

        if self.public_keys_values is not None:
            result[c.KEY_VALUES] = [k.to_dict() for k in self.public_keys_values]

        if self.invariant_public_keys is not None:
            result[c.INVARIANT_PUBLIC_KEYS] = self.invariant_public_keys

        if self.exhaustive_keys is not None:
            result[c.EXHAUSTIVE_KEYS] = self.exhaustive_keys

        if self.max_num_partitions is not None:
            result[c.MAX_NUM_PARTITIONS] = self.max_num_partitions

        if self.max_length is not None:
            result[c.MAX_LENGTH] = self.max_length

        if self.max_groups_per_unit is not None:
            result[c.MAX_GROUPS] = self.max_groups_per_unit

        if self.max_contributions is not None:
            result[c.MAX_CONTRIB] = self.max_contributions

        return result

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ColumnGroupMetadata":
        """
        Parse grouped column metadata from JSON.

        Parameters
        ----------
        data : dict
            Dictionary containing the serialized column group metadata. The
            column list is mandatory; every other key is optional.

        Returns
        -------
        ColumnGroupMetadata

        """
        raw_partitions = data.get(c.PUBLIC_PARTITIONS)
        raw_public_keys_values = data.get(c.KEY_VALUES)

        # Compare against None rather than truthiness: to_dict emits an empty
        # list as ``[]``, and that must round-trip as ``[]`` instead of None.
        partitions = None
        if raw_partitions is not None:
            partitions = [MultiColumnPartition.from_dict(p) for p in raw_partitions]

        public_keys_values = None
        if raw_public_keys_values is not None:
            public_keys_values = [MultiColumnKeys.from_dict(k) for k in raw_public_keys_values]

        # Build through ``cls`` so subclasses get instances of their own type,
        # and pass the parsed lists to the constructor so they are validated
        # together with the other fields.
        return cls(
            columns=data[c.COLUMNS_IN_GROUP],
            partitions=partitions,
            public_keys_values=public_keys_values,
            max_num_partitions=data.get(c.MAX_NUM_PARTITIONS),
            max_length=data.get(c.MAX_LENGTH),
            max_groups_per_unit=data.get(c.MAX_GROUPS),
            max_contributions=data.get(c.MAX_CONTRIB),
            exhaustive_keys=data.get(c.EXHAUSTIVE_KEYS),
            exhaustive_partitions=data.get(c.EXHAUSTIVE_PARTITIONS),
            invariant_public_keys=data.get(c.INVARIANT_PUBLIC_KEYS),
        )

from_dict(data: dict[str, Any]) -> ColumnGroupMetadata classmethod

Parse grouped column metadata from JSON.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "ColumnGroupMetadata":
    """
    Parse grouped column metadata from JSON.

    Parameters
    ----------
    data : dict
        Dictionary containing the serialized column group metadata. The
        column list is mandatory; every other key is optional.

    Returns
    -------
    ColumnGroupMetadata

    """
    raw_partitions = data.get(c.PUBLIC_PARTITIONS)
    raw_public_keys_values = data.get(c.KEY_VALUES)

    # Compare against None rather than truthiness so that a serialized empty
    # list is parsed back as ``[]`` instead of being collapsed to None.
    partitions = None
    if raw_partitions is not None:
        partitions = [MultiColumnPartition.from_dict(p) for p in raw_partitions]

    public_keys_values = None
    if raw_public_keys_values is not None:
        public_keys_values = [MultiColumnKeys.from_dict(k) for k in raw_public_keys_values]

    # Build through ``cls`` so subclasses get instances of their own type,
    # and pass the parsed lists to the constructor so they are validated
    # together with the other fields.
    return cls(
        columns=data[c.COLUMNS_IN_GROUP],
        partitions=partitions,
        public_keys_values=public_keys_values,
        max_num_partitions=data.get(c.MAX_NUM_PARTITIONS),
        max_length=data.get(c.MAX_LENGTH),
        max_groups_per_unit=data.get(c.MAX_GROUPS),
        max_contributions=data.get(c.MAX_CONTRIB),
        exhaustive_keys=data.get(c.EXHAUSTIVE_KEYS),
        exhaustive_partitions=data.get(c.EXHAUSTIVE_PARTITIONS),
        invariant_public_keys=data.get(c.INVARIANT_PUBLIC_KEYS),
    )

to_dict() -> dict[str, Any]

Serialize the column group metadata.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
def to_dict(self) -> dict[str, Any]:
    """
    Serialize the column group metadata.

    The ``@type`` marker and the column list are always written; each
    optional field is written only when it is not ``None``.
    """
    partitions = self.partitions
    key_values = self.public_keys_values

    # Listed in output order; entries whose value is None are skipped below.
    optional_entries: list[tuple[str, Any]] = [
        (c.PUBLIC_PARTITIONS, None if partitions is None else [p.to_dict() for p in partitions]),
        (c.EXHAUSTIVE_PARTITIONS, self.exhaustive_partitions),
        (c.KEY_VALUES, None if key_values is None else [k.to_dict() for k in key_values]),
        (c.INVARIANT_PUBLIC_KEYS, self.invariant_public_keys),
        (c.EXHAUSTIVE_KEYS, self.exhaustive_keys),
        (c.MAX_NUM_PARTITIONS, self.max_num_partitions),
        (c.MAX_LENGTH, self.max_length),
        (c.MAX_GROUPS, self.max_groups_per_unit),
        (c.MAX_CONTRIB, self.max_contributions),
    ]

    serialized: dict[str, Any] = {"@type": c.COLUMN_GROUP, c.COLUMNS_IN_GROUP: self.columns}
    for key, value in optional_entries:
        if value is not None:
            serialized[key] = value
    return serialized

ColumnMetadata

Bases: BaseModel

Metadata describing a single table column.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
class ColumnMetadata(BaseModel):
    """
    Metadata describing a single table column.

    ``name`` and ``datatype`` are mandatory; every other field is optional.
    """

    name: str
    datatype: DataTypes

    required: bool | None = None
    privacy_id: bool | None = None
    nullable_proportion: float | None = None

    dependencies: list[Dependency] = Field(default_factory=list)

    minimum: Any | None = None
    maximum: Any | None = None

    max_length: int | None = None
    max_groups_per_unit: int | None = None
    max_contributions: int | None = None

    partitions: list[SingleColumnPartition] | None = None
    exhaustive_partitions: bool | None = None

    public_keys_values: list[SingleColumnKey] | None = None
    exhaustive_keys: bool | None = None
    invariant_public_keys: bool | None = None

    max_num_partitions: int | None = None

    def to_dict(self) -> dict[str, Any]:
        """
        Convert the column metadata to CSVW-EO JSON format.

        Returns
        -------
        dict
            Mapping that always holds the ``@type`` marker, name, datatype and
            the required / privacy-id / nullable-proportion entries (the last
            three possibly ``None``). The dependency list is added when it is
            non-empty, and every remaining field when it is not ``None``.

        """
        d: dict[str, Any] = {
            "@type": c.COL_TYPE,
            c.COL_NAME: self.name,
            c.DATATYPE: self.datatype,
            # These three keys are always emitted, even when the value is None.
            c.REQUIRED: self.required,
            c.PRIVACY_ID: self.privacy_id,
            c.NULL_PROP: self.nullable_proportion,
        }

        # An empty dependency list is omitted, just like a missing one.
        if self.dependencies:
            d[c.ROW_DEP] = [dep.to_dict() for dep in self.dependencies]

        partitions = self.partitions
        key_values = self.public_keys_values

        # Listed in output order; entries whose value is None are skipped.
        optional_entries: list[tuple[str, Any]] = [
            (c.MINIMUM, self.minimum),
            (c.MAXIMUM, self.maximum),
            (c.PUBLIC_PARTITIONS, None if partitions is None else [p.to_dict() for p in partitions]),
            (c.EXHAUSTIVE_PARTITIONS, self.exhaustive_partitions),
            (c.KEY_VALUES, None if key_values is None else [k.to_dict() for k in key_values]),
            (c.INVARIANT_PUBLIC_KEYS, self.invariant_public_keys),
            (c.EXHAUSTIVE_KEYS, self.exhaustive_keys),
            (c.MAX_NUM_PARTITIONS, self.max_num_partitions),
            (c.MAX_LENGTH, self.max_length),
            (c.MAX_GROUPS, self.max_groups_per_unit),
            (c.MAX_CONTRIB, self.max_contributions),
        ]
        for key, value in optional_entries:
            if value is not None:
                d[key] = value

        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ColumnMetadata":
        """
        Parse column metadata from CSVW-EO JSON.

        Parameters
        ----------
        data : dict
            Dictionary containing the serialized column metadata. The column
            name and datatype are mandatory; every other key is optional.

        Returns
        -------
        ColumnMetadata

        """
        # ``or []`` also covers an explicit null dependency list.
        deps = [Dependency.from_dict(d) for d in data.get(c.ROW_DEP) or []]

        raw_partitions = data.get(c.PUBLIC_PARTITIONS)
        raw_public_keys_values = data.get(c.KEY_VALUES)

        # Compare against None rather than truthiness: to_dict emits an empty
        # list as ``[]``, and that must round-trip as ``[]`` instead of None.
        partitions = None
        if raw_partitions is not None:
            partitions = [SingleColumnPartition.from_dict(p) for p in raw_partitions]

        public_keys_values = None
        if raw_public_keys_values is not None:
            public_keys_values = [SingleColumnKey.from_dict(k) for k in raw_public_keys_values]

        # Build through ``cls`` so subclasses get instances of their own type,
        # and pass the parsed lists to the constructor so they are validated
        # together with the other fields.
        return cls(
            name=data[c.COL_NAME],
            datatype=data[c.DATATYPE],
            required=data.get(c.REQUIRED),
            privacy_id=data.get(c.PRIVACY_ID),
            nullable_proportion=data.get(c.NULL_PROP),
            dependencies=deps,
            minimum=data.get(c.MINIMUM),
            maximum=data.get(c.MAXIMUM),
            partitions=partitions,
            public_keys_values=public_keys_values,
            max_num_partitions=data.get(c.MAX_NUM_PARTITIONS),
            max_length=data.get(c.MAX_LENGTH),
            max_groups_per_unit=data.get(c.MAX_GROUPS),
            max_contributions=data.get(c.MAX_CONTRIB),
            exhaustive_keys=data.get(c.EXHAUSTIVE_KEYS),
            exhaustive_partitions=data.get(c.EXHAUSTIVE_PARTITIONS),
            invariant_public_keys=data.get(c.INVARIANT_PUBLIC_KEYS),
        )

from_dict(data: dict[str, Any]) -> ColumnMetadata classmethod

Parse column metadata from CSVW-EO JSON.

Parameters:

Name Type Description Default
data dict

Dictionary containing the serialized column metadata.

required

Returns:

Type Description
ColumnMetadata
Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "ColumnMetadata":
    """
    Parse column metadata from CSVW-EO JSON.

    Parameters
    ----------
    data : dict
        Dictionary containing the serialized column metadata. The column
        name and datatype are mandatory; every other key is optional.

    Returns
    -------
    ColumnMetadata

    """
    # ``or []`` also covers an explicit null dependency list.
    deps = [Dependency.from_dict(d) for d in data.get(c.ROW_DEP) or []]

    raw_partitions = data.get(c.PUBLIC_PARTITIONS)
    raw_public_keys_values = data.get(c.KEY_VALUES)

    # Compare against None rather than truthiness so that a serialized empty
    # list is parsed back as ``[]`` instead of being collapsed to None.
    partitions = None
    if raw_partitions is not None:
        partitions = [SingleColumnPartition.from_dict(p) for p in raw_partitions]

    public_keys_values = None
    if raw_public_keys_values is not None:
        public_keys_values = [SingleColumnKey.from_dict(k) for k in raw_public_keys_values]

    # Build through ``cls`` so subclasses get instances of their own type,
    # and pass the parsed lists to the constructor so they are validated
    # together with the other fields.
    return cls(
        name=data[c.COL_NAME],
        datatype=data[c.DATATYPE],
        required=data.get(c.REQUIRED),
        privacy_id=data.get(c.PRIVACY_ID),
        nullable_proportion=data.get(c.NULL_PROP),
        dependencies=deps,
        minimum=data.get(c.MINIMUM),
        maximum=data.get(c.MAXIMUM),
        partitions=partitions,
        public_keys_values=public_keys_values,
        max_num_partitions=data.get(c.MAX_NUM_PARTITIONS),
        max_length=data.get(c.MAX_LENGTH),
        max_groups_per_unit=data.get(c.MAX_GROUPS),
        max_contributions=data.get(c.MAX_CONTRIB),
        exhaustive_keys=data.get(c.EXHAUSTIVE_KEYS),
        exhaustive_partitions=data.get(c.EXHAUSTIVE_PARTITIONS),
        invariant_public_keys=data.get(c.INVARIANT_PUBLIC_KEYS),
    )

to_dict() -> dict[str, Any]

Convert the column metadata to CSVW-EO JSON format.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
def to_dict(self) -> dict[str, Any]:
    """
    Convert the column metadata to CSVW-EO JSON format.

    Returns
    -------
    dict
        Mapping that always holds the ``@type`` marker, name, datatype and
        the required / privacy-id / nullable-proportion entries (the last
        three possibly ``None``). The dependency list is added when it is
        non-empty, and every remaining field when it is not ``None``.

    """
    d: dict[str, Any] = {
        "@type": c.COL_TYPE,
        c.COL_NAME: self.name,
        c.DATATYPE: self.datatype,
        # These three keys are always emitted, even when the value is None.
        # (The former conditional re-assignments of the same keys were no-ops.)
        c.REQUIRED: self.required,
        c.PRIVACY_ID: self.privacy_id,
        c.NULL_PROP: self.nullable_proportion,
    }

    # An empty dependency list is omitted, just like a missing one.
    if self.dependencies:
        d[c.ROW_DEP] = [dep.to_dict() for dep in self.dependencies]

    partitions = self.partitions
    key_values = self.public_keys_values

    # Listed in output order; entries whose value is None are skipped.
    optional_entries: list[tuple[str, Any]] = [
        (c.MINIMUM, self.minimum),
        (c.MAXIMUM, self.maximum),
        (c.PUBLIC_PARTITIONS, None if partitions is None else [p.to_dict() for p in partitions]),
        (c.EXHAUSTIVE_PARTITIONS, self.exhaustive_partitions),
        (c.KEY_VALUES, None if key_values is None else [k.to_dict() for k in key_values]),
        (c.INVARIANT_PUBLIC_KEYS, self.invariant_public_keys),
        (c.EXHAUSTIVE_KEYS, self.exhaustive_keys),
        (c.MAX_NUM_PARTITIONS, self.max_num_partitions),
        (c.MAX_LENGTH, self.max_length),
        (c.MAX_GROUPS, self.max_groups_per_unit),
        (c.MAX_CONTRIB, self.max_contributions),
    ]
    for key, value in optional_entries:
        if value is not None:
            d[key] = value

    return d

ContinuousPredicate

Bases: BaseModel

Predicate describing how a continuous partition is defined.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
class ContinuousPredicate(BaseModel):
    """Predicate that defines a continuous partition through a lower and an upper bound."""

    lower_bound: float | str | None  # TODO type
    upper_bound: float | str | None

    def to_dict(self) -> dict[str, Any]:
        """Return both bounds as a CSVW-EO mapping (lower bound first)."""
        serialized: dict[str, Any] = {}
        serialized[c.LOWER_BOUND] = self.lower_bound
        serialized[c.UPPER_BOUND] = self.upper_bound
        return serialized

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ContinuousPredicate":
        """Build a predicate from its CSVW-EO mapping; both bound keys must be present."""
        lower = data[c.LOWER_BOUND]
        upper = data[c.UPPER_BOUND]
        return cls(lower_bound=lower, upper_bound=upper)

from_dict(data: dict[str, Any]) -> ContinuousPredicate classmethod

Create a Predicate from CSVW-EO metadata.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
73
74
75
76
77
78
79
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "ContinuousPredicate":
    """Build a predicate from its CSVW-EO mapping; both bound keys must be present."""
    lower = data[c.LOWER_BOUND]
    upper = data[c.UPPER_BOUND]
    return cls(lower_bound=lower, upper_bound=upper)

to_dict() -> dict[str, Any]

Convert the predicate into CSVW-EO JSON format.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
66
67
68
69
70
71
def to_dict(self) -> dict[str, Any]:
    """Return both bounds as a CSVW-EO mapping (lower bound first)."""
    serialized: dict[str, Any] = {}
    serialized[c.LOWER_BOUND] = self.lower_bound
    serialized[c.UPPER_BOUND] = self.upper_bound
    return serialized

Dependency

Bases: BaseModel

Row-level dependency between two columns.

Represents relationships such as mappings or constraints where the value of one column depends on the value of another column.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
class Dependency(BaseModel):
    """
    Row-level dependency between two columns.

    Records which column this one depends on, the kind of dependency, and
    optionally a value mapping between the two columns.
    """

    depends_on: str
    dependency_type: c.DependencyType
    value_map: dict[Any, Any] | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the dependency as a CSVW-EO mapping; the value map is added only when set."""
        serialized: dict[str, Any] = {}
        serialized[c.DEPENDS_ON] = self.depends_on
        serialized[c.DEPENDENCY_TYPE] = self.dependency_type

        mapping = self.value_map
        if mapping is None:
            return serialized

        serialized[c.VALUE_MAP] = mapping
        return serialized

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Dependency":
        """Build a Dependency from CSVW-EO metadata; only the value map is optional."""
        target = data[c.DEPENDS_ON]
        kind = data[c.DEPENDENCY_TYPE]
        mapping = data.get(c.VALUE_MAP)
        return cls(depends_on=target, dependency_type=kind, value_map=mapping)

from_dict(data: dict[str, Any]) -> Dependency classmethod

Create a Dependency instance from CSVW-EO metadata.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
35
36
37
38
39
40
41
42
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "Dependency":
    """Build a Dependency from CSVW-EO metadata; only the value map is optional."""
    target = data[c.DEPENDS_ON]
    kind = data[c.DEPENDENCY_TYPE]
    mapping = data.get(c.VALUE_MAP)
    return cls(depends_on=target, dependency_type=kind, value_map=mapping)

to_dict() -> dict[str, Any]

Convert the dependency to a CSVW-EO compliant dictionary.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
23
24
25
26
27
28
29
30
31
32
33
def to_dict(self) -> dict[str, Any]:
    """Return the dependency as a CSVW-EO mapping; the value map is added only when set."""
    serialized: dict[str, Any] = {}
    serialized[c.DEPENDS_ON] = self.depends_on
    serialized[c.DEPENDENCY_TYPE] = self.dependency_type

    mapping = self.value_map
    if mapping is None:
        return serialized

    serialized[c.VALUE_MAP] = mapping
    return serialized

MultiColumnKeys

Bases: BaseModel

Partition defined for multiple columns with only key information.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
207
208
209
210
211
212
213
214
215
216
217
218
219
220
class MultiColumnKeys(BaseModel):
    """Partition defined for multiple columns with only key information."""

    predicate: dict[str, Predicate]

    def to_dict(self) -> dict[str, Any]:
        """Serialize each predicate of the mapping, keeping its key."""
        serialized: dict[str, Any] = {}
        for name, column_predicate in self.predicate.items():
            serialized[name] = column_predicate.to_dict()
        return serialized

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "MultiColumnKeys":
        """Build a MultiColumnKeys by parsing every value of ``data`` as a predicate."""
        parsed: dict[str, Any] = {}
        for name, raw_predicate in data.items():
            parsed[name] = parse_predicate(raw_predicate)
        return cls(predicate=parsed)

from_dict(data: dict[str, Any]) -> MultiColumnKeys classmethod

Create a MultiColumnKeys from CSVW-EO metadata.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
216
217
218
219
220
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "MultiColumnKeys":
    """Build a MultiColumnKeys by parsing every value of ``data`` as a predicate."""
    parsed: dict[str, Any] = {}
    for name, raw_predicate in data.items():
        parsed[name] = parse_predicate(raw_predicate)
    return cls(predicate=parsed)

to_dict() -> dict[str, Any]

Convert the partition to CSVW-EO JSON format.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
212
213
214
def to_dict(self) -> dict[str, Any]:
    """Serialize each predicate of the mapping, keeping its key."""
    serialized: dict[str, Any] = {}
    for name, column_predicate in self.predicate.items():
        serialized[name] = column_predicate.to_dict()
    return serialized

MultiColumnPartition

Bases: Partition

Partition defined across multiple columns with details.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
class MultiColumnPartition(Partition):
    """Partition defined across multiple columns with details."""

    predicate: dict[str, Predicate]

    def _predicate_to_dict(self) -> dict[str, Any]:
        """Serialize each predicate of the mapping, keeping its key."""
        serialized: dict[str, Any] = {}
        for name, column_predicate in self.predicate.items():
            serialized[name] = column_predicate.to_dict()
        return serialized

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "MultiColumnPartition":
        """
        Parse a multi-column partition from metadata.

        Parameters
        ----------
        data : dict
            Serialized partition. The predicate mapping and the three
            bounds (length, groups, contributions) are all mandatory.

        Returns
        -------
        MultiColumnPartition

        """
        parsed: dict[str, Any] = {}
        for name, raw_predicate in data[c.PREDICATE].items():
            parsed[name] = parse_predicate(raw_predicate)

        length = data[c.MAX_LENGTH]
        groups = data[c.MAX_GROUPS]
        contributions = data[c.MAX_CONTRIB]
        return cls(
            predicate=parsed,
            max_length=length,
            max_groups_per_unit=groups,
            max_contributions=contributions,
        )

from_dict(data: dict[str, Any]) -> MultiColumnPartition classmethod

Parse a multi-column partition from metadata.

Parameters:

Name Type Description Default
data dict

Dictionary containing the serialized multi-column partition metadata.

required

Returns:

Type Description
MultiColumnPartition
Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "MultiColumnPartition":
    """
    Parse a multi-column partition from metadata.

    Parameters
    ----------
    data : dict
        Serialized partition. The predicate mapping and the three
        bounds (length, groups, contributions) are all mandatory.

    Returns
    -------
    MultiColumnPartition

    """
    parsed: dict[str, Any] = {}
    for name, raw_predicate in data[c.PREDICATE].items():
        parsed[name] = parse_predicate(raw_predicate)

    length = data[c.MAX_LENGTH]
    groups = data[c.MAX_GROUPS]
    contributions = data[c.MAX_CONTRIB]
    return cls(
        predicate=parsed,
        max_length=length,
        max_groups_per_unit=groups,
        max_contributions=contributions,
    )

Partition

Bases: BaseModel

Base class for partition metadata.

Partitions define how data is grouped when enforcing privacy constraints.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
class Partition(BaseModel):
    """
    Base class for partition metadata.

    Holds the three bounds shared by every partition and the common
    serialization; subclasses supply the predicate part through
    ``_predicate_to_dict``.
    """

    max_length: int
    max_groups_per_unit: int
    max_contributions: int

    def _predicate_to_dict(self) -> dict[str, Any]:
        """Serialize the predicate component; the base class has none, so it raises."""
        raise NotImplementedError

    def to_dict(self) -> dict[str, Any]:
        """Return the partition (type marker, predicate and bounds) in CSVW-EO JSON format."""
        serialized: dict[str, Any] = {"@type": c.PARTITION}
        serialized[c.PREDICATE] = self._predicate_to_dict()
        serialized[c.MAX_LENGTH] = self.max_length
        serialized[c.MAX_GROUPS] = self.max_groups_per_unit
        serialized[c.MAX_CONTRIB] = self.max_contributions
        return serialized

to_dict() -> dict[str, Any]

Convert the partition to CSVW-EO JSON format.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
107
108
109
110
111
112
113
114
115
def to_dict(self) -> dict[str, Any]:
    """Return the partition (type marker, predicate and bounds) in CSVW-EO JSON format."""
    serialized: dict[str, Any] = {"@type": c.PARTITION}
    serialized[c.PREDICATE] = self._predicate_to_dict()
    serialized[c.MAX_LENGTH] = self.max_length
    serialized[c.MAX_GROUPS] = self.max_groups_per_unit
    serialized[c.MAX_CONTRIB] = self.max_contributions
    return serialized

SingleColumnKey

Bases: BaseModel

Partition defined for a single column with only key information.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
class SingleColumnKey(BaseModel):
    """Partition defined for a single column with only key information."""

    predicate: Predicate

    def to_dict(self) -> Any:  # noqa: ANN401
        """
        Convert a categorical partition to CSVW-EO JSON format.

        Returns:
            The partition value (e.g., 'blue').

        Raises:
            TypeError: If the predicate is not a CategoricalPredicate.

        """
        if not isinstance(self.predicate, CategoricalPredicate):
            raise TypeError(f"Expected CategoricalPredicate, got {type(self.predicate).__name__}")

        return self.predicate.partition_value

    @classmethod
    def from_dict(cls, data: Any) -> "SingleColumnKey":  # noqa: ANN401
        """
        Create a SingleColumnKey from a categorical JSON value.

        Despite the method name, ``data`` is the bare value written by
        ``to_dict`` rather than a mapping, hence the ``Any`` annotation.

        Args:
            data: A raw categorical value (e.g., 'blue'). It is stored as-is;
                no type check is performed here.

        Returns:
            SingleColumnKey with a CategoricalPredicate.

        """
        pred = CategoricalPredicate(partition_value=data)
        return cls(predicate=pred)

from_dict(data: dict[str, Any]) -> SingleColumnKey classmethod

Create a SingleColumnKey from a categorical JSON value.

Args: data: A raw categorical value (e.g., 'blue').

Returns: SingleColumnKey with a CategoricalPredicate.

Raises: TypeError: If input is not a categorical value.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
@classmethod
def from_dict(cls, data: Any) -> "SingleColumnKey":  # noqa: ANN401
    """
    Create a SingleColumnKey from a categorical JSON value.

    Despite the method name, ``data`` is a bare categorical value rather
    than a mapping, hence the ``Any`` annotation.

    Args:
        data: A raw categorical value (e.g., 'blue'). It is stored as-is;
            no type check is performed here.

    Returns:
        SingleColumnKey with a CategoricalPredicate.

    """
    pred = CategoricalPredicate(partition_value=data)
    return cls(predicate=pred)

to_dict() -> Any

Convert a categorical partition to CSVW-EO JSON format.

Returns: The partition value (e.g., 'blue').

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
175
176
177
178
179
180
181
182
183
184
185
186
def to_dict(self) -> Any:  # noqa: ANN401
    """
    Serialize the key as its bare categorical value.

    Returns:
        The partition value held by the predicate (e.g., 'blue').

    Raises:
        TypeError: If the predicate is not a CategoricalPredicate.

    """
    predicate = self.predicate
    if isinstance(predicate, CategoricalPredicate):
        return predicate.partition_value

    raise TypeError(f"Expected CategoricalPredicate, got {type(predicate).__name__}")

SingleColumnPartition

Bases: Partition

Partition defined for a single column with details.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
class SingleColumnPartition(Partition):
    """Partition defined for a single column with details."""

    predicate: Predicate

    def _predicate_to_dict(self) -> dict[str, Any]:
        """Delegate serialization to the predicate itself."""
        column_predicate = self.predicate
        return column_predicate.to_dict()

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "SingleColumnPartition":
        """Parse a single-column partition; the predicate and the three bounds are mandatory."""
        parsed = parse_predicate(data[c.PREDICATE])
        length = data[c.MAX_LENGTH]
        groups = data[c.MAX_GROUPS]
        contributions = data[c.MAX_CONTRIB]
        return cls(
            predicate=parsed,
            max_length=length,
            max_groups_per_unit=groups,
            max_contributions=contributions,
        )

from_dict(data: dict[str, Any]) -> SingleColumnPartition classmethod

Parse a single-column partition from metadata.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
126
127
128
129
130
131
132
133
134
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "SingleColumnPartition":
    """Parse a single-column partition; the predicate and the three bounds are mandatory."""
    parsed = parse_predicate(data[c.PREDICATE])
    length = data[c.MAX_LENGTH]
    groups = data[c.MAX_GROUPS]
    contributions = data[c.MAX_CONTRIB]
    return cls(
        predicate=parsed,
        max_length=length,
        max_groups_per_unit=groups,
        max_contributions=contributions,
    )

TableMetadata

Bases: BaseModel

Top-level metadata object describing a CSVW-EO table.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
class TableMetadata(BaseModel):
    """Top-level metadata object describing a CSVW-EO table."""

    # Table-level values; all optional, and written by ``to_dict`` even when None.
    privacy_unit: str | None = None
    max_contributions: int | None = None
    max_length: int | None = None
    public_length: int | None = None

    # Per-column metadata, plus optional column-group metadata.
    columns: list[ColumnMetadata] = Field(default_factory=list)
    column_groups: list[ColumnGroupMetadata] | None = None

    # Emitted under "@context"; a fresh two-entry list is built per instance.
    context: list[str] = Field(default_factory=lambda: [c.CSVW_CONTEXT, c.CSVW_SAFE_CONTEXT])

    # Emitted under "@type".
    table_type: str = c.TABLE_TYPE

    def to_dict(self) -> dict[str, Any]:
        """
        Serialize the full metadata object to CSVW-EO JSON.

        Returns
        -------
        dict
            Mapping holding "@context", "@type", the table-level values and the
            table schema. The column-group entry is added only when
            ``column_groups`` is not None.

        """
        d: dict[str, Any] = {
            "@context": self.context,
            "@type": self.table_type,
            c.PRIVACY_UNIT: self.privacy_unit,
            c.MAX_CONTRIB: self.max_contributions,
            c.MAX_LENGTH: self.max_length,
            c.PUBLIC_LENGTH: self.public_length,
            c.TABLE_SCHEMA: {c.COL_LIST: [col.to_dict() for col in self.columns]},
        }

        # An empty list is still serialized; only None suppresses the key.
        if self.column_groups is not None:
            d[c.ADD_INFO] = [group.to_dict() for group in self.column_groups]

        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "TableMetadata":
        """
        Parse a CSVW-EO metadata document.

        Parameters
        ----------
        data : dict
            JSON metadata structure.

        Returns
        -------
        TableMetadata

        Raises
        ------
        KeyError
            If the table schema entry or its column list is missing; both are
            read with plain indexing.

        """
        schema = data[c.TABLE_SCHEMA]

        # The loop variable must not be named ``c``: that would shadow the constants
        # alias used in the very same expression (``c.COL_LIST``).
        columns = [ColumnMetadata.from_dict(col_data) for col_data in schema[c.COL_LIST]]

        column_groups = None
        if c.ADD_INFO in data:
            column_groups = [ColumnGroupMetadata.from_dict(g) for g in data[c.ADD_INFO]]

        # NOTE: a document without "@context" yields an empty context list here,
        # not the two-entry default the constructor would otherwise supply.
        return cls(
            privacy_unit=data.get(c.PRIVACY_UNIT),
            max_contributions=data.get(c.MAX_CONTRIB),
            max_length=data.get(c.MAX_LENGTH),
            public_length=data.get(c.PUBLIC_LENGTH),
            columns=columns,
            column_groups=column_groups,
            context=data.get("@context", []),
            table_type=data.get("@type", c.TABLE_TYPE),
        )

from_dict(data: dict[str, Any]) -> TableMetadata classmethod

Parse a CSVW-EO metadata document.

Parameters:

Name Type Description Default
data dict

JSON metadata structure.

required

Returns:

Type Description
TableMetadata
Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "TableMetadata":
    """
    Parse a CSVW-EO metadata document.

    Parameters
    ----------
    data : dict
        JSON metadata structure.

    Returns
    -------
    TableMetadata

    Raises
    ------
    KeyError
        If the table schema entry or its column list is missing; both are
        read with plain indexing.

    """
    schema = data[c.TABLE_SCHEMA]

    # The loop variable must not be named ``c``: that would shadow the constants
    # alias used in the very same expression (``c.COL_LIST``).
    columns = [ColumnMetadata.from_dict(col_data) for col_data in schema[c.COL_LIST]]

    column_groups = None
    if c.ADD_INFO in data:
        column_groups = [ColumnGroupMetadata.from_dict(g) for g in data[c.ADD_INFO]]

    # NOTE: a missing "@context" key yields an empty context list here.
    return cls(
        privacy_unit=data.get(c.PRIVACY_UNIT),
        max_contributions=data.get(c.MAX_CONTRIB),
        max_length=data.get(c.MAX_LENGTH),
        public_length=data.get(c.PUBLIC_LENGTH),
        columns=columns,
        column_groups=column_groups,
        context=data.get("@context", []),
        table_type=data.get("@type", c.TABLE_TYPE),
    )

to_dict() -> dict[str, Any]

Serialize the full metadata object to CSVW-EO JSON.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
def to_dict(self) -> dict[str, Any]:
    """
    Produce the CSVW-EO JSON mapping for this table.

    The table-level values are always written, even when they are None; the
    column-group entry is written only when ``column_groups`` is not None.
    """
    column_entries = [column.to_dict() for column in self.columns]

    payload: dict[str, Any] = {
        "@context": self.context,
        "@type": self.table_type,
        c.PRIVACY_UNIT: self.privacy_unit,
        c.MAX_CONTRIB: self.max_contributions,
        c.MAX_LENGTH: self.max_length,
        c.PUBLIC_LENGTH: self.public_length,
        c.TABLE_SCHEMA: {c.COL_LIST: column_entries},
    }

    # No groups defined: the optional key is left out entirely.
    if self.column_groups is None:
        return payload

    payload[c.ADD_INFO] = [grp.to_dict() for grp in self.column_groups]
    return payload

full_partition_to_key_multi(partitions: list[MultiColumnPartition]) -> list[MultiColumnKeys]

Convert a list of MultiColumnPartition to MultiColumnKeys, keeping only predicate information.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
234
235
236
237
238
239
240
241
242
def full_partition_to_key_multi(
    partitions: list[MultiColumnPartition],
) -> list[MultiColumnKeys]:
    """
    Reduce multi-column partitions to their keys.

    Each MultiColumnPartition yields one MultiColumnKeys built from its
    predicate alone; input order is preserved.
    """
    predicates = (partition.predicate for partition in partitions)
    return [MultiColumnKeys(predicate=predicate) for predicate in predicates]

full_partition_to_key_single(partitions: list[SingleColumnPartition]) -> list[SingleColumnKey]

Convert a list of SingleColumnPartition to SingleColumnKey, keeping only predicate information.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
223
224
225
226
227
228
229
230
231
def full_partition_to_key_single(
    partitions: list[SingleColumnPartition],
) -> list[SingleColumnKey]:
    """
    Reduce single-column partitions to their keys.

    Each SingleColumnPartition yields one SingleColumnKey built from its
    predicate alone; input order is preserved.
    """
    predicates = (partition.predicate for partition in partitions)
    return [SingleColumnKey(predicate=predicate) for predicate in predicates]

parse_predicate(data: dict[str, Any]) -> Predicate

Parse predicate depending on its type.

Source code in csvw-eo-library/src/csvw_eo/metadata_structure.py
85
86
87
88
89
def parse_predicate(data: dict[str, Any]) -> Predicate:
    """
    Build the predicate object matching the shape of ``data``.

    A mapping that contains the partition-value key is parsed as a
    CategoricalPredicate; any other mapping is parsed as a ContinuousPredicate.
    """
    predicate_cls = (
        CategoricalPredicate if c.PARTITION_VALUE in data else ContinuousPredicate
    )
    return predicate_cls.from_dict(data)

Constants

csvw_eo.constants

Defaults, constants and metadata objects for csvw-eo.

DependencyType

Bases: StrEnum

Types of column dependency relationships.

Source code in csvw-eo-library/src/csvw_eo/constants.py
63
64
65
66
67
68
69
70
class DependencyType(StrEnum):
    """Types of column dependency relationships."""

    # StrEnum members are also str instances, so each one compares equal to
    # the literal on its right-hand side.
    NO_DEP = "no_dependency"
    MAPPING = "mapping"
    BIGGER = "bigger"
    # No SMALLER = "smaller" member: per the original note it is redundant with BIGGER.
    FIXED = "fixedPerEntity"