Source code for lomas_server.tests.test_dummy_generation

import unittest
from typing import Any

from lomas_server.dp_queries.dummy_dataset import make_dummy_dataset
from lomas_server.utils.collection_models import Metadata
from lomas_server.utils.query_examples import DUMMY_NB_ROWS, DUMMY_SEED


class TestMakeDummyDataset(unittest.TestCase):
    """Tests for the generation of dummy datasets.

    Each test builds a metadata description containing a single column,
    validates it with ``Metadata.model_validate`` and checks the dataframe
    returned by ``make_dummy_dataset``.
    """

    # Base metadata shared by all tests. It is treated as read-only: tests
    # derive their own copy through ``_metadata_for`` instead of mutating it,
    # so no state leaks from one test to the next.
    metadata: dict[str, Any] = {
        "max_ids": 1,
        "rows": 100,
        "row_privacy": True,
        "columns": {},
    }

    def _metadata_for(self, columns: dict[str, Any]) -> Metadata:
        """Return validated metadata made of the base fields plus ``columns``.

        A new dict is built on every call so the class-level ``metadata``
        attribute is never modified.

        Args:
            columns: Mapping of column name to its column description.

        Returns:
            The result of ``Metadata.model_validate`` on the merged dict.
        """
        return Metadata.model_validate({**self.metadata, "columns": columns})

    def test_categorical_column(self) -> None:
        """A string column with categories only contains those categories."""
        metadata = self._metadata_for(
            {
                "col_card_cat": {  # cardinality + categories
                    "type": "string",
                    "cardinality": 3,
                    "categories": ["x", "y", "z"],
                }
            }
        )
        df = make_dummy_dataset(metadata)

        # Test shape
        self.assertEqual(df.shape[0], DUMMY_NB_ROWS)
        self.assertEqual(df.shape[1], 1)

        # Test cardinality, type and categories
        self.assertIn("col_card_cat", df.columns)
        self.assertEqual(df["col_card_cat"].nunique(), 3)
        self.assertEqual(set(df["col_card_cat"].values), {"x", "y", "z"})
        # ``isinstance(series, object)`` is always true, so check the
        # generated values themselves instead.
        self.assertTrue(
            all(isinstance(value, str) for value in df["col_card_cat"])
        )

    def test_boolean_column(self) -> None:
        """A nullable boolean column is generated with the boolean dtype."""
        metadata = self._metadata_for(
            {"col_bool": {"type": "boolean", "nullable": True}}
        )
        df = make_dummy_dataset(metadata)

        # Test length
        self.assertEqual(len(df), DUMMY_NB_ROWS)

        # Test col generated is boolean
        self.assertIn("col_bool", df.columns)
        self.assertEqual(df["col_bool"].dtype.name, "boolean")

    def test_float_column(self) -> None:
        """A 32-bit float column has dtype float32 and respects its bounds."""
        lower_bound = 10.0
        upper_bound = 20.0
        metadata = self._metadata_for(
            {
                "col_float": {
                    "type": "float",
                    "precision": 32,
                    "upper": upper_bound,
                    "lower": lower_bound,
                }
            }
        )
        df = make_dummy_dataset(metadata)

        # Test col generated is of type float
        self.assertEqual(df["col_float"].dtype.name, "float32")

        # Test within bounds
        self.assertTrue((df["col_float"] >= lower_bound).all())
        self.assertTrue((df["col_float"] <= upper_bound).all())

    def test_int_column(self) -> None:
        """A 64-bit int column has dtype int64 and respects its bounds."""
        lower_bound = 100
        upper_bound = 120
        metadata = self._metadata_for(
            {
                "col_int": {
                    "type": "int",
                    "precision": 64,
                    "upper": upper_bound,
                    "lower": lower_bound,
                }
            }
        )
        df = make_dummy_dataset(metadata)

        # Test col generated is of type int
        self.assertEqual(df["col_int"].dtype.name, "int64")

        # Test within bounds
        self.assertTrue((df["col_int"] >= lower_bound).all())
        self.assertTrue((df["col_int"] <= upper_bound).all())

    def test_datetime_column(self) -> None:
        """A non-nullable datetime column has no missing values."""
        metadata = self._metadata_for(
            {
                "col_datetime": {
                    "type": "datetime",
                    "lower": "2000-01-01",
                    "upper": "2010-01-01",
                }
            }
        )
        df = make_dummy_dataset(metadata)

        # Test col generated is of type datetime
        self.assertEqual(df["col_datetime"].dtype.name, "datetime64[ns]")

        # Should not have any null values
        self.assertFalse(df["col_datetime"].isnull().any())

    def test_nullable_column(self) -> None:
        """A nullable column contains at least one missing value."""
        metadata = self._metadata_for(
            {
                "col_nullable": {
                    "type": "datetime",
                    "nullable": True,
                    "lower": "2000-01-01",
                    "upper": "2010-01-01",
                }
            }
        )
        df = make_dummy_dataset(metadata)

        # Should have null values
        self.assertTrue(df["col_nullable"].isnull().any())

    def test_seed(self) -> None:
        """Different seeds give different data; the same seed repeats it."""
        metadata = self._metadata_for(
            {
                "col_int": {
                    "type": "int",
                    "nullable": True,
                    "precision": 32,
                    "lower": 0,
                    "upper": 100,
                }
            }
        )
        seed1 = DUMMY_SEED
        seed2 = DUMMY_SEED + 1

        df1 = make_dummy_dataset(metadata, seed=seed1)
        df2 = make_dummy_dataset(metadata, seed=seed2)

        # Check if datasets generated with different seeds are different
        self.assertFalse(df1.equals(df2))

        # Check if datasets generated with the same seed are identical
        df1_copy = make_dummy_dataset(metadata, seed=seed1)
        self.assertTrue(df1.equals(df1_copy))

    # TODO maybe remove this, see issue #335
    # def test_unknown_column(self) -> None:
    #     """test_unknown_column"""
    #     metadata = {
    #         "columns": {
    #             "col_bool": {"type": "boolean", "nullable": True},
    #             "col_unknown": {"type": "unknown"},
    #         }
    #     }
    #     df = make_dummy_dataset(metadata)
    #     # Test col generated
    #     self.assertIn("col_bool", df.columns)
    #     # Test col not generated
    #     self.assertNotIn("col_unknown", df.columns)