Skip to content

Client

Classes:

  • Bound

    Any type that supports ordering comparisons (< and >).

  • Client

    Client class to send requests to the server.

Bound #


              flowchart TD
              lomas_client.client.Bound[Bound]

              

              click lomas_client.client.Bound href "" "lomas_client.client.Bound"
            

Any type that supports ordering comparisons (< and >).

Client #

Client(**kwargs: model_config)

Client class to send requests to the server.

Handle all serialisation and deserialisation steps

Parameters:

  • kwargs #

    (model_config, default: {} ) –

    All keyword arguments will be forwarded to the ClientConfig

Methods:

Source code in client/lomas_client/client.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def __init__(self, **kwargs: ClientConfig.model_config):
    """Build the client configuration and the library-specific sub-clients.

    Args:
        kwargs: All keyword arguments will be forwarded to the ClientConfig

    Raises:
        ValueError: If ``ClientConfig`` raises a ``ValidationError`` for the
            given arguments (the validation error is chained as the cause).
    """
    try:
        self.config = ClientConfig(**kwargs)
    except ValidationError as exc:
        # Adjacent literals are concatenated as-is, so every sentence has to
        # carry its own trailing space.
        raise ValueError(
            "Missing client config parameters. "
            "If you are using this library from a managed environment and don't know "
            "about your credentials or other parameters, please contact your system administrator."
        ) from exc

    if self.config.telemetry.enabled:
        LoggingInstrumentor().instrument(set_logging_format=True)
        init_telemetry(self.config.telemetry)

    # A single HTTP client is shared by the library-specific clients.
    self.http_client = LomasHttpClient(self.config)
    self.smartnoise_sql = SmartnoiseSQLClient(self.http_client)
    self.opendp = OpenDPClient(self.http_client)
    self.diffprivlib = DiffPrivLibClient(self.http_client)

    # Starts empty; get_dataset_metadata() stores the server answer here.
    self.metadata: dict[str, Any] | None = None

get_dataset_metadata #

get_dataset_metadata() -> dict[str, Any]

This function retrieves metadata for the dataset.

Returns: A dictionary containing dataset metadata.

Source code in client/lomas_client/client.py
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def get_dataset_metadata(self) -> dict[str, Any]:
    """Return the dataset metadata, querying the server only when nothing is cached.

    The first successful answer is stored in ``self.metadata`` and reused by
    later calls.

    Returns: A dictionary containing dataset metadata.
    """
    if self.metadata is None:
        request = LomasRequestModel.model_validate({"dataset_name": self.config.dataset_name})
        response = self.http_client.post("get_dataset_metadata", request)

        if response.status_code != status.HTTP_200_OK:
            # Any status other than 200 is handed to the shared error helper.
            raise_error(response)
        else:
            self.metadata = TableMetadata.model_validate(response.json()).to_dict()

    return self.metadata

get_column_metadata #

get_column_metadata(column_name: str) -> dict[str, Any]

This function retrieves metadata for the column.

Returns: A dictionary containing column metadata.

Source code in client/lomas_client/client.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def get_column_metadata(self, column_name: str) -> dict[str, Any]:
    """Return the metadata entry of a single column.

    The dataset metadata is fetched first if it has not been loaded yet.

    Args:
        column_name: Name of the column to look up.

    Returns: A dictionary containing column metadata.

    Raises:
        ValueError: If no column with this name exists; the message lists the
            available column names.
    """
    if self.metadata is None:
        self.metadata = self.get_dataset_metadata()

    columns = self.metadata[TABLE_SCHEMA][COL_LIST]
    matching = (entry for entry in columns if entry[COL_NAME] == column_name)

    try:
        # Only the first entry with the requested name is returned.
        return next(matching)
    except StopIteration as err:
        available = [entry[COL_NAME] for entry in columns]
        raise ValueError(f"Column '{column_name}' not found. Available columns: {available}") from err

get_column_bounds #

get_column_bounds(column_name: str) -> tuple[T, T]

This function retrieves metadata bounds for the column.

Returns: A tuple of (minimum_bound, maximum_bound)

Source code in client/lomas_client/client.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
def get_column_bounds(self, column_name: str) -> tuple[T, T]:
    """Return the bounds recorded in the metadata of a column.

    Args:
        column_name: Name of the column to look up.

    Returns: A tuple of (minimum_bound, maximum_bound)

    Raises:
        ValueError: If the column has no minimum or no maximum (a bound that
            is absent or ``None`` counts as missing).
    """
    column = self.get_column_metadata(column_name)
    bounds = (column.get(MINIMUM), column.get(MAXIMUM))

    if any(bound is None for bound in bounds):
        raise ValueError(f"Column '{column_name}' does not have bounds.")

    return bounds

get_diffprivlib_bounds #

get_diffprivlib_bounds(columns: list[str]) -> tuple[list[int | float], list[int | float]]

Get bounds for a list of columns in diffprivlib expected format.

Source code in client/lomas_client/client.py
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def get_diffprivlib_bounds(self, columns: list[str]) -> tuple[list[int | float], list[int | float]]:
    """Get bounds for a list of columns in diffprivlib expected format.

    The dataset metadata is fetched first if it has not been loaded yet.

    Args:
        columns: Names of the columns, in the order the bounds are wanted.

    Returns:
        A ``(lower, upper)`` tuple of two lists, each holding one bound per
        requested column, in the same order as ``columns``.

    Raises:
        ValueError: If a column is unknown, or if its minimum or maximum is
            absent or ``None``.
    """
    if self.metadata is None:
        self.metadata = self.get_dataset_metadata()

    col_map = {col[COL_NAME]: col for col in self.metadata[TABLE_SCHEMA][COL_LIST]}

    lower: list[int | float] = []
    upper: list[int | float] = []
    for col in columns:
        if col not in col_map:
            raise ValueError(f"Column '{col}' not found")

        metadata = col_map[col]

        # A bound stored as None is as unusable as an absent key; this is the
        # same rule get_column_bounds() applies, so both accessors agree.
        minimum = metadata.get(MINIMUM)
        maximum = metadata.get(MAXIMUM)
        if minimum is None or maximum is None:
            raise ValueError(f"Column '{col}' does not have bounds")

        lower.append(minimum)
        upper.append(maximum)

    return lower, upper

get_dummy_dataset #

get_dummy_dataset(
    nb_rows: int = DUMMY_NB_ROWS, seed: int = DUMMY_SEED, lazy: bool = False
) -> DataFrame | LazyFrame

This function retrieves a dummy dataset with optional parameters.

Parameters:

  • nb_rows #

    (int, default: DUMMY_NB_ROWS ) –

    The number of rows in the dummy dataset. Defaults to DUMMY_NB_ROWS.

  • seed #

    (int, default: DUMMY_SEED ) –

    The random seed for generating the dummy dataset. Defaults to DUMMY_SEED.

  • lazy #

    (bool, default: False ) –

    If True, return a polars LazyFrame. Defaults to False (pandas DataFrame)

Returns:

  • DataFrame | LazyFrame

    pd.DataFrame | pl.LazyFrame: A pandas DataFrame representing the dummy dataset, or a polars LazyFrame when lazy is True.

Source code in client/lomas_client/client.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
def get_dummy_dataset(
    self,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
    lazy: bool = False,
) -> pd.DataFrame | pl.LazyFrame:
    """Request a dummy dataset from the server.

    Args:
        nb_rows (int, optional): The number of rows in the dummy dataset.
            Defaults to DUMMY_NB_ROWS.
        seed (int, optional): The random seed for generating the dummy dataset.
            Defaults to DUMMY_SEED.
        lazy (bool, optional): If True, the dataframe is converted to a polars
            LazyFrame before being returned. Defaults to False.

    Returns:
        pd.DataFrame | pl.LazyFrame: The dummy dataframe parsed from the
        response, or its polars LazyFrame conversion when ``lazy`` is True.
    """
    request = GetDummyDataset.model_validate(
        {
            "dataset_name": self.config.dataset_name,
            "dummy_nb_rows": nb_rows,
            "dummy_seed": seed,
        }
    )
    response = self.http_client.post("get_dummy_dataset", request)

    if response.status_code == status.HTTP_200_OK:
        raw_json = response.content.decode("utf8")
        frame = DummyDsResponse.model_validate_json(raw_json).dummy_df
        if lazy:
            return pl.from_pandas(frame).lazy()
        return frame

    # Any status other than 200 is handed to the shared error helper.
    raise_error(response)

get_context #

get_context(
    epsilon: float | None = None, delta: float | None = None, rho: float | None = None
) -> Context

Create an OpenDP context based on a dummy dataset.

This can be used to build an OpenDP pipeline locally on the client side.

Parameters:

  • epsilon #

    (float | None, default: None ) –

    Privacy parameter to be spent. Required for pure DP or approximate DP (Laplace mechanism). Defaults to None.

  • delta #

    (float | None, default: None ) –

    Required if the pipeline measurement uses ZeroConcentratedDivergence (e.g., with make_gaussian) and is converted to SmoothedMaxDivergence using make_zCDP_to_approxDP. See: https://docs.smartnoise.org/sql/advanced.html#postprocess Defaults to None.

  • rho #

    (float | None, default: None ) –

    Privacy parameter used for zCDP or approximate zCDP (Gaussian mechanism). Cannot be used if epsilon is provided.

Returns:

  • Context

    dp.Context: OpenDP context object initialized with metadata and user-provided privacy parameters.

Source code in client/lomas_client/client.py
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
def get_context(
    self,
    epsilon: float | None = None,
    delta: float | None = None,
    rho: float | None = None,
) -> dp.Context:
    """
    Build an OpenDP context from a lazy dummy dataset and the dataset metadata.

    This can be used to build an OpenDP pipeline locally on the client side.

    Args:
        epsilon (float | None, optional): Privacy parameter to be spent.
            Required for pure DP or approximate DP (Laplace mechanism).
            Defaults to None.
        delta (float | None, optional): Required if the pipeline measurement
            uses ZeroConcentratedDivergence (e.g., with make_gaussian) and is
            converted to SmoothedMaxDivergence using
            make_zCDP_to_approxDP. See:
            https://docs.smartnoise.org/sql/advanced.html#postprocess
            Defaults to None.
        rho (float | None, optional): Privacy parameter used for zCDP or
            approximate zCDP (Gaussian mechanism). Cannot be used if
            epsilon is provided.

    Returns:
        dp.Context: The object returned by ``csvw_to_opendp_context`` for the
        metadata, the dummy LazyFrame and the given privacy parameters.
    """
    # The dummy dataset is requested before the metadata.
    dummy_frame = self.get_dummy_dataset(lazy=True)

    if self.metadata is None:
        self.metadata = self.get_dataset_metadata()

    privacy_params = {"epsilon": epsilon, "delta": delta, "rho": rho}
    return csvw_to_opendp_context(
        self.metadata,
        dummy_frame,
        **privacy_params,
        split_evenly_over=1,
    )

get_initial_budget #

get_initial_budget() -> InitialBudgetResponse

This function retrieves the initial budget.

Returns:

  • InitialBudgetResponse ( InitialBudgetResponse ) –

    A dictionary containing the initial budget.

Source code in client/lomas_client/client.py
221
222
223
224
225
226
227
228
229
230
231
232
233
def get_initial_budget(self) -> InitialBudgetResponse:
    """Ask the server for the initial budget of the configured dataset.

    Returns:
        InitialBudgetResponse: The result of validating the server response
            against ``InitialBudgetResponse``.
    """
    request = LomasRequestModel.model_validate({"dataset_name": self.config.dataset_name})
    response = self.http_client.post("get_initial_budget", request)
    return validate_model_response_direct(response, InitialBudgetResponse)

get_total_spent_budget #

get_total_spent_budget() -> SpentBudgetResponse

This function retrieves the total spent budget.

Returns:

  • SpentBudgetResponse ( SpentBudgetResponse ) –

    A dictionary containing the total spent budget.

Source code in client/lomas_client/client.py
235
236
237
238
239
240
241
242
243
244
245
246
247
def get_total_spent_budget(self) -> SpentBudgetResponse:
    """Ask the server for the total spent budget of the configured dataset.

    Returns:
        SpentBudgetResponse: The result of validating the server response
            against ``SpentBudgetResponse``.
    """
    request = LomasRequestModel.model_validate({"dataset_name": self.config.dataset_name})
    response = self.http_client.post("get_total_spent_budget", request)
    return validate_model_response_direct(response, SpentBudgetResponse)

get_remaining_budget #

get_remaining_budget() -> RemainingBudgetResponse

This function retrieves the remaining budget.

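Returns:

  • RemainingBudgetResponse ( RemainingBudgetResponse ) –

    A dictionary containing the remaining budget.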

Source code in client/lomas_client/client.py
249
250
251
252
253
254
255
256
257
258
259
260
261
def get_remaining_budget(self) -> RemainingBudgetResponse:
    """Ask the server for the remaining budget of the configured dataset.

    Returns:
        RemainingBudgetResponse: The result of validating the server response
            against ``RemainingBudgetResponse``.
    """
    request = LomasRequestModel.model_validate({"dataset_name": self.config.dataset_name})
    response = self.http_client.post("get_remaining_budget", request)
    return validate_model_response_direct(response, RemainingBudgetResponse)

get_previous_queries #

get_previous_queries() -> list[dict]

This function retrieves the previous queries of the user.

Raises:

  • ValueError

    If an unknown query type is encountered during deserialization.

Returns:

  • list[dict]

    List[dict]: A list of dictionaries containing the different queries on the private dataset.

Source code in client/lomas_client/client.py
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
def get_previous_queries(self) -> list[dict]:
    """Retrieve the previous queries of the user and decode their payloads.

    OpenDP entries get their ``client_input["opendp_json"]`` replaced by the
    deserialised polars LazyFrame; DiffPrivLib entries get their
    ``response["result"]["model"]`` replaced by the unpickled object.
    SmartnoiseSQL entries are returned unchanged. The dictionaries are
    modified in place.

    Raises:
        ValueError: If an unknown query type is encountered
            during deserialization.

    Returns:
        list[dict]: The queries made on the private dataset. When the server
        reports no queries, that empty value is returned as is.
    """
    request = LomasRequestModel.model_validate({"dataset_name": self.config.dataset_name})
    response = self.http_client.post("get_previous_queries", request)

    if response.status_code == status.HTTP_200_OK:
        payload = json.loads(response.content.decode("utf8"))
        queries = payload["previous_queries"]

        if not queries:
            return queries

        deserialised_queries = []
        for query in queries:
            library = query["dp_library"]

            if library == DPLibraries.SMARTNOISE_SQL:
                # Nothing to decode for this library.
                pass
            elif library == DPLibraries.OPENDP:
                client_input = query["client_input"]
                parsed_input = OpenDPQueryModel.model_validate(client_input)
                raw_plan = base64.b64decode(parsed_input.opendp_json)
                client_input["opendp_json"] = pl.LazyFrame.deserialize(io.BytesIO(raw_plan))
            elif library == DPLibraries.DIFFPRIVLIB:
                result = query["response"]["result"]
                raw_model = base64.b64decode(result["model"])
                # NOTE(security): pickle.loads can execute arbitrary code, so
                # this is only safe if the server response is trusted.
                result["model"] = pickle.loads(raw_model)
            else:
                raise ValueError(f"Cannot deserialise unknown query type: {query['dp_library']}")

            deserialised_queries.append(query)

        return deserialised_queries

    # Any status other than 200 is handed to the shared error helper.
    raise_error(response)

Classes:

LomasHttpClient #

LomasHttpClient(config: ClientConfig)

A client for interacting with the Lomas API.

Methods:

  • post

    Executes a POST request to endpoint with the provided JSON body.

  • wait_for_job

    Periodically query the job endpoint sleeping in between until it completes / times-out.

Source code in client/lomas_client/http_client.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def __init__(self, config: ClientConfig) -> None:
    """Set up request headers and the OAuth2 session, then obtain a usable token.

    Args:
        config: Client configuration; its telemetry, TLS and OIDC settings
            are read here.
    """
    if config.telemetry.enabled:
        RequestsInstrumentor().instrument()

    self.config = config
    self.headers = {"Content-type": "application/json", "Accept": "*/*"}

    both_use_tls = self.config.oidc_use_tls and self.config.lomas_service_use_tls
    if not both_use_tls:
        logger.warning("OIDC IdP or Lomas service configured without TLS -> using insecure transport")

    self._oauth2_session = OAuth2Session(
        client_id="lomas_client",
        token_endpoint=self.config.oidc_config.token_endpoint,
        scope=OIDC_REQUIRED_SCOPES,
        update_token=self._save_token,
        token=self._load_token(),
        token_endpoint_auth_method="none",
        leeway=30,  # refresh token 30 seconds before expiry
    )

    try:
        self._oauth2_session.refresh_token()
    except (OAuth2Error, OAuthError, AttributeError, requests.HTTPError):
        # Refreshing failed, so run the authorization step instead.
        # - HTTP errors: dex fails when it cannot link a token to an existing user.
        # - AttributeError: raised when the token is None.
        self._authorize()

post #

post(
    endpoint: str, body: LomasRequestModel, read_timeout: int = DEFAULT_READ_TIMEOUT
) -> Response

Executes a POST request to endpoint with the provided JSON body.

Handles authorization to the api by automatically fetching a token if required.

Parameters:

  • endpoint #

    (str) –

    The API endpoint to which the request will be sent.

  • body #

    (LomasRequestModel) –

    The body to include in the POST request.

  • read_timeout #

    (int, default: DEFAULT_READ_TIMEOUT ) –

    Number of seconds the client waits for the server to send a response. Defaults to DEFAULT_READ_TIMEOUT.

Returns:

  • Response

    requests.Response: The response object resulting from the POST request.

Source code in client/lomas_client/http_client.py
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def post(
    self,
    endpoint: str,
    body: LomasRequestModel,
    read_timeout: int = DEFAULT_READ_TIMEOUT,
) -> requests.Response:
    """Executes a POST request to endpoint with the provided JSON body.

    If the request raises ``OAuth2Error``, the client re-authorizes and sends
    the same request one more time.

    Args:
        endpoint (str): The API endpoint to which the request will be sent.
        body (LomasRequestModel): The body to include in the POST request.
        read_timeout (int): Number of seconds the client waits for the server
            to send a response.
            Defaults to DEFAULT_READ_TIMEOUT.

    Returns:
        requests.Response: The response object resulting from the POST request.
    """
    # Dump the model once; the same payload is logged and sent.
    payload = body.model_dump()

    logger.debug(
        f"User '{self.config.user_name}' is making a request "
        + f"to url '{self.config.app_url}' "
        + f"at the endpoint '{endpoint}' "
        + f"with query params: {payload}."
    )

    url = url_append(self.config.app_url, endpoint)

    def _send() -> requests.Response:
        # The session is read from self on every call, so a retry uses
        # whatever session is current after _authorize().
        return self._oauth2_session.post(
            url,
            json=payload,
            headers=self.headers,
            timeout=(CONNECT_TIMEOUT, read_timeout),
        )

    try:
        return _send()
    except OAuth2Error:
        # Handle expired refresh token
        self._authorize()
        return _send()

wait_for_job #

wait_for_job(job_uid: str, n_retry: int = 1800, sleep_sec: float = 1) -> Job

Periodically query the job endpoint sleeping in between until it completes / times-out.

Source code in client/lomas_client/http_client.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
def wait_for_job(self, job_uid: str, n_retry: int = 1800, sleep_sec: float = 1) -> Job:
    """Poll the job status endpoint until the job is complete or failed.

    Args:
        job_uid: Identifier appended to the ``/status/`` endpoint.
        n_retry: Maximum number of polls.
        sleep_sec: Seconds to sleep after each poll that did not end the wait.

    Returns:
        Job: The validated status payload once its ``status`` is
        ``"complete"`` or ``"failed"``.

    Raises:
        TimeoutError: If no terminal status was seen within ``n_retry`` polls.
    """
    terminal_states = {"complete", "failed"}

    def _fetch_status():
        # NOTE(review): the timeout is a single value here, not a
        # (connect, read) pair as in post() - confirm this is intended.
        return self._oauth2_session.get(
            url_append(self.config.app_url, f"/status/{job_uid}"),
            headers=self.headers,
            timeout=CONNECT_TIMEOUT,
        ).json()

    for _ in range(n_retry):
        try:
            job_query = _fetch_status()
        except OAuth2Error:
            # Handle expired refresh token
            self._authorize()
            job_query = _fetch_status()

        # The payload may lack "status", so test for the key before reading it.
        finished = "status" in job_query and job_query["status"] in terminal_states
        if finished:
            return Job.model_validate(job_query)

        time.sleep(sleep_sec)

    raise TimeoutError(f"Job {job_uid} didn't complete in time ({sleep_sec * n_retry})")

Classes:

  • DiffPrivLibClient

    A client for executing and estimating the cost of DiffPrivLib queries.

DiffPrivLibClient #

DiffPrivLibClient(http_client: LomasHttpClient)

A client for executing and estimating the cost of DiffPrivLib queries.

Methods:

  • cost

    This function estimates the cost of executing a DiffPrivLib query.

  • query

    Trains a DiffPrivLib pipeline and return a trained Pipeline.

Source code in client/lomas_client/libraries/diffprivlib.py
21
22
def __init__(self, http_client: LomasHttpClient) -> None:
    """Keep a reference to the HTTP client on the instance.

    Args:
        http_client: Client stored as ``self.http_client``.
    """
    self.http_client = http_client

cost #

cost(
    pipeline: Pipeline,
    feature_columns: list[str] | None = None,
    target_columns: list[str] | None = None,
    test_size: float = 0.2,
    test_train_split_seed: int = 1,
    imputer_strategy: str = "drop",
) -> CostResponse

This function estimates the cost of executing a DiffPrivLib query.

Parameters:

  • pipeline #

    (pipeline) –

    DiffPrivLib pipeline with three conditions:

    • The pipeline MUST start with a models.StandardScaler. Otherwise a PrivacyLeakWarning is raised by the DiffPrivLib library and is treated as an error in the lomas server.

    • random_state fields can only be int (RandomState will not work).
    • accountant fields must be None.

    Note: as in DiffPrivLib, avoid any DiffprivlibCompatibilityWarning to ensure that the pipeline does what is intended.

  • feature_columns #

    (list[str], default: None ) –

    the list of feature column to train

  • target_columns #

    (list[str], default: None ) –

    the list of target column to predict May be None for certain models.

  • test_size #

    (float, default: 0.2 ) –

    proportion of the test set Defaults to 0.2.

  • test_train_split_seed #

    (int, default: 1 ) –

    seed for random train test split Defaults to 1.

  • imputer_strategy #

    (str, default: 'drop' ) –

    imputation strategy. Defaults to "drop". "drop": will drop all rows with missing values "mean": will replace values by the mean of the column values "median": will replace values by the median of the column values "most_frequent": will replace values by the most frequent values

Returns:

  • CostResponse

    Optional[dict[str, float]]: A dictionary containing the estimated cost.

Source code in client/lomas_client/libraries/diffprivlib.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def cost(
    self,
    pipeline: Pipeline,
    feature_columns: list[str] | None = None,
    target_columns: list[str] | None = None,
    test_size: float = 0.2,
    test_train_split_seed: int = 1,
    imputer_strategy: str = "drop",
) -> CostResponse:
    """Estimate the cost of executing a DiffPrivLib query.

    Args:
        pipeline (sklearn.pipeline): DiffPrivLib pipeline with three conditions:

            - The pipeline MUST start with a `models.StandardScaler`.
              Otherwise a PrivacyLeakWarning is raised by DiffPrivLib library and
              is treated as an error in lomas server.
            - `random_state` fields can only be int (`RandomState` will not work).
            - `accountant` fields must be None.

            Note: as in DiffPrivLib, avoid any DiffprivlibCompatibilityWarning
            to ensure that the pipeline does what is intended.
        feature_columns (list[str], optional): the list of feature columns to
            train on. ``None`` is sent as ``[""]``.
        target_columns (list[str], optional): the list of target columns to
            predict. May be None for certain models; ``None`` is sent as ``[""]``.
        test_size (float, optional): proportion of the test set.
            Defaults to 0.2.
        test_train_split_seed (int, optional): seed for random train test split.
            Defaults to 1.
        imputer_strategy (str, optional): imputation strategy. Defaults to "drop".
            "drop": will drop all rows with missing values
            "mean": will replace values by the mean of the column values
            "median": will replace values by the median of the column values
            "most_frequent": will replace values by the most frequent values

    Returns:
        CostResponse: The result of validating the server response against
        ``CostResponse``.
    """
    targets = [""] if target_columns is None else target_columns
    features = [""] if feature_columns is None else feature_columns

    request = DiffPrivLibRequestModel.model_validate(
        {
            "dataset_name": self.http_client.config.dataset_name,
            "diffprivlib_json": serialise_pipeline(pipeline),
            "feature_columns": features,
            "target_columns": targets,
            "test_size": test_size,
            "test_train_split_seed": test_train_split_seed,
            "imputer_strategy": imputer_strategy,
        }
    )
    response = self.http_client.post("estimate_diffprivlib_cost", request)

    return validate_model_response(self.http_client, response, CostResponse)

query #

query(
    pipeline: Pipeline,
    feature_columns: list[str],
    target_columns: list[str] | None = None,
    test_size: float = 0.2,
    test_train_split_seed: int = 1,
    imputer_strategy: str = "drop",
    dummy: bool = False,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
) -> QueryResponse

Trains a DiffPrivLib pipeline and return a trained Pipeline.

Parameters:

  • pipeline #

    (pipeline) –

    DiffPrivLib pipeline with three conditions:

    • The pipeline MUST start with a models.StandardScaler. Otherwise a PrivacyLeakWarning is raised by the DiffPrivLib library and is treated as an error in the lomas server.
    • random_state fields can only be int (RandomState will not work).
    • accountant fields must be None.

    Note: as in DiffPrivLib, avoid any DiffprivlibCompatibilityWarning to ensure that the pipeline does what is intended.

  • feature_columns #

    (list[str]) –

    the list of feature column to train

  • target_columns #

    (list[str], default: None ) –

    the list of target column to predict May be None for certain models.

  • test_size #

    (float, default: 0.2 ) –

    proportion of the test set Defaults to 0.2.

  • test_train_split_seed #

    (int, default: 1 ) –

    seed for random train test split Defaults to 1.

  • imputer_strategy #

    (str, default: 'drop' ) –

    imputation strategy. Defaults to "drop". "drop": will drop all rows with missing values; "mean": will replace values by the mean of the column values; "median": will replace values by the median of the column values; "most_frequent": will replace values by the most frequent values.

  • dummy #

    (bool, default: False ) –

    Whether to use a dummy dataset. Defaults to False.

  • nb_rows #

    (int, default: DUMMY_NB_ROWS ) –

    The number of rows in the dummy dataset. Defaults to DUMMY_NB_ROWS.

  • seed #

    (int, default: DUMMY_SEED ) –

    The random seed for generating the dummy dataset. Defaults to DUMMY_SEED.

Returns:

  • QueryResponse

    Optional[Pipeline]: A trained DiffPrivLib pipeline

Source code in client/lomas_client/libraries/diffprivlib.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
def query(
    self,
    pipeline: Pipeline,
    feature_columns: list[str],
    target_columns: list[str] | None = None,
    test_size: float = 0.2,
    test_train_split_seed: int = 1,
    imputer_strategy: str = "drop",
    dummy: bool = False,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
) -> QueryResponse:
    """Send a DiffPrivLib pipeline to the server for training.

    Args:
        pipeline (sklearn.pipeline): DiffPrivLib pipeline with three conditions:

            - The pipeline MUST start with a `models.StandardScaler`.
              Otherwise a PrivacyLeakWarning is raised by DiffPrivLib library and
              is treated as an error in lomas server.
            - `random_state` fields can only be int (`RandomState` will not work).
            - `accountant` fields must be None.

            Note: as in DiffPrivLib, avoid any DiffprivlibCompatibilityWarning
            to ensure that the pipeline does what is intended.
        feature_columns (list[str]): the list of feature columns to train on.
        target_columns (list[str], optional): the list of target columns to
            predict. May be None for certain models.
        test_size (float, optional): proportion of the test set.
            Defaults to 0.2.
        test_train_split_seed (int, optional): seed for random train test split.
            Defaults to 1.
        imputer_strategy (str, optional): imputation strategy. Defaults to "drop".
            "drop": will drop all rows with missing values
            "mean": will replace values by the mean of the column values
            "median": will replace values by the median of the column values
            "most_frequent": will replace values by the most frequent values
        dummy (bool, optional): Whether to use a dummy dataset. Defaults to False.
        nb_rows (int, optional): The number of rows in the dummy dataset.
            Only sent when ``dummy`` is True. Defaults to DUMMY_NB_ROWS.
        seed (int, optional): The random seed for generating the dummy dataset.
            Only sent when ``dummy`` is True. Defaults to DUMMY_SEED.

    Returns:
        QueryResponse: The result of validating the server response against
        ``QueryResponse``.
    """
    payload = {
        "dataset_name": self.http_client.config.dataset_name,
        "diffprivlib_json": serialise_pipeline(pipeline),
        "feature_columns": feature_columns,
        "target_columns": target_columns,
        "test_size": test_size,
        "test_train_split_seed": test_train_split_seed,
        "imputer_strategy": imputer_strategy,
    }

    # Dummy runs use a dedicated endpoint and model, plus two extra fields.
    request_model: type[DiffPrivLibRequestModel]
    if not dummy:
        request_model = DiffPrivLibQueryModel
        endpoint = "diffprivlib_query"
    else:
        request_model = DiffPrivLibDummyQueryModel
        endpoint = "dummy_diffprivlib_query"
        payload["dummy_nb_rows"] = nb_rows
        payload["dummy_seed"] = seed

    request = request_model.model_validate(payload)
    response = self.http_client.post(endpoint, request)

    return validate_model_response(self.http_client, response, QueryResponse)

Classes:

  • OpenDPClient

    A client for executing and estimating the cost of OpenDP queries.

OpenDPClient #

OpenDPClient(http_client: LomasHttpClient)

A client for executing and estimating the cost of OpenDP queries.

Methods:

  • cost

    This function estimates the cost of executing an OpenDP query.

  • query

    This function executes an OpenDP query.

Source code in client/lomas_client/libraries/opendp.py
21
22
def __init__(self, http_client: LomasHttpClient) -> None:
    """Keep a reference to the HTTP client used to post OpenDP requests.

    Args:
        http_client (LomasHttpClient): Client stored on the instance as
            ``self.http_client``.
    """
    self.http_client = http_client

cost #

cost(
    opendp_pipeline: LazyFrameQuery | LazyFrame,
    epsilon: float | None = None,
    delta: float | None = None,
    rho: float | None = None,
    approx_zcdp: bool = True,
) -> CostResponse

This function estimates the cost of executing an OpenDP query.

Parameters:

  • opendp_pipeline #

    (Measurement) –

    The OpenDP pipeline for the query.

  • epsilon #

    (float, default: None ) –

    Privacy parameter that will be spent. For pure-DP or approximate DP this must be set. (Laplace mechanism)

  • delta #

    (Optional[float], default: None ) –

    If the pipeline measurement is of type “ZeroConcentratedDivergence” (e.g. with make_gaussian) then it is converted to “SmoothedMaxDivergence” with make_zCDP_to_approxDP (See Smartnoise-SQL postprocessing documentation. <https://docs.smartnoise.org/sql/advanced.html#postprocess>__). In that case a delta must be provided by the user. Defaults to None.

  • rho #

    (float, default: None ) –

    Privacy parameter used for zCDP or approximate-zCDP (Gaussian mechanism). Cannot be used if epsilon is not None.

  • approx_zcdp #

    (bool, default: True ) –

    If false, delta is used to compute the epsilon consumption equivalent when user wants to use zCDP. Default True.

Raises: Exception: If the opendp_pipeline type is not supported.

Returns:

Source code in client/lomas_client/libraries/opendp.py
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def cost(
    self,
    opendp_pipeline: dp.extras.polars.LazyFrameQuery | pl.LazyFrame,
    epsilon: float | None = None,
    delta: float | None = None,
    rho: float | None = None,
    approx_zcdp: bool = True,
) -> CostResponse:
    """Estimate the cost of executing an OpenDP query.

    The request body is built by ``self._get_opendp_request_body``, validated
    as an ``OpenDPRequestModel`` and posted to the ``estimate_opendp_cost``
    endpoint.

    Args:
        opendp_pipeline (dp.extras.polars.LazyFrameQuery | pl.LazyFrame): The
            OpenDP pipeline for the query.
        epsilon (float | None): Privacy parameter that will be spent. For
            pure-DP or approximate DP this must be set. (Laplace mechanism)
        delta (float | None): If the pipeline measurement is of type
            "ZeroConcentratedDivergence" (e.g. with make_gaussian) then it is
            converted to "SmoothedMaxDivergence" with make_zCDP_to_approxDP
            (`See Smartnoise-SQL postprocessing documentation.
            <https://docs.smartnoise.org/sql/advanced.html#postprocess>`__).
            In that case a delta must be provided by the user.
            Defaults to None.
        rho (float | None): Privacy parameter used for zCDP or
            approximate-zCDP (Gaussian mechanism). Cannot be used if epsilon
            is not None.
        approx_zcdp (bool): If false, delta is used to compute the epsilon
            consumption equivalent when user wants to use zCDP.
            Defaults to True.

    Raises:
        Exception: If the opendp_pipeline type is not supported.

    Returns:
        CostResponse: The estimated cost.
    """
    privacy_params = {
        "epsilon": epsilon,
        "delta": delta,
        "rho": rho,
        "approx_zcdp": approx_zcdp,
    }
    payload = self._get_opendp_request_body(opendp_pipeline, **privacy_params)
    request = OpenDPRequestModel.model_validate(payload)

    response = self.http_client.post("estimate_opendp_cost", request)
    return validate_model_response(self.http_client, response, CostResponse)

query #

query(
    opendp_pipeline: LazyFrameQuery | LazyFrame,
    epsilon: float | None = None,
    delta: float | None = None,
    rho: float | None = None,
    approx_zcdp: bool = True,
    dummy: bool = False,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
) -> QueryResponse

This function executes an OpenDP query.

Parameters:

  • opendp_pipeline #

    (Measurement) –

    The OpenDP pipeline for the query. Can be a dp.Measurement or a polars LazyFrame (plan) for opendp.polars pipelines.

  • epsilon #

    (float, default: None ) –

    Privacy parameter that will be spent. For pure-DP or approximate DP this must be set. (Laplace mechanism)

  • delta #

    (Optional[float], default: None ) –

    If the pipeline measurement is of type “ZeroConcentratedDivergence” (e.g. with make_gaussian) then it is converted to “SmoothedMaxDivergence” with make_zCDP_to_approxDP (See Smartnoise-SQL postprocessing documentation. <https://docs.smartnoise.org/sql/advanced.html#postprocess>__). In that case a delta must be provided by the user. Defaults to None.

  • rho #

    (float, default: None ) –

    Privacy parameter used for zCDP or approximate-zCDP (Gaussian mechanism). Cannot be used if epsilon is not None.

  • approx_zcdp #

    (bool, default: True ) –

    If false, delta is used to compute the epsilon consumption equivalent when user wants to use zCDP. Default True.

  • dummy #

    (bool, default: False ) –

    Whether to use a dummy dataset. Defaults to False.

  • nb_rows #

    (int, default: DUMMY_NB_ROWS ) –

    The number of rows in the dummy dataset. Defaults to DUMMY_NB_ROWS.

  • seed #

    (int, default: DUMMY_SEED ) –

    The random seed for generating the dummy dataset. Defaults to DUMMY_SEED.

Raises:

  • Exception

    If the opendp_pipeline type is not supported.

Returns:

  • QueryResponse ( QueryResponse ) –

    A dictionary of the response body containing the deserialized pipeline result.

Source code in client/lomas_client/libraries/opendp.py
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
def query(
    self,
    opendp_pipeline: dp.extras.polars.LazyFrameQuery | pl.LazyFrame,
    epsilon: float | None = None,
    delta: float | None = None,
    rho: float | None = None,
    approx_zcdp: bool = True,
    dummy: bool = False,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
) -> QueryResponse:
    """Execute an OpenDP query.

    Posts to ``opendp_query``, or to ``dummy_opendp_query`` when ``dummy`` is
    true, in which case ``nb_rows`` and ``seed`` are added to the request body.

    Args:
        opendp_pipeline (dp.extras.polars.LazyFrameQuery | pl.LazyFrame): The
            OpenDP pipeline for the query. Can be a dp.Measurement or a polars
            LazyFrame (plan) for opendp.polars pipelines.
        epsilon (float | None): Privacy parameter that will be spent. For
            pure-DP or approximate DP this must be set. (Laplace mechanism)
        delta (float | None): If the pipeline measurement is of type
            "ZeroConcentratedDivergence" (e.g. with make_gaussian) then it is
            converted to "SmoothedMaxDivergence" with make_zCDP_to_approxDP
            (`See Smartnoise-SQL postprocessing documentation.
            <https://docs.smartnoise.org/sql/advanced.html#postprocess>`__).
            In that case a delta must be provided by the user.
            Defaults to None.
        rho (float | None): Privacy parameter used for zCDP or
            approximate-zCDP (Gaussian mechanism). Cannot be used if epsilon
            is not None.
        approx_zcdp (bool): If false, delta is used to compute the epsilon
            consumption equivalent when user wants to use zCDP.
            Defaults to True.
        dummy (bool, optional): Whether to use a dummy dataset.
            Defaults to False.
        nb_rows (int, optional): The number of rows in the dummy dataset.
            Defaults to DUMMY_NB_ROWS.
        seed (int, optional): The random seed for generating the dummy
            dataset. Defaults to DUMMY_SEED.

    Raises:
        Exception: If the opendp_pipeline type is not supported.

    Returns:
        QueryResponse: The validated response body containing the
            deserialized pipeline result.
    """
    payload = self._get_opendp_request_body(
        opendp_pipeline, epsilon=epsilon, delta=delta, rho=rho, approx_zcdp=approx_zcdp
    )

    # Start from the real-data route and switch to the dummy one on request.
    endpoint = "opendp_query"
    request_model: type[OpenDPRequestModel] = OpenDPQueryModel
    if dummy:
        endpoint = "dummy_opendp_query"
        request_model = OpenDPDummyQueryModel
        payload["dummy_nb_rows"] = nb_rows
        payload["dummy_seed"] = seed

    request = request_model.model_validate(payload)
    response = self.http_client.post(endpoint, request)
    return validate_model_response(self.http_client, response, QueryResponse)

Classes:

  • SmartnoiseSQLClient

    A client for executing and estimating the cost of SmartNoise SQL queries.

SmartnoiseSQLClient #

SmartnoiseSQLClient(http_client: LomasHttpClient)

A client for executing and estimating the cost of SmartNoise SQL queries.

Methods:

  • cost

    This function estimates the cost of executing a SmartNoise query.

  • query

    This function executes a SmartNoise SQL query.

Source code in client/lomas_client/libraries/smartnoise_sql.py
15
16
def __init__(self, http_client: LomasHttpClient) -> None:
    """Keep a reference to the HTTP client used to post SmartNoise SQL requests.

    Args:
        http_client (LomasHttpClient): Client stored on the instance as
            ``self.http_client``.
    """
    self.http_client = http_client

cost #

cost(
    query: str, epsilon: float, delta: float, mechanisms: dict[str, str] | None = None
) -> CostResponse

This function estimates the cost of executing a SmartNoise query.

Parameters:

  • query #

    (str) –

    The SQL query to estimate the cost for. NOTE: the table name is df, the query must end with “FROM df”.

  • epsilon #

    (float) –

    Privacy parameter (e.g., 0.1).

  • delta #

    (float) –

    Privacy parameter (e.g., 1e-5).

  • mechanisms #

    (dict[str, str], default: None ) –

    Dictionary of mechanisms for the query See Smartnoise-SQL postprocessing documentation. <https://docs.smartnoise.org/sql/advanced.html#postprocess>__ Defaults to {}.

Returns:

Source code in client/lomas_client/libraries/smartnoise_sql.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def cost(
    self,
    query: str,
    epsilon: float,
    delta: float,
    mechanisms: dict[str, str] | None = None,
) -> CostResponse:
    """Estimate the cost of executing a SmartNoise SQL query.

    Args:
        query (str): The SQL query to estimate the cost for.
            NOTE: the table name is df, the query must end with "FROM df".
        epsilon (float): Privacy parameter (e.g., 0.1).
        delta (float): Privacy parameter (e.g., 1e-5).
        mechanisms (dict[str, str], optional): Dictionary of mechanisms for
            the query `See Smartnoise-SQL documentation.
            <https://docs.smartnoise.org/sql/advanced.html#overriding-mechanisms>`__
            Defaults to {}.

    Returns:
        CostResponse: The estimated cost.
    """
    payload = {
        "query_str": query,
        "dataset_name": self.http_client.config.dataset_name,
        "epsilon": epsilon,
        "delta": delta,
        # A missing mapping is sent as an empty dict.
        "mechanisms": {} if mechanisms is None else mechanisms,
    }
    request = SmartnoiseSQLRequestModel.model_validate(payload)

    response = self.http_client.post("estimate_smartnoise_sql_cost", request)
    return validate_model_response(self.http_client, response, CostResponse)

query #

query(
    query: str,
    epsilon: float,
    delta: float,
    mechanisms: dict[str, str] | None = None,
    postprocess: bool = True,
    dummy: bool = False,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
) -> QueryResponse

This function executes a SmartNoise SQL query.

Parameters:

  • query #

    (str) –

    The SQL query to execute. NOTE: the table name is df, the query must end with “FROM df”.

  • epsilon #

    (float) –

    Privacy parameter (e.g., 0.1).

  • delta #

    (float) –

    Privacy parameter (e.g., 1e-5).

  • mechanisms #

    (dict[str, str], default: None ) –

    Dictionary of mechanisms for the query See Smartnoise-SQL postprocessing documentation. <https://docs.smartnoise.org/sql/advanced.html#overriding-mechanisms>__

    Defaults to {}.

  • postprocess #

    (bool, default: True ) –

    Whether to postprocess the query results. See Smartnoise-SQL postprocessing documentation. <https://docs.smartnoise.org/sql/advanced.html#postprocess>__

    Defaults to True.

  • dummy #

    (bool, default: False ) –

    Whether to use a dummy dataset.

    Defaults to False.

  • nb_rows #

    (int, default: DUMMY_NB_ROWS ) –

    The number of rows in the dummy dataset.

    Defaults to DUMMY_NB_ROWS.

  • seed #

    (int, default: DUMMY_SEED ) –

    The random seed for generating the dummy dataset.

    Defaults to DUMMY_SEED.

Returns:

  • QueryResponse ( QueryResponse ) –

    A Pandas DataFrame containing the query results.

Source code in client/lomas_client/libraries/smartnoise_sql.py
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def query(
    self,
    query: str,
    epsilon: float,
    delta: float,
    mechanisms: dict[str, str] | None = None,
    postprocess: bool = True,
    dummy: bool = False,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
) -> QueryResponse:
    """Execute a SmartNoise SQL query.

    Posts to ``smartnoise_sql_query``, or to ``dummy_smartnoise_sql_query``
    when ``dummy`` is true, in which case ``nb_rows`` and ``seed`` are added
    to the request body.

    Args:
        query (str): The SQL query to execute.
            NOTE: the table name is df, the query must end with "FROM df".
        epsilon (float): Privacy parameter (e.g., 0.1).
        delta (float): Privacy parameter (e.g., 1e-5).
        mechanisms (dict[str, str], optional): Dictionary of mechanisms for
            the query `See Smartnoise-SQL documentation.
            <https://docs.smartnoise.org/sql/advanced.html#overriding-mechanisms>`__
            Defaults to {}.
        postprocess (bool, optional): Whether to postprocess the query
            results. `See Smartnoise-SQL postprocessing documentation.
            <https://docs.smartnoise.org/sql/advanced.html#postprocess>`__
            Defaults to True.
        dummy (bool, optional): Whether to use a dummy dataset.
            Defaults to False.
        nb_rows (int, optional): The number of rows in the dummy dataset.
            Defaults to DUMMY_NB_ROWS.
        seed (int, optional): The random seed for generating the dummy
            dataset. Defaults to DUMMY_SEED.

    Returns:
        QueryResponse: The validated response containing the query results.
    """
    payload = {
        "query_str": query,
        "dataset_name": self.http_client.config.dataset_name,
        "epsilon": epsilon,
        "delta": delta,
        # A missing mapping is sent as an empty dict.
        "mechanisms": {} if mechanisms is None else mechanisms,
        "postprocess": postprocess,
    }

    # Start from the real-data route and switch to the dummy one on request.
    endpoint = "smartnoise_sql_query"
    request_model: type[SmartnoiseSQLRequestModel] = SmartnoiseSQLQueryModel
    if dummy:
        endpoint = "dummy_smartnoise_sql_query"
        request_model = SmartnoiseSQLDummyQueryModel
        payload["dummy_nb_rows"] = nb_rows
        payload["dummy_seed"] = seed

    request = request_model.model_validate(payload)
    response = self.http_client.post(endpoint, request)
    return validate_model_response(self.http_client, response, QueryResponse)

Classes:

SmartnoiseSynthClient #

SmartnoiseSynthClient(http_client: LomasHttpClient)

A client for executing and estimating the cost of SmartNoiseSynth queries.

Methods:

  • cost

    This function estimates the cost of executing a SmartNoise query.

  • query

    This function executes a SmartNoise Synthetic query.

Source code in client/lomas_client/libraries/smartnoise_synth.py
20
21
def __init__(self, http_client: LomasHttpClient) -> None:
    """Keep a reference to the HTTP client used to post SmartNoise Synth requests.

    Args:
        http_client (LomasHttpClient): Client stored on the instance as
            ``self.http_client``.
    """
    self.http_client = http_client

cost #

cost(
    synth_name: str,
    epsilon: float,
    delta: float | None = None,
    select_cols: list[str] | None = None,
    synth_params: dict | None = None,
    nullable: bool = True,
    constraints: dict | None = None,
) -> CostResponse

This function estimates the cost of executing a SmartNoise query.

Parameters:

  • synth_name #

    (str) –

    name of the Synthesizer model to use. Available synthesizer are - "aim", - "mwem", - "dpctgan" with disabled_dp=False and warning (cryptographically secure random generator) - "patectgan" - "dpgan" with warning (cryptographically secure random generator)

    Available under certain conditions: - "mst" if return_model=False - "pategan" if the dataset has enough rows

    Not available: - "pacsynth" due to Rust panic error - "quail" currently unavailable in Smartnoise Synth

    For further documentation on models, please see here: https://docs.smartnoise.org/synth/index.html#synthesizers-reference

  • epsilon #

    (float) –

    Privacy parameter (e.g., 0.1).

  • delta #

    (float, default: None ) –

    Privacy parameter (e.g., 1e-5).

  • select_cols #

    (List[str], default: None ) –

    List of columns to select. Defaults to None.

  • synth_params #

    (dict, default: None ) –

    Keyword arguments to pass to the synthesizer constructor. See https://docs.smartnoise.org/synth/synthesizers/index.html#, provide all parameters of the model except epsilon and delta. Defaults to None.

  • nullable #

    (bool, default: True ) –

    True if some data cells may be null Defaults to True.

  • constraints #

    (dict, default: None ) –

    Dictionary for custom table transformer constraints. Columns that are not specified will be inferred based on metadata. Defaults to {}. For further documentation on constraints, please see here: https://docs.smartnoise.org/synth/transforms/index.html. Note: lambda functions in AnonymizationTransformer are not supported.

Returns:

Source code in client/lomas_client/libraries/smartnoise_synth.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
def cost(
    self,
    synth_name: str,
    epsilon: float,
    delta: float | None = None,
    select_cols: list[str] | None = None,
    synth_params: dict | None = None,
    nullable: bool = True,
    constraints: dict | None = None,
) -> CostResponse:
    """Estimate the cost of executing a SmartNoise Synth query.

    Args:
        synth_name (str): Name of the Synthesizer model to use.
            Available synthesizers are
            - "aim",
            - "mwem",
            - "dpctgan" with `disabled_dp=False` and warning (cryptographically secure random generator)
            - "patectgan"
            - "dpgan" with warning (cryptographically secure random generator)

            Available under certain conditions:
            - "mst" if `return_model=False`
            - "pategan" if the dataset has enough rows

            Not available:
            - "pacsynth" due to Rust panic error
            - "quail" currently unavailable in Smartnoise Synth

            For further documentation on models, please see here:
            https://docs.smartnoise.org/synth/index.html#synthesizers-reference
        epsilon (float): Privacy parameter (e.g., 0.1).
        delta (float | None): Privacy parameter (e.g., 1e-5).
        select_cols (list[str] | None): List of columns to select.
            Defaults to None, which is sent as an empty list.
        synth_params (dict | None): Keyword arguments to pass to the
            synthesizer constructor.
            See https://docs.smartnoise.org/synth/synthesizers/index.html#, provide
            all parameters of the model except `epsilon` and `delta`.
            Defaults to None, which is sent as an empty dict.
        nullable (bool): True if some data cells may be null.
            Defaults to True.
        constraints (dict | None): Dictionary for custom table transformer
            constraints. Columns that are not specified will be inferred
            based on metadata. Defaults to {}.
            For further documentation on constraints, please see here:
            https://docs.smartnoise.org/synth/transforms/index.html.
            Note: lambda functions in `AnonymizationTransformer` are not supported.
            NOTE: the request body built here always carries an empty string
            for ``constraints``; the value passed in is not forwarded.

    Returns:
        CostResponse: The estimated cost.
    """
    constraints = {} if constraints is None else constraints
    synth_params = {} if synth_params is None else synth_params
    select_cols = [] if select_cols is None else select_cols
    # constraints_str = serialise_constraints(constraints) if constraints else ""

    payload = {
        "dataset_name": self.http_client.config.dataset_name,
        "synth_name": synth_name,
        "epsilon": epsilon,
        "delta": delta,
        "select_cols": select_cols,
        "synth_params": synth_params,
        "nullable": nullable,
        "constraints": "",
    }
    request = SmartnoiseSynthRequestModel.model_validate(payload)

    response = self.http_client.post(
        "estimate_smartnoise_synth_cost", request, SMARTNOISE_SYNTH_READ_TIMEOUT
    )
    return validate_model_response(self.http_client, response, CostResponse)

query #

query(
    synth_name: str,
    epsilon: float,
    delta: float | None = None,
    select_cols: list[str] | None = None,
    synth_params: dict | None = None,
    nullable: bool = True,
    constraints: dict | None = None,
    dummy: bool = False,
    return_model: bool = False,
    condition: str = "",
    nb_samples: int = SNSYNTH_DEFAULT_SAMPLES_NB,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
) -> QueryResponse

This function executes a SmartNoise Synthetic query.

Parameters:

  • synth_name #

    (str) –

    name of the Synthesizer model to use. Available synthesizer are - "aim", - "mwem", - "dpctgan" with disabled_dp=False and warning (cryptographically secure random generator) - "patectgan" - "dpgan" with warning (cryptographically secure random generator)

    Available under certain conditions: - "mst" if return_model=False - "pategan" if the dataset has enough rows

    Not available: - "pacsynth" due to Rust panic error - "quail" currently unavailable in Smartnoise Synth

    For further documentation on models, please see here: https://docs.smartnoise.org/synth/index.html#synthesizers-reference

  • epsilon #

    (float) –

    Privacy parameter (e.g., 0.1).

  • delta #

    (float, default: None ) –

    Privacy parameter (e.g., 1e-5).

  • select_cols #

    (List[str], default: None ) –

    List of columns to select. Defaults to None.

  • synth_params #

    (dict, default: None ) –

    Keyword arguments to pass to the synthesizer constructor. See https://docs.smartnoise.org/synth/synthesizers/index.html#, provide all parameters of the model except epsilon and delta. Defaults to None.

  • nullable #

    (bool, default: True ) –

    True if some data cells may be null Defaults to True.

  • constraints #

    (dict | None, default: None ) –

    Dictionary for custom table transformer constraints. Columns that are not specified will be inferred based on metadata. Defaults to {}. For further documentation on constraints, please see here: https://docs.smartnoise.org/synth/transforms/index.html. Note: lambda functions in AnonymizationTransformer are not supported.

  • return_model #

    (bool, default: False ) –

    True to get Synthesizer model, False to get samples Defaults to False

  • condition #

    (Optional[str], default: '' ) –

    sampling condition in model.sample (only relevant if return_model is False) Defaults to "".

  • nb_samples #

    (Optional[int], default: SNSYNTH_DEFAULT_SAMPLES_NB ) –

    number of samples to generate. (only relevant if return_model is False) Defaults to SNSYNTH_DEFAULT_SAMPLES_NB

  • dummy #

    (bool, default: False ) –

    Whether to use a dummy dataset. Defaults to False.

  • nb_rows #

    (int, default: DUMMY_NB_ROWS ) –

    The number of rows in the dummy dataset. Defaults to DUMMY_NB_ROWS.

  • seed #

    (int, default: DUMMY_SEED ) –

    The random seed for generating the dummy dataset. Defaults to DUMMY_SEED.

Returns:

  • QueryResponse ( QueryResponse ) –

    A Pandas DataFrame containing the query results.

Source code in client/lomas_client/libraries/smartnoise_synth.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
def query(
    self,
    synth_name: str,
    epsilon: float,
    delta: float | None = None,
    select_cols: list[str] | None = None,
    synth_params: dict | None = None,
    nullable: bool = True,
    constraints: dict | None = None,
    dummy: bool = False,
    return_model: bool = False,
    condition: str = "",
    nb_samples: int = SNSYNTH_DEFAULT_SAMPLES_NB,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
) -> QueryResponse:
    """Execute a SmartNoise Synth query.

    Posts to ``smartnoise_synth_query``, or to
    ``dummy_smartnoise_synth_query`` when ``dummy`` is true, in which case
    ``nb_rows`` and ``seed`` are added to the request body.

    Args:
        synth_name (str): Name of the Synthesizer model to use.
            Available synthesizers are
            - "aim",
            - "mwem",
            - "dpctgan" with `disabled_dp=False` and warning (cryptographically secure random generator)
            - "patectgan"
            - "dpgan" with warning (cryptographically secure random generator)

            Available under certain conditions:
            - "mst" if `return_model=False`
            - "pategan" if the dataset has enough rows

            Not available:
            - "pacsynth" due to Rust panic error
            - "quail" currently unavailable in Smartnoise Synth

            For further documentation on models, please see here:
            https://docs.smartnoise.org/synth/index.html#synthesizers-reference
        epsilon (float): Privacy parameter (e.g., 0.1).
        delta (float | None): Privacy parameter (e.g., 1e-5).
        select_cols (list[str] | None): List of columns to select.
            Defaults to None, which is sent as an empty list.
        synth_params (dict | None): Keyword arguments to pass to the
            synthesizer constructor.
            See https://docs.smartnoise.org/synth/synthesizers/index.html#, provide
            all parameters of the model except `epsilon` and `delta`.
            Defaults to None, which is sent as an empty dict.
        nullable (bool): True if some data cells may be null.
            Defaults to True.
        constraints (dict | None): Dictionary for custom table transformer
            constraints. Columns that are not specified will be inferred
            based on metadata. Defaults to {}.
            For further documentation on constraints, please see here:
            https://docs.smartnoise.org/synth/transforms/index.html.
            Note: lambda functions in `AnonymizationTransformer` are not supported.
            NOTE: the request body built here always carries an empty string
            for ``constraints``; the value passed in is not forwarded.
        dummy (bool, optional): Whether to use a dummy dataset.
            Defaults to False.
        return_model (bool): True to get Synthesizer model, False to get
            samples. Defaults to False.
        condition (str): Sampling condition in `model.sample`
            (only relevant if return_model is False).
            Defaults to "".
        nb_samples (int): Number of samples to generate
            (only relevant if return_model is False).
            Defaults to SNSYNTH_DEFAULT_SAMPLES_NB.
        nb_rows (int, optional): The number of rows in the dummy dataset.
            Defaults to DUMMY_NB_ROWS.
        seed (int, optional): The random seed for generating the dummy
            dataset. Defaults to DUMMY_SEED.

    Returns:
        QueryResponse: The validated response containing the query results.
    """
    constraints = {} if constraints is None else constraints
    synth_params = {} if synth_params is None else synth_params
    select_cols = [] if select_cols is None else select_cols
    # constraints_str = serialise_constraints(constraints) if constraints else ""

    payload = {
        "dataset_name": self.http_client.config.dataset_name,
        "synth_name": synth_name,
        "epsilon": epsilon,
        "delta": delta,
        "select_cols": select_cols,
        "synth_params": synth_params,
        "nullable": nullable,
        "constraints": "",
        "return_model": return_model,
        "condition": condition,
        "nb_samples": nb_samples,
    }

    # Start from the real-data route and switch to the dummy one on request.
    endpoint = "smartnoise_synth_query"
    request_model: type[SmartnoiseSynthRequestModel] = SmartnoiseSynthQueryModel
    if dummy:
        endpoint = "dummy_smartnoise_synth_query"
        request_model = SmartnoiseSynthDummyQueryModel
        payload["dummy_nb_rows"] = nb_rows
        payload["dummy_seed"] = seed

    request = request_model.model_validate(payload)
    response = self.http_client.post(endpoint, request, SMARTNOISE_SYNTH_READ_TIMEOUT)
    return validate_model_response(self.http_client, response, QueryResponse)

Classes:

ClientConfig #


              flowchart TD
              lomas_client.models.config.ClientConfig[ClientConfig]

              

              click lomas_client.models.config.ClientConfig href "" "lomas_client.models.config.ClientConfig"
            

Config model for the HTTP client.

Methods:

Attributes:

app_url instance-attribute #

app_url: HttpUrl

The base URL for the API server.

dataset_name instance-attribute #

dataset_name: str

The name of the dataset to be accessed or manipulated.

use_password_flow class-attribute instance-attribute #

use_password_flow: bool = False

If true, uses the legacy password auth flow.

user_name class-attribute instance-attribute #

user_name: str | None = None

User name.

user_password class-attribute instance-attribute #

user_password: str | None = None

User password.

oidc_discovery_url instance-attribute #

oidc_discovery_url: HttpUrl

The OIDC provider discovery URL.

telemetry instance-attribute #

telemetry: Telemetry

Telemetry Settings.

oidc_config cached property #

oidc_config: OIDCConfig

Returns the oidc provider config.

oidc_use_tls #

oidc_use_tls() -> bool

Using TLS for OIDC?

Source code in client/lomas_client/models/config.py
36
37
38
39
@computed_field
def oidc_use_tls(self) -> bool:
    """Return True when the OIDC discovery URL uses the ``https`` scheme."""
    scheme = self.oidc_discovery_url.scheme
    return scheme == "https"

lomas_service_use_tls #

lomas_service_use_tls() -> bool

Using TLS for lomas service?

Source code in client/lomas_client/models/config.py
41
42
43
44
@computed_field
def lomas_service_use_tls(self) -> bool:
    """Return True when the lomas service URL (``app_url``) uses the ``https`` scheme."""
    scheme = self.app_url.scheme
    return scheme == "https"

Functions:

get_client_notebook_files #

get_client_notebook_files() -> list[Path]

Returns a list of the client notebook file names (absolute paths).

Assumes the file layout is the same as in the code repository.

Source code in client/lomas_client/scripts/run_notebook.py
16
17
18
19
20
21
22
def get_client_notebook_files() -> list[Path]:
    """
    Returns a sorted list of the client notebook files (absolute paths).

    Looks for ``*.ipynb`` files in the ``notebooks`` directory located two
    levels above the directory containing this script, i.e. it assumes the
    file layout is the same as in the code repository.

    Returns:
        list[Path]: Resolved notebook paths in ascending order. The list is
            empty when no notebook is found.
    """
    script_dir = Path(__file__).parent
    # Glob order depends on the filesystem; sort so callers (e.g. test
    # parametrisation) always see the notebooks in the same order.
    return sorted(nb.resolve() for nb in script_dir.glob("../../notebooks/*.ipynb"))

run_notebook #

run_notebook(
    notebook_file: Path,
    run_demo_setup: bool,
    save_output: bool = False,
    skip_smartnoise_synth: bool = True,
) -> None

Runs the notebook in the given file.

Assumes all services in the process compose are up and the file layout is same as in the code repository.

Parameters:

  • notebook_file #

    (str) –

    Path of the notebook file to run.

  • run_demo_setup #

    (bool) –

    Runs the lomas_demo_setup before running the notebook.

  • save_output #

    (bool, default: False ) –

    Saves the output to the original file. Defaults to False.

  • skip_smartnoise_synth #

    (bool, default: True ) –

    Skip smartnoise synth demo notebook

Source code in client/lomas_client/scripts/run_notebook.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
def run_notebook(
    notebook_file: Path, run_demo_setup: bool, save_output: bool = False, skip_smartnoise_synth: bool = True
) -> None:
    """Executes the notebook stored in the given file.

    Assumes all services in the process compose are up and
    the file layout is same as in the code repository.

    Args:
        notebook_file (Path): Path of the notebook file to execute. Its parent
            directory is passed to the notebook client as the execution path.
        run_demo_setup (bool): Runs the lomas_demo_setup before running the notebook.
        save_output (bool, optional): Writes the executed notebook back to the
            original file. Defaults to False.
        skip_smartnoise_synth (bool, optional): Skip the smartnoise synth demo
            notebook. Defaults to True.

    Raises:
        ImportError: If ``run_demo_setup`` is set but the ``lomas_server`` library
            cannot be found.
    """
    # TODO issue 423
    if skip_smartnoise_synth and notebook_file.name == "Demo_Client_Notebook_Smartnoise-Synth.ipynb":
        print("Skiping smartnoise synth notebook.")
        return

    # Reset demo users and budgets
    if run_demo_setup:
        server_spec = importlib.util.find_spec("lomas_server")
        if server_spec is None:
            raise ImportError("lomas_server library not found, cannot run lomas_demo_setup.")

        lomas_demo_setup()

    notebook = nbformat.read(notebook_file, as_version=4)
    # Run the notebook relative to its own directory.
    execution_dir = str(notebook_file.parent)
    executor = NotebookClient(
        notebook, resources={"metadata": {"path": execution_dir}}, timeout=60 * 5
    )
    executor.execute()

    if not save_output:
        return

    # Persist the executed notebook (including outputs) over the original file.
    nbformat.write(notebook, notebook_file)

Functions:

dex_config #

dex_config()

Dex config.

Removes all dex users before the yield and again after it.

Source code in client/lomas_client/tests/test_integrations.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
@pytest.fixture
def dex_config():
    """Fixture yielding the dex config taken from the admin config.

    All dex users are removed before the config is yielded, and removed
    again once the fixture resumes after the yield.
    """
    config = AdminConfig().dex_config
    assert config is not None

    # Start from a clean state
    del_all_dex_users(config)

    yield config

    # Leave a clean state behind
    del_all_dex_users(config)

Functions:

test_run_notebook #

test_run_notebook(notebook: Path) -> None

Runs the notebook and fails if the notebook fails.

Parameters:

  • notebook #

    (Path) –

    The notebook file path.

Source code in client/lomas_client/tests/test_run_notebooks.py
24
25
26
27
28
29
30
31
@pytest.mark.parametrize("notebook", mark_notebook(get_client_notebook_files()), ids=lambda file: file.name)
def test_run_notebook(notebook: Path) -> None:
    """Executes one client notebook; the test fails if running it raises.

    Args:
        notebook (Path): The notebook file path.
    """
    run_notebook(
        notebook_file=notebook,
        run_demo_setup=True,
        save_output=False,
        skip_smartnoise_synth=True,
    )

Functions:

raise_error #

raise_error(response: Response) -> Never

Raise error message based on the HTTP response.

Parameters:

  • response #

    (Response) –

    The response object from an HTTP request.

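Raises

InternalServerException if the server error cannot be parsed; otherwise the parsed error is raised through raise_error_from_model.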

Source code in client/lomas_client/utils.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
def raise_error(response: requests.Response) -> Never:
    """Turn an HTTP error response into a raised exception.

    Args:
        response (requests.Response): The response object from an HTTP request.

    Raises:
        InternalServerException: If the response body cannot be decoded as JSON
            or does not validate as a server error model.
        Otherwise the parsed error model is handed to ``raise_error_from_model``.
    """
    try:
        payload = response.json()
        parsed_error = LomasServerExceptionTypeAdapter.validate_python(payload)
    except (ValidationError, JSONDecodeError) as exc:
        # Keep the raw body in the message so the unparsable error stays visible.
        message = f"Could not parse server error: {response.content}"
        raise InternalServerException(message) from exc

    raise_error_from_model(parsed_error)

validate_model_response_direct #

validate_model_response_direct(response: Response, response_model: Any) -> Any

Validate and process a HTTP response.

Parameters:

  • response #

    (Response) –

    The response object from an HTTP request.

Returns:

  • response_model ( Any ) –

    The response body validated as an instance of response_model.

Source code in client/lomas_client/utils.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
def validate_model_response_direct(response: requests.Response, response_model: Any) -> Any:
    """Validate an HTTP response whose body directly contains the result.

    Args:
        response (requests.Response): The response object from an HTTP request.
        response_model (Any): Model used to validate the JSON body
            (its ``model_validate_json`` is called).

    Returns:
        Any: The response body validated by ``response_model``.
    """
    # Anything other than 200 is treated as an error; raise_error never returns.
    if response.status_code != status.HTTP_200_OK:
        raise_error(response)

    body = response.content.decode("utf8")
    return response_model.model_validate_json(body)

validate_model_response #

validate_model_response(
    client: LomasHttpClient, response: Response, response_model: type[ResponseT]
) -> ResponseT

Validate and process a HTTP response.

Parameters:

  • response #

    (Response) –

    The response object from an HTTP request.

Returns:

  • response_model ( ResponseT ) –

    The job result validated as an instance of response_model.

Source code in client/lomas_client/utils.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def validate_model_response(
    client: LomasHttpClient, response: requests.Response, response_model: type[ResponseT]
) -> ResponseT:
    """Validate a job submission response, wait for the job and parse its result.

    Args:
        client (LomasHttpClient): Client used to wait for the submitted job.
        response (requests.Response): The response object from an HTTP request.
        response_model (type[ResponseT]): Model class used to validate the job result.

    Raises:
        InternalServerException: If the job is reported as failed but carries no error.
        Other errors are raised through ``raise_error`` (response status is not
        202) or ``raise_error_from_model`` (job failed with an error).

    Returns:
        ResponseT: The job result validated against ``response_model``.
    """
    if response.status_code != status.HTTP_202_ACCEPTED:
        raise_error(response)

    job_uid = response.json()["uid"]
    job = client.wait_for_job(job_uid)
    if job.status == "failed":
        # Explicit check rather than `assert`: asserts are removed under
        # `python -O`, which would let a missing error reach raise_error_from_model.
        if job.error is None:
            raise InternalServerException(f"job {job_uid} failed without error !")
        raise_error_from_model(job.error)

    return response_model.model_validate(job.result)