Client

Classes:

Bound –

Any type that supports ordering comparisons (< and >).
Client –

Client class to send requests to the server.

Bound #


              flowchart TD
              lomas_client.client.Bound[Bound]

              

              click lomas_client.client.Bound href "" "lomas_client.client.Bound"

Any type that supports ordering comparisons (< and >).

Client #

Client(**kwargs: model_config)

Client class to send requests to the server.

Handle all serialisation and deserialisation steps

Parameters:

kwargs #
(model_config, default: {} ) –

All keyword arguments will be forwarded to the ClientConfig

Methods:

get_dataset_metadata –

This function retrieves metadata for the dataset.
get_column_metadata –

This function retrieves metadata for the column.
get_column_bounds –

This function retrieves metadata bounds for the column.
get_diffprivlib_bounds –

Get bounds for a list of columns in diffprivlib expected format.
get_dummy_dataset –

This function retrieves a dummy dataset with optional parameters.
get_context –

Create an OpenDP context based on a dummy dataset.
get_initial_budget –

This function retrieves the initial budget.
get_total_spent_budget –

This function retrieves the total spent budget.
get_remaining_budget –

This function retrieves the remaining budget.
get_previous_queries –

This function retrieves the previous queries of the user.

Source code in client/lomas_client/client.py

def __init__(self, **kwargs: ClientConfig.model_config):
    """Build the client configuration, the HTTP client and the per-library sub-clients.

    Args:
        kwargs: All keyword arguments will be forwarded to the ClientConfig

    Raises:
        ValueError: If ClientConfig raises a ValidationError for the given
            keyword arguments (the original error is kept as the cause).
    """
    try:
        self.config = ClientConfig(**kwargs)
    except ValidationError as exc:
        # Adjacent string literals are joined verbatim, so the space that
        # separates the two sentences must be written explicitly.
        raise ValueError(
            "Missing client config parameters. "
            "If you are using this library from a managed environment and don't know "
            "about your credentials or other parameters, please contact your system administrator."
        ) from exc

    # Instrumentation is only installed when telemetry is enabled in the config.
    if self.config.telemetry.enabled:
        LoggingInstrumentor().instrument(set_logging_format=True)
        init_telemetry(self.config.telemetry)

    # A single HTTP client is shared by every library-specific sub-client.
    self.http_client = LomasHttpClient(self.config)
    self.smartnoise_sql = SmartnoiseSQLClient(self.http_client)
    self.opendp = OpenDPClient(self.http_client)
    self.diffprivlib = DiffPrivLibClient(self.http_client)

    # Dataset metadata cache; stays None until get_dataset_metadata fills it.
    self.metadata: dict[str, Any] | None = None

get_dataset_metadata #

get_dataset_metadata() -> dict[str, Any]

This function retrieves metadata for the dataset.

Returns: A dictionary containing dataset metadata.

Source code in client/lomas_client/client.py

def get_dataset_metadata(self) -> dict[str, Any]:
    """Return the dataset metadata, asking the server only when nothing is cached.

    On a successful (HTTP 200) response the metadata is validated as a
    TableMetadata, converted with ``to_dict()`` and stored in ``self.metadata``.
    Any other status code is handed to ``raise_error``.

    Returns: A dictionary containing dataset metadata.
    """
    if self.metadata is not None:
        # Cached by an earlier call: no request needed.
        return self.metadata

    request = LomasRequestModel.model_validate({"dataset_name": self.config.dataset_name})
    response = self.http_client.post("get_dataset_metadata", request)

    if response.status_code != status.HTTP_200_OK:
        raise_error(response)
        # Only reached if raise_error returns instead of raising.
        return self.metadata

    self.metadata = TableMetadata.model_validate(response.json()).to_dict()
    return self.metadata

get_column_metadata #

get_column_metadata(column_name: str) -> dict[str, Any]

This function retrieves metadata for the column.

Returns: A dictionary containing column metadata.

Source code in client/lomas_client/client.py

def get_column_metadata(self, column_name: str) -> dict[str, Any]:
    """Look up the metadata entry of a single column by name.

    The dataset metadata is loaded through ``get_dataset_metadata`` when it
    has not been cached yet.

    Args:
        column_name: Name of the column to look up.

    Returns: A dictionary containing column metadata.

    Raises:
        ValueError: If no column with that name exists; the message lists
            the available column names.
    """
    if self.metadata is None:
        self.metadata = self.get_dataset_metadata()

    # Lazily scan the column list; the first entry with a matching name wins.
    matching = (
        entry for entry in self.metadata[TABLE_SCHEMA][COL_LIST] if entry[COL_NAME] == column_name
    )
    try:
        return next(matching)
    except StopIteration as err:
        available = [entry[COL_NAME] for entry in self.metadata[TABLE_SCHEMA][COL_LIST]]
        raise ValueError(f"Column '{column_name}' not found. Available columns: {available}") from err

get_column_bounds #

get_column_bounds(column_name: str) -> tuple[T, T]

This function retrieves metadata bounds for the column.

Returns: A tuple of (minimum_bound, maximum_bound)

Source code in client/lomas_client/client.py

def get_column_bounds(self, column_name: str) -> tuple[T, T]:
    """Return the minimum and maximum recorded in the metadata of a column.

    Args:
        column_name: Name of the column whose bounds are requested.

    Returns: A tuple of (minimum_bound, maximum_bound)

    Raises:
        ValueError: If the minimum or the maximum is absent or None.
    """
    column = self.get_column_metadata(column_name)
    bounds = (column.get(MINIMUM), column.get(MAXIMUM))

    # A missing key and an explicit None are both treated as "no bound".
    if any(bound is None for bound in bounds):
        raise ValueError(f"Column '{column_name}' does not have bounds.")

    return bounds

get_diffprivlib_bounds #

get_diffprivlib_bounds(columns: list[str]) -> tuple[list[int | float], list[int | float]]

Get bounds for a list of columns in diffprivlib expected format.

Source code in client/lomas_client/client.py

def get_diffprivlib_bounds(self, columns: list[str]) -> tuple[list[int | float], list[int | float]]:
    """Get bounds for a list of columns in diffprivlib expected format.

    The dataset metadata is loaded through ``get_dataset_metadata`` when it
    has not been cached yet.

    Args:
        columns: Names of the columns whose bounds are requested.

    Returns:
        A ``(lower, upper)`` pair of lists holding the minimum and maximum of
        each requested column, in the same order as ``columns``.

    Raises:
        ValueError: If a column is not present in the metadata, or if its
            minimum or maximum is absent or None.
    """
    if self.metadata is None:
        self.metadata = self.get_dataset_metadata()

    # Index the column entries by name for direct lookup.
    col_map = {col[COL_NAME]: col for col in self.metadata[TABLE_SCHEMA][COL_LIST]}

    lower: list[int | float] = []
    upper: list[int | float] = []
    for col in columns:
        if col not in col_map:
            raise ValueError(f"Column '{col}' not found")

        metadata = col_map[col]
        minimum = metadata.get(MINIMUM)
        maximum = metadata.get(MAXIMUM)

        # A bound stored as None is as unusable as an absent one; rejecting it
        # keeps this method consistent with get_column_bounds and prevents
        # None from leaking into the returned numeric lists.
        if minimum is None or maximum is None:
            raise ValueError(f"Column '{col}' does not have bounds")

        lower.append(minimum)
        upper.append(maximum)

    return lower, upper

get_dummy_dataset #

get_dummy_dataset(
    nb_rows: int = DUMMY_NB_ROWS, seed: int = DUMMY_SEED, lazy: bool = False
) -> DataFrame | LazyFrame

This function retrieves a dummy dataset with optional parameters.

Parameters:

nb_rows #
(int, default: DUMMY_NB_ROWS ) –

The number of rows in the dummy dataset. Defaults to DUMMY_NB_ROWS.
seed #
(int, default: DUMMY_SEED ) –

The random seed for generating the dummy dataset. Defaults to DUMMY_SEED.
lazy #
(bool, default: False ) –

If True, return a polars LazyFrame. Defaults to False (pandas DataFrame)

Returns:

DataFrame | LazyFrame –

A pandas DataFrame representing the dummy dataset, or a polars LazyFrame when lazy is True.

Source code in client/lomas_client/client.py

def get_dummy_dataset(
    self,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
    lazy: bool = False,
) -> pd.DataFrame | pl.LazyFrame:
    """Request a dummy dataset from the server.

    Args:
        nb_rows (int, optional): The number of rows in the dummy dataset.
            Defaults to DUMMY_NB_ROWS.
        seed (int, optional): The random seed for generating the dummy dataset.
            Defaults to DUMMY_SEED.
        lazy (bool, optional): If True, return a polars LazyFrame.
            Defaults to False (pandas DataFrame)

    Returns:
        pd.DataFrame | pl.LazyFrame: The dummy dataset as a pandas DataFrame,
        or as a polars LazyFrame when ``lazy`` is True.
    """
    request = GetDummyDataset.model_validate(
        {
            "dataset_name": self.config.dataset_name,
            "dummy_nb_rows": nb_rows,
            "dummy_seed": seed,
        }
    )
    response = self.http_client.post("get_dummy_dataset", request)

    if response.status_code == status.HTTP_200_OK:
        payload = response.content.decode("utf8")
        frame = DummyDsResponse.model_validate_json(payload).dummy_df
        if lazy:
            # Convert the pandas frame to a polars LazyFrame on request.
            return pl.from_pandas(frame).lazy()
        return frame

    # Any non-200 response is delegated to the shared error handler.
    raise_error(response)

get_context #

get_context(
    epsilon: float | None = None, delta: float | None = None, rho: float | None = None
) -> Context

Create an OpenDP context based on a dummy dataset.

This can be used to build an OpenDP pipeline locally on the client side.

Parameters:

epsilon #
(float | None, default: None ) –

Privacy parameter to be spent. Required for pure DP or approximate DP (Laplace mechanism). Defaults to None.
delta #
(float | None, default: None ) –

Required if the pipeline measurement uses ZeroConcentratedDivergence (e.g., with make_gaussian) and is converted to SmoothedMaxDivergence using make_zCDP_to_approxDP. See: https://docs.smartnoise.org/sql/advanced.html#postprocess Defaults to None.
rho #
(float | None, default: None ) –

Privacy parameter used for zCDP or approximate zCDP (Gaussian mechanism). Cannot be used if epsilon is provided.

Returns:

Context –

OpenDP context object initialized with metadata and user-provided privacy parameters.

Source code in client/lomas_client/client.py

def get_context(
    self,
    epsilon: float | None = None,
    delta: float | None = None,
    rho: float | None = None,
) -> dp.Context:
    """
    Build an OpenDP context from a dummy dataset and the dataset metadata.

    The returned context can be used to write an OpenDP pipeline locally on
    the client side.

    Args:
        epsilon (float | None, optional): Privacy parameter to be spent.
            Required for pure DP or approximate DP (Laplace mechanism).
            Defaults to None.
        delta (float | None, optional): Required if the pipeline measurement
            uses ZeroConcentratedDivergence (e.g., with make_gaussian) and is
            converted to SmoothedMaxDivergence using
            make_zCDP_to_approxDP. See:
            https://docs.smartnoise.org/sql/advanced.html#postprocess
            Defaults to None.
        rho (float | None, optional): Privacy parameter used for zCDP or
            approximate zCDP (Gaussian mechanism). Cannot be used if
            epsilon is provided.

    Returns:
        dp.Context: OpenDP context object initialized with metadata and
        user-provided privacy parameters.
    """
    # The dummy data is requested first, then the metadata if not cached yet.
    lazy_dummy = self.get_dummy_dataset(lazy=True)
    if self.metadata is None:
        self.metadata = self.get_dataset_metadata()

    privacy_params = {"epsilon": epsilon, "delta": delta, "rho": rho}
    return csvw_to_opendp_context(self.metadata, lazy_dummy, **privacy_params, split_evenly_over=1)

get_initial_budget #

get_initial_budget() -> InitialBudgetResponse

This function retrieves the initial budget.

Returns:

InitialBudgetResponse ( InitialBudgetResponse ) –

A dictionary containing the initial budget.

Source code in client/lomas_client/client.py

def get_initial_budget(self) -> InitialBudgetResponse:
    """Ask the server for the initial budget of the configured dataset.

    Returns:
        InitialBudgetResponse: The outcome of
            ``validate_model_response_direct`` applied to the server reply.
    """
    request = LomasRequestModel.model_validate({"dataset_name": self.config.dataset_name})
    response = self.http_client.post("get_initial_budget", request)
    return validate_model_response_direct(response, InitialBudgetResponse)

get_total_spent_budget #

get_total_spent_budget() -> SpentBudgetResponse

This function retrieves the total spent budget.

Returns:

SpentBudgetResponse ( SpentBudgetResponse ) –

A dictionary containing the total spent budget.

Source code in client/lomas_client/client.py

def get_total_spent_budget(self) -> SpentBudgetResponse:
    """Ask the server for the total spent budget of the configured dataset.

    Returns:
        SpentBudgetResponse: The outcome of
            ``validate_model_response_direct`` applied to the server reply.
    """
    request = LomasRequestModel.model_validate({"dataset_name": self.config.dataset_name})
    response = self.http_client.post("get_total_spent_budget", request)
    return validate_model_response_direct(response, SpentBudgetResponse)

get_remaining_budget #

get_remaining_budget() -> RemainingBudgetResponse

This function retrieves the remaining budget.

Returns:

RemainingBudgetResponse ( RemainingBudgetResponse ) –

A dictionary containing the remaining budget.

Source code in client/lomas_client/client.py

def get_remaining_budget(self) -> RemainingBudgetResponse:
    """Ask the server for the remaining budget of the configured dataset.

    Returns:
        RemainingBudgetResponse: The outcome of
            ``validate_model_response_direct`` applied to the server reply.
    """
    request = LomasRequestModel.model_validate({"dataset_name": self.config.dataset_name})
    response = self.http_client.post("get_remaining_budget", request)
    return validate_model_response_direct(response, RemainingBudgetResponse)

get_previous_queries #

get_previous_queries() -> list[dict]

This function retrieves the previous queries of the user.

Raises:

ValueError –

If an unknown query type is encountered during deserialization.

Returns:

list[dict] –

A list of dictionaries containing the different queries on the private dataset.

Source code in client/lomas_client/client.py

def get_previous_queries(self) -> list[dict]:
    """Fetch the user's previous queries and decode their library-specific payloads.

    Each returned query is the dictionary received from the server, updated
    in place: OpenDP inputs are turned back into polars LazyFrames and
    DiffPrivLib result models are unpickled. SmartnoiseSQL queries are
    returned unchanged.

    Raises:
        ValueError: If an unknown query type is encountered
            during deserialization.

    Returns:
        List[dict]: A list of dictionaries containing
        the different queries on the private dataset.
    """
    request = LomasRequestModel.model_validate({"dataset_name": self.config.dataset_name})
    response = self.http_client.post("get_previous_queries", request)

    if response.status_code == status.HTTP_200_OK:
        queries = json.loads(response.content.decode("utf8"))["previous_queries"]

        if not queries:
            # Nothing to decode: hand back the empty value as received.
            return queries

        decoded = []
        for query in queries:
            library = query["dp_library"]
            if library == DPLibraries.SMARTNOISE_SQL:
                pass
            elif library == DPLibraries.OPENDP:
                client_input = OpenDPQueryModel.model_validate(query["client_input"])
                raw_plan = base64.b64decode(client_input.opendp_json)
                query["client_input"]["opendp_json"] = pl.LazyFrame.deserialize(io.BytesIO(raw_plan))
            elif library == DPLibraries.DIFFPRIVLIB:
                raw_model = base64.b64decode(query["response"]["result"]["model"])
                # NOTE(review): pickle.loads can execute arbitrary code; this
                # relies on the server response being trusted.
                query["response"]["result"]["model"] = pickle.loads(raw_model)
            else:
                raise ValueError(f"Cannot deserialise unknown query type: {query['dp_library']}")

            decoded.append(query)

        return decoded

    # Any non-200 response is delegated to the shared error handler.
    raise_error(response)

Classes:

LomasHttpClient –

A client for interacting with the Lomas API.

LomasHttpClient #

LomasHttpClient(config: ClientConfig)

A client for interacting with the Lomas API.

Used by:

API Reference Client

Methods:

post –

Executes a POST request to endpoint with the provided JSON body.
wait_for_job –

Periodically query the job endpoint sleeping in between until it completes / times-out.

Source code in client/lomas_client/http_client.py

def __init__(self, config: ClientConfig) -> None:
    """Store the configuration and open an authenticated OAuth2 session.

    A token refresh is attempted right away; if it fails, the client falls
    back to ``_authorize()``.

    Args:
        config: Client configuration (telemetry, TLS flags, OIDC settings).
    """
    if config.telemetry.enabled:
        RequestsInstrumentor().instrument()

    self.headers = {"Content-type": "application/json", "Accept": "*/*"}
    self.config = config

    everything_uses_tls = self.config.oidc_use_tls and self.config.lomas_service_use_tls
    if not everything_uses_tls:
        logger.warning("OIDC IdP or Lomas service configured without TLS -> using insecure transport")

    session_options = {
        "client_id": "lomas_client",
        "token_endpoint": self.config.oidc_config.token_endpoint,
        "scope": OIDC_REQUIRED_SCOPES,
        "update_token": self._save_token,
        "token": self._load_token(),
        "token_endpoint_auth_method": "none",
        "leeway": 30,  # refresh token 30 seconds before expiry
    }
    self._oauth2_session = OAuth2Session(**session_options)

    try:
        self._oauth2_session.refresh_token()
    except (OAuth2Error, OAuthError, AttributeError, requests.HTTPError):
        # Refresh failed, so run the authorize flow instead.
        # HTTP errors are caught because dex fails when it cannot link a token to an existing user.
        # AttributeError is caught in case the token is None.
        self._authorize()

post #

post(
    endpoint: str, body: LomasRequestModel, read_timeout: int = DEFAULT_READ_TIMEOUT
) -> Response

Executes a POST request to endpoint with the provided JSON body.

Handles authorization to the api by automatically fetching a token if required.

Parameters:

endpoint #
(str) –

The API endpoint to which the request will be sent.
body #
(LomasRequestModel) –

The body to include in the POST request.
read_timeout #
(int, default: DEFAULT_READ_TIMEOUT ) –

Number of seconds the client waits for the server to send a response. Defaults to DEFAULT_READ_TIMEOUT.

Returns:

Response –

requests.Response: The response object resulting from the POST request.

Source code in client/lomas_client/http_client.py

def post(
    self,
    endpoint: str,
    body: LomasRequestModel,
    read_timeout: int = DEFAULT_READ_TIMEOUT,
) -> requests.Response:
    """Executes a POST request to endpoint with the provided JSON body.

    Handles authorization to the api by automatically fetching a token if required:
    when the session raises OAuth2Error, the client re-authorizes and sends the
    same request one more time.

    Args:
        endpoint (str): The API endpoint to which the request will be sent.
        body (LomasRequestModel): The body to include in the POST request.
        read_timeout (int): Number of seconds the client waits for the server
            to send a response.
            Defaults to DEFAULT_READ_TIMEOUT.

    Returns:
        requests.Response: The response object resulting from the POST request.
    """
    # Serialize once and reuse for the log line, the request and a possible retry.
    payload = body.model_dump()

    logger.debug(
        f"User '{self.config.user_name}' is making a request "
        + f"to url '{self.config.app_url}' "
        + f"at the endpoint '{endpoint}' "
        + f"with query params: {payload}."
    )

    def send() -> requests.Response:
        # Single definition of the request so first attempt and retry cannot drift apart.
        return self._oauth2_session.post(
            url_append(self.config.app_url, endpoint),
            json=payload,
            headers=self.headers,
            timeout=(CONNECT_TIMEOUT, read_timeout),
        )

    try:
        return send()
    except OAuth2Error:
        # Handle expired refresh token
        self._authorize()
        return send()

wait_for_job #

wait_for_job(job_uid: str, n_retry: int = 1800, sleep_sec: float = 1) -> Job

Periodically query the job endpoint sleeping in between until it completes / times-out.

Source code in client/lomas_client/http_client.py

def wait_for_job(self, job_uid: str, n_retry: int = 1800, sleep_sec: float = 1) -> Job:
    """Poll the job status endpoint until the job finishes or the attempts run out.

    Args:
        job_uid: Identifier of the job, appended to the ``/status/`` endpoint.
        n_retry: Maximum number of polling attempts.
        sleep_sec: Pause, in seconds, after each attempt that did not finish.

    Returns:
        Job: The status payload validated as a Job once its ``status`` is
        ``"complete"`` or ``"failed"``.

    Raises:
        TimeoutError: If no attempt reported a finished job.
    """
    finished_states = {"complete", "failed"}

    def fetch_status():
        # NOTE(review): a single value (not a tuple) is passed as timeout -
        # presumably applied to both connect and read; confirm.
        reply = self._oauth2_session.get(
            url_append(self.config.app_url, f"/status/{job_uid}"),
            headers=self.headers,
            timeout=CONNECT_TIMEOUT,
        )
        return reply.json()

    for _attempt in range(n_retry):
        try:
            job_query = fetch_status()
        except OAuth2Error:
            # Handle expired refresh token
            self._authorize()
            job_query = fetch_status()

        # Payloads without a "status" key are treated as "not finished yet".
        is_finished = "status" in job_query and job_query["status"] in finished_states
        if is_finished:
            return Job.model_validate(job_query)

        time.sleep(sleep_sec)

    raise TimeoutError(f"Job {job_uid} didn't complete in time ({sleep_sec * n_retry})")

Classes:

DiffPrivLibClient –

A client for executing and estimating the cost of DiffPrivLib queries.

DiffPrivLibClient #

DiffPrivLibClient(http_client: LomasHttpClient)

A client for executing and estimating the cost of DiffPrivLib queries.

Methods:

cost –

This function estimates the cost of executing a DiffPrivLib query.
query –

Trains a DiffPrivLib pipeline and return a trained Pipeline.

Source code in client/lomas_client/libraries/diffprivlib.py

def __init__(self, http_client: LomasHttpClient) -> None:
    """Keep a reference to the HTTP client used to send DiffPrivLib requests.

    Args:
        http_client: Client through which this object posts its requests.
    """
    self.http_client = http_client

cost #

cost(
    pipeline: Pipeline,
    feature_columns: list[str] | None = None,
    target_columns: list[str] | None = None,
    test_size: float = 0.2,
    test_train_split_seed: int = 1,
    imputer_strategy: str = "drop",
) -> CostResponse

This function estimates the cost of executing a DiffPrivLib query.

Parameters:

pipeline #
(pipeline) –
DiffPrivLib pipeline with three conditions: - The pipeline MUST start with a models.StandardScaler. Otherwise a PrivacyLeakWarning is raised by DiffPrivLib library and is treated as an error in lomas server.
- random_state fields can only be int (RandomState will not work).
- accountant fields must be None.
Note: as in DiffPrivLib, avoid any DiffprivlibCompatibilityWarning to ensure that the pipeline does what is intended.
feature_columns #
(list[str], default: None ) –

the list of feature column to train
target_columns #
(list[str], default: None ) –

the list of target column to predict May be None for certain models.
test_size #
(float, default: 0.2 ) –

proportion of the test set Defaults to 0.2.
test_train_split_seed #
(int, default: 1 ) –

seed for random train test split Defaults to 1.
imputer_strategy #
(str, default: 'drop' ) –

imputation strategy. Defaults to "drop". "drop": will drop all rows with missing values "mean": will replace values by the mean of the column values "median": will replace values by the median of the column values "most_frequent": will replace values by the most frequent values

Returns:

CostResponse –

Optional[dict[str, float]]: A dictionary containing the estimated cost.

Source code in client/lomas_client/libraries/diffprivlib.py

def cost(
    self,
    pipeline: Pipeline,
    feature_columns: list[str] | None = None,
    target_columns: list[str] | None = None,
    test_size: float = 0.2,
    test_train_split_seed: int = 1,
    imputer_strategy: str = "drop",
) -> CostResponse:
    """Estimate the cost of executing a DiffPrivLib query.

    Args:
        pipeline (sklearn.pipeline): DiffPrivLib pipeline with three conditions:

            - The pipeline MUST start with a `models.StandardScaler`.
              Otherwise a PrivacyLeakWarning is raised by DiffPrivLib library and
              is treated as an error in lomas server.
            - `random_state` fields can only be int (`RandomState` will not work).
            - `accountant` fields must be None.

            Note: as in DiffPrivLib, avoid any DiffprivlibCompatibilityWarning
            to ensure that the pipeline does what is intended.
        feature_columns (list[str], optional): the list of feature columns to train.
            When None, ``[""]`` is sent instead.
        target_columns (list[str], optional): the list of target columns to predict.
            May be None for certain models; ``[""]`` is sent in that case.
        test_size (float, optional): proportion of the test set.
            Defaults to 0.2.
        test_train_split_seed (int, optional): seed for random train test split.
            Defaults to 1.
        imputer_strategy (str, optional): imputation strategy. Defaults to "drop".
            "drop": will drop all rows with missing values
            "mean": will replace values by the mean of the column values
            "median": will replace values by the median of the column values
            "most_frequent": will replace values by the most frequent values

    Returns:
        CostResponse: The outcome of ``validate_model_response`` for the
        cost-estimation request.
    """
    # Missing column lists are replaced by a list holding one empty string.
    targets = [""] if target_columns is None else target_columns
    features = [""] if feature_columns is None else feature_columns

    request = DiffPrivLibRequestModel.model_validate(
        {
            "dataset_name": self.http_client.config.dataset_name,
            "diffprivlib_json": serialise_pipeline(pipeline),
            "feature_columns": features,
            "target_columns": targets,
            "test_size": test_size,
            "test_train_split_seed": test_train_split_seed,
            "imputer_strategy": imputer_strategy,
        }
    )
    response = self.http_client.post("estimate_diffprivlib_cost", request)

    return validate_model_response(self.http_client, response, CostResponse)

query #

query(
    pipeline: Pipeline,
    feature_columns: list[str],
    target_columns: list[str] | None = None,
    test_size: float = 0.2,
    test_train_split_seed: int = 1,
    imputer_strategy: str = "drop",
    dummy: bool = False,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
) -> QueryResponse

Trains a DiffPrivLib pipeline and return a trained Pipeline.

Parameters:

pipeline #
(pipeline) –

DiffPrivLib pipeline with three conditions: - The pipeline MUST start with a models.StandardScaler. Otherwise a PrivacyLeakWarning is raised by DiffPrivLib library and is treated as an error in lomas server. - random_state fields can only be int (RandomState will not work). - accountant fields must be None.

Note: as in DiffPrivLib, avoid any DiffprivlibCompatibilityWarning to ensure that the pipeline does what is intended.
feature_columns #
(list[str]) –

the list of feature column to train
target_columns #
(list[str], default: None ) –

the list of target column to predict May be None for certain models.
test_size #
(float, default: 0.2 ) –

proportion of the test set Defaults to 0.2.
test_train_split_seed #
(int, default: 1 ) –

seed for random train test split Defaults to 1.
imputer_strategy #
(str, default: 'drop' ) –

imputation strategy. Defaults to "drop". "drop": will drop all rows with missing values "mean": will replace values by the mean of the column values "median": will replace values by the median of the column values "most_frequent": will replace values by the most frequent values
dummy #
(bool, default: False ) –

Whether to use a dummy dataset. Defaults to False.
nb_rows #
(int, default: DUMMY_NB_ROWS ) –

The number of rows in the dummy dataset. Defaults to DUMMY_NB_ROWS.
seed #
(int, default: DUMMY_SEED ) –

The random seed for generating the dummy dataset. Defaults to DUMMY_SEED.

Returns:

QueryResponse –

Optional[Pipeline]: A trained DiffPrivLib pipeline

Source code in client/lomas_client/libraries/diffprivlib.py

def query(
    self,
    pipeline: Pipeline,
    feature_columns: list[str],
    target_columns: list[str] | None = None,
    test_size: float = 0.2,
    test_train_split_seed: int = 1,
    imputer_strategy: str = "drop",
    dummy: bool = False,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
) -> QueryResponse:
    """Send a DiffPrivLib pipeline to the server for training.

    Args:
        pipeline (sklearn.pipeline): DiffPrivLib pipeline with three conditions:

            - The pipeline MUST start with a `models.StandardScaler`.
              Otherwise a PrivacyLeakWarning is raised by DiffPrivLib library and
              is treated as an error in lomas server.
            - `random_state` fields can only be int (`RandomState` will not work).
            - `accountant` fields must be None.

            Note: as in DiffPrivLib, avoid any DiffprivlibCompatibilityWarning
            to ensure that the pipeline does what is intended.
        feature_columns (list[str]): the list of feature columns to train.
        target_columns (list[str], optional): the list of target columns to predict.
            May be None for certain models.
        test_size (float, optional): proportion of the test set.
            Defaults to 0.2.
        test_train_split_seed (int, optional): seed for random train test split.
            Defaults to 1.
        imputer_strategy (str, optional): imputation strategy. Defaults to "drop".
            "drop": will drop all rows with missing values
            "mean": will replace values by the mean of the column values
            "median": will replace values by the median of the column values
            "most_frequent": will replace values by the most frequent values
        dummy (bool, optional): Whether to use a dummy dataset. Defaults to False.
        nb_rows (int, optional): The number of rows in the dummy dataset.
            Defaults to DUMMY_NB_ROWS.
        seed (int, optional): The random seed for generating the dummy dataset.
            Defaults to DUMMY_SEED.

    Returns:
        QueryResponse: The outcome of ``validate_model_response`` for the
        query request.
    """
    payload = {
        "dataset_name": self.http_client.config.dataset_name,
        "diffprivlib_json": serialise_pipeline(pipeline),
        "feature_columns": feature_columns,
        "target_columns": target_columns,
        "test_size": test_size,
        "test_train_split_seed": test_train_split_seed,
        "imputer_strategy": imputer_strategy,
    }

    request_model: type[DiffPrivLibRequestModel]
    if not dummy:
        endpoint = "diffprivlib_query"
        request_model = DiffPrivLibQueryModel
    else:
        # Dummy runs go to their own endpoint and carry the dummy-dataset settings.
        endpoint = "dummy_diffprivlib_query"
        payload.update(dummy_nb_rows=nb_rows, dummy_seed=seed)
        request_model = DiffPrivLibDummyQueryModel

    request = request_model.model_validate(payload)
    response = self.http_client.post(endpoint, request)

    return validate_model_response(self.http_client, response, QueryResponse)

Classes:

OpenDPClient –

A client for executing and estimating the cost of OpenDP queries.

OpenDPClient #

OpenDPClient(http_client: LomasHttpClient)

A client for executing and estimating the cost of OpenDP queries.

Methods:

cost –

This function estimates the cost of executing an OpenDP query.
query –

This function executes an OpenDP query.

Source code in client/lomas_client/libraries/opendp.py

def __init__(self, http_client: LomasHttpClient) -> None:
    """Keep a reference to the HTTP client used to send OpenDP requests.

    Args:
        http_client: Client through which this object posts its requests.
    """
    self.http_client = http_client

cost #

cost(
    opendp_pipeline: LazyFrameQuery | LazyFrame,
    epsilon: float | None = None,
    delta: float | None = None,
    rho: float | None = None,
    approx_zcdp: bool = True,
) -> CostResponse

This function estimates the cost of executing an OpenDP query.

Parameters:

opendp_pipeline #
(Measurement) –

The OpenDP pipeline for the query.
epsilon #
(float, default: None ) –

Privacy parameter that will be spent. For pure-DP or approximate DP this must be set. (Laplace mechanism)
delta #
(Optional[float], default: None ) –

If the pipeline measurement is of type “ZeroConcentratedDivergence” (e.g. with make_gaussian) then it is converted to “SmoothedMaxDivergence” with make_zCDP_to_approxDP (See Smartnoise-SQL postprocessing documentation. <https://docs.smartnoise.org/sql/advanced.html#postprocess>__). In that case a delta must be provided by the user. Defaults to None.
rho #
(float, default: None ) –

Privacy parameter used for zCDP or approximate-zCDP (Gaussian mechanism). Cannot be used if epsilon is not None.
approx_zcdp #
(bool, default: True ) –

If false, delta is used to compute the epsilon consumption equivalent when user wants to use zCDP. Default True.

Raises: Exception: If the opendp_pipeline type is not supported.

Returns:

CostResponse ( CostResponse ) –

The estimated cost.

Source code in client/lomas_client/libraries/opendp.py

def cost(
    self,
    opendp_pipeline: dp.extras.polars.LazyFrameQuery | pl.LazyFrame,
    epsilon: float | None = None,
    delta: float | None = None,
    rho: float | None = None,
    approx_zcdp: bool = True,
) -> CostResponse:
    """Estimate the cost of executing an OpenDP query.

    The request body is built from the pipeline and the privacy parameters,
    validated as an ``OpenDPRequestModel`` and posted to the
    ``estimate_opendp_cost`` endpoint.

    Args:
        opendp_pipeline (LazyFrameQuery | LazyFrame): The OpenDP pipeline for the query.
        epsilon (float | None): Privacy parameter that will be spent. For pure-DP
            or approximate DP this must be set (Laplace mechanism).
            Defaults to None.
        delta (float | None): If the pipeline measurement is of type
            “ZeroConcentratedDivergence” (e.g. with make_gaussian) then it is
            converted to “SmoothedMaxDivergence” with make_zCDP_to_approxDP
            (see the Smartnoise-SQL postprocessing documentation:
            https://docs.smartnoise.org/sql/advanced.html#postprocess).
            In that case a delta must be provided by the user.
            Defaults to None.
        rho (float | None): Privacy parameter used for zCDP or approximate-zCDP
            (Gaussian mechanism). Cannot be used if epsilon is not None.
            Defaults to None.
        approx_zcdp (bool): If false, delta is used to compute the epsilon
            consumption equivalent when user wants to use zCDP.
            Defaults to True.

    Raises:
        Exception: If the opendp_pipeline type is not supported.

    Returns:
        CostResponse: The estimated cost.
    """
    payload = self._get_opendp_request_body(
        opendp_pipeline, epsilon=epsilon, delta=delta, rho=rho, approx_zcdp=approx_zcdp
    )
    request = OpenDPRequestModel.model_validate(payload)
    response = self.http_client.post("estimate_opendp_cost", request)
    return validate_model_response(self.http_client, response, CostResponse)

query #

query(
    opendp_pipeline: LazyFrameQuery | LazyFrame,
    epsilon: float | None = None,
    delta: float | None = None,
    rho: float | None = None,
    approx_zcdp: bool = True,
    dummy: bool = False,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
) -> QueryResponse

This function executes an OpenDP query.

Parameters:

opendp_pipeline #
(LazyFrameQuery | LazyFrame) –

The OpenDP pipeline for the query. Can be a dp.Measurement or a polars LazyFrame (plan) for opendp.polars pipelines.
epsilon #
(float, default: None ) –

Privacy parameter that will be spent. For pure-DP or approximate DP this must be set. (Laplace mechanism)
delta #
(Optional[float], default: None ) –

If the pipeline measurement is of type “ZeroConcentratedDivergence” (e.g. with make_gaussian) then it is converted to “SmoothedMaxDivergence” with make_zCDP_to_approxDP (see the Smartnoise-SQL postprocessing documentation: https://docs.smartnoise.org/sql/advanced.html#postprocess). In that case a delta must be provided by the user. Defaults to None.
rho #
(float, default: None ) –

Privacy parameter used for zCDP or approximate-zCDP (Gaussian mechanism). Cannot be used if epsilon is not None.
approx_zcdp #
(bool, default: True ) –

If false, delta is used to compute the epsilon consumption equivalent when user wants to use zCDP. Default True.
dummy #
(bool, default: False ) –

Whether to use a dummy dataset. Defaults to False.
nb_rows #
(int, default: DUMMY_NB_ROWS ) –

The number of rows in the dummy dataset. Defaults to DUMMY_NB_ROWS.
seed #
(int, default: DUMMY_SEED ) –

The random seed for generating the dummy dataset. Defaults to DUMMY_SEED.

Raises:

Exception –

If the opendp_pipeline type is not supported.

Returns:

QueryResponse ( QueryResponse ) –

A dictionary of the response body containing the deserialized pipeline result.

Source code in client/lomas_client/libraries/opendp.py

def query(
    self,
    opendp_pipeline: dp.extras.polars.LazyFrameQuery | pl.LazyFrame,
    epsilon: float | None = None,
    delta: float | None = None,
    rho: float | None = None,
    approx_zcdp: bool = True,
    dummy: bool = False,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
) -> QueryResponse:
    """Execute an OpenDP query.

    The request is posted to ``dummy_opendp_query`` when ``dummy`` is true and
    to ``opendp_query`` otherwise.

    Args:
        opendp_pipeline (LazyFrameQuery | LazyFrame): The OpenDP pipeline for the query.
        epsilon (float | None): Privacy parameter that will be spent. For pure-DP
            or approximate DP this must be set (Laplace mechanism).
            Defaults to None.
        delta (float | None): If the pipeline measurement is of type
            “ZeroConcentratedDivergence” (e.g. with make_gaussian) then it is
            converted to “SmoothedMaxDivergence” with make_zCDP_to_approxDP
            (see the Smartnoise-SQL postprocessing documentation:
            https://docs.smartnoise.org/sql/advanced.html#postprocess).
            In that case a delta must be provided by the user.
            Defaults to None.
        rho (float | None): Privacy parameter used for zCDP or approximate-zCDP
            (Gaussian mechanism). Cannot be used if epsilon is not None.
            Defaults to None.
        approx_zcdp (bool): If false, delta is used to compute the epsilon
            consumption equivalent when user wants to use zCDP.
            Defaults to True.
        dummy (bool, optional): Whether to use a dummy dataset. Defaults to False.
        nb_rows (int, optional): The number of rows in the dummy dataset.
            Only sent when ``dummy`` is true. Defaults to DUMMY_NB_ROWS.
        seed (int, optional): The random seed for generating the dummy dataset.
            Only sent when ``dummy`` is true. Defaults to DUMMY_SEED.

    Raises:
        Exception: If the opendp_pipeline type is not supported.

    Returns:
        QueryResponse: The validated server response containing the pipeline result.
    """
    payload = self._get_opendp_request_body(
        opendp_pipeline, epsilon=epsilon, delta=delta, rho=rho, approx_zcdp=approx_zcdp
    )

    model_cls: type[OpenDPRequestModel]
    if not dummy:
        endpoint = "opendp_query"
        model_cls = OpenDPQueryModel
    else:
        # Dummy queries carry the extra parameters describing the dummy dataset.
        payload["dummy_nb_rows"] = nb_rows
        payload["dummy_seed"] = seed
        endpoint = "dummy_opendp_query"
        model_cls = OpenDPDummyQueryModel

    request = model_cls.model_validate(payload)
    response = self.http_client.post(endpoint, request)
    return validate_model_response(self.http_client, response, QueryResponse)

Classes:

SmartnoiseSQLClient –

A client for executing and estimating the cost of SmartNoise SQL queries.

SmartnoiseSQLClient #

SmartnoiseSQLClient(http_client: LomasHttpClient)

A client for executing and estimating the cost of SmartNoise SQL queries.

Methods:

cost –

This function estimates the cost of executing a SmartNoise query.
query –

This function executes a SmartNoise SQL query.

Source code in client/lomas_client/libraries/smartnoise_sql.py

def __init__(self, http_client: LomasHttpClient) -> None:
    """Store the HTTP client used to send SmartNoise SQL requests.

    Args:
        http_client (LomasHttpClient): Client through which `cost` and `query`
            post their requests; its config also provides the dataset name.
    """
    self.http_client = http_client

cost #

cost(
    query: str, epsilon: float, delta: float, mechanisms: dict[str, str] | None = None
) -> CostResponse

This function estimates the cost of executing a SmartNoise query.

Parameters:

query #
(str) –

The SQL query to estimate the cost for. NOTE: the table name is df, the query must end with “FROM df”.
epsilon #
(float) –

Privacy parameter (e.g., 0.1).
delta #
(float) –

Privacy parameter (e.g., 1e-5).
mechanisms #
(dict[str, str], default: None ) –

Dictionary of mechanisms for the query. See the Smartnoise-SQL documentation on overriding mechanisms: https://docs.smartnoise.org/sql/advanced.html#overriding-mechanisms. Defaults to None, which is treated as an empty dictionary.

Returns:

CostResponse ( CostResponse ) –

The estimated cost.

Source code in client/lomas_client/libraries/smartnoise_sql.py

def cost(
    self,
    query: str,
    epsilon: float,
    delta: float,
    mechanisms: dict[str, str] | None = None,
) -> CostResponse:
    """Estimate the cost of executing a SmartNoise SQL query.

    The request is validated as a ``SmartnoiseSQLRequestModel`` and posted to
    the ``estimate_smartnoise_sql_cost`` endpoint.

    Args:
        query (str): The SQL query to estimate the cost for.
            NOTE: the table name is df, the query must end with “FROM df”.
        epsilon (float): Privacy parameter (e.g., 0.1).
        delta (float): Privacy parameter (e.g., 1e-5).
        mechanisms (dict[str, str], optional): Dictionary of mechanisms for the
            query. See the Smartnoise-SQL documentation on overriding mechanisms:
            https://docs.smartnoise.org/sql/advanced.html#overriding-mechanisms
            Defaults to None, which is sent as an empty dictionary.

    Returns:
        CostResponse: The estimated cost.
    """
    payload = {
        "query_str": query,
        "dataset_name": self.http_client.config.dataset_name,
        "epsilon": epsilon,
        "delta": delta,
        # None means "no overrides" and is sent as an empty mapping.
        "mechanisms": {} if mechanisms is None else mechanisms,
    }
    request = SmartnoiseSQLRequestModel.model_validate(payload)
    response = self.http_client.post("estimate_smartnoise_sql_cost", request)
    return validate_model_response(self.http_client, response, CostResponse)

query #

query(
    query: str,
    epsilon: float,
    delta: float,
    mechanisms: dict[str, str] | None = None,
    postprocess: bool = True,
    dummy: bool = False,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
) -> QueryResponse

This function executes a SmartNoise SQL query.

Parameters:

query #
(str) –

The SQL query to execute. NOTE: the table name is df, the query must end with “FROM df”.
epsilon #
(float) –

Privacy parameter (e.g., 0.1).
delta #
(float) –

Privacy parameter (e.g., 1e-5).
mechanisms #
(dict[str, str], default: None ) –

Dictionary of mechanisms for the query. See the Smartnoise-SQL documentation on overriding mechanisms: https://docs.smartnoise.org/sql/advanced.html#overriding-mechanisms

Defaults to {}.
postprocess #
(bool, default: True ) –

Whether to postprocess the query results. See the Smartnoise-SQL postprocessing documentation: https://docs.smartnoise.org/sql/advanced.html#postprocess

Defaults to True.
dummy #
(bool, default: False ) –

Whether to use a dummy dataset.

Defaults to False.
nb_rows #
(int, default: DUMMY_NB_ROWS ) –

The number of rows in the dummy dataset.

Defaults to DUMMY_NB_ROWS.
seed #
(int, default: DUMMY_SEED ) –

The random seed for generating the dummy dataset.

Defaults to DUMMY_SEED.

Returns:

QueryResponse ( QueryResponse ) –

A Pandas DataFrame containing the query results.

Source code in client/lomas_client/libraries/smartnoise_sql.py

def query(
    self,
    query: str,
    epsilon: float,
    delta: float,
    mechanisms: dict[str, str] | None = None,
    postprocess: bool = True,
    dummy: bool = False,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
) -> QueryResponse:
    """Execute a SmartNoise SQL query.

    The request is posted to ``dummy_smartnoise_sql_query`` when ``dummy`` is
    true and to ``smartnoise_sql_query`` otherwise.

    Args:
        query (str): The SQL query to execute.
            NOTE: the table name is df, the query must end with “FROM df”.
        epsilon (float): Privacy parameter (e.g., 0.1).
        delta (float): Privacy parameter (e.g., 1e-5).
        mechanisms (dict[str, str], optional): Dictionary of mechanisms for the
            query. See the Smartnoise-SQL documentation on overriding mechanisms:
            https://docs.smartnoise.org/sql/advanced.html#overriding-mechanisms
            Defaults to None, which is sent as an empty dictionary.
        postprocess (bool, optional): Whether to postprocess the query results.
            See the Smartnoise-SQL postprocessing documentation:
            https://docs.smartnoise.org/sql/advanced.html#postprocess
            Defaults to True.
        dummy (bool, optional): Whether to use a dummy dataset.
            Defaults to False.
        nb_rows (int, optional): The number of rows in the dummy dataset.
            Only sent when ``dummy`` is true. Defaults to DUMMY_NB_ROWS.
        seed (int, optional): The random seed for generating the dummy dataset.
            Only sent when ``dummy`` is true. Defaults to DUMMY_SEED.

    Returns:
        QueryResponse: The validated server response containing the query results.
    """
    payload = {
        "query_str": query,
        "dataset_name": self.http_client.config.dataset_name,
        "epsilon": epsilon,
        "delta": delta,
        # None means "no overrides" and is sent as an empty mapping.
        "mechanisms": {} if mechanisms is None else mechanisms,
        "postprocess": postprocess,
    }

    model_cls: type[SmartnoiseSQLRequestModel]
    if not dummy:
        endpoint = "smartnoise_sql_query"
        model_cls = SmartnoiseSQLQueryModel
    else:
        # Dummy queries carry the extra parameters describing the dummy dataset.
        payload["dummy_nb_rows"] = nb_rows
        payload["dummy_seed"] = seed
        endpoint = "dummy_smartnoise_sql_query"
        model_cls = SmartnoiseSQLDummyQueryModel

    request = model_cls.model_validate(payload)
    response = self.http_client.post(endpoint, request)
    return validate_model_response(self.http_client, response, QueryResponse)

Classes:

SmartnoiseSynthClient –

A client for executing and estimating the cost of SmartNoiseSynth queries.

SmartnoiseSynthClient #

SmartnoiseSynthClient(http_client: LomasHttpClient)

A client for executing and estimating the cost of SmartNoiseSynth queries.

Methods:

cost –

This function estimates the cost of executing a SmartNoise query.
query –

This function executes a SmartNoise Synthetic query.

Source code in client/lomas_client/libraries/smartnoise_synth.py

def __init__(self, http_client: LomasHttpClient) -> None:
    """Store the HTTP client used to send SmartNoise Synth requests.

    Args:
        http_client (LomasHttpClient): Client through which `cost` and `query`
            post their requests; its config also provides the dataset name.
    """
    self.http_client = http_client

cost #

cost(
    synth_name: str,
    epsilon: float,
    delta: float | None = None,
    select_cols: list[str] | None = None,
    synth_params: dict | None = None,
    nullable: bool = True,
    constraints: dict | None = None,
) -> CostResponse

This function estimates the cost of executing a SmartNoise query.

Parameters:

synth_name #
(str) –

name of the Synthesizer model to use. Available synthesizer are - "aim", - "mwem", - "dpctgan" with disabled_dp=False and warning (cryptographically secure random generator) - "patectgan" - "dpgan" with warning (cryptographically secure random generator)

Available under certain conditions: - "mst" if return_model=False - "pategan" if the dataset has enough rows

Not available: - "pacsynth" due to Rust panic error - "quail" currently unavailable in Smartnoise Synth

For further documentation on models, please see here: https://docs.smartnoise.org/synth/index.html#synthesizers-reference
epsilon #
(float) –

Privacy parameter (e.g., 0.1).
delta #
(float, default: None ) –

Privacy parameter (e.g., 1e-5).
select_cols #
(List[str], default: None ) –

List of columns to select. Defaults to None.
synth_params #
(dict, default: None ) –

Keyword arguments to pass to the synthesizer constructor. See https://docs.smartnoise.org/synth/synthesizers/index.html#, provide all parameters of the model except epsilon and delta. Defaults to None.
nullable #
(bool, default: True ) –

True if some data cells may be null Defaults to True.
constraints #
(dict, default: None ) –

Dictionary of custom table transformer constraints. Columns that are not specified will be inferred based on metadata. Defaults to {}. For further documentation on constraints, please see here: https://docs.smartnoise.org/synth/transforms/index.html. Note: lambda functions in AnonymizationTransformer are not supported.

Returns:

CostResponse ( CostResponse ) –

The estimated cost.

Source code in client/lomas_client/libraries/smartnoise_synth.py

def cost(
    self,
    synth_name: str,
    epsilon: float,
    delta: float | None = None,
    select_cols: list[str] | None = None,
    synth_params: dict | None = None,
    nullable: bool = True,
    constraints: dict | None = None,
) -> CostResponse:
    """Estimate the cost of executing a SmartNoise Synth query.

    The request is validated as a ``SmartnoiseSynthRequestModel`` and posted to
    the ``estimate_smartnoise_synth_cost`` endpoint using the
    ``SMARTNOISE_SYNTH_READ_TIMEOUT`` timeout.

    Args:
        synth_name (str): Name of the Synthesizer model to use.
            Available synthesizers are:

            - "aim",
            - "mwem",
            - "dpctgan" with `disabled_dp=False` and warning
              (cryptographically secure random generator)
            - "patectgan"
            - "dpgan" with warning (cryptographically secure random generator)

            Available under certain conditions:

            - "mst" if `return_model=False`
            - "pategan" if the dataset has enough rows

            Not available:

            - "pacsynth" due to Rust panic error
            - "quail" currently unavailable in Smartnoise Synth

            For further documentation on models, please see here:
            https://docs.smartnoise.org/synth/index.html#synthesizers-reference
        epsilon (float): Privacy parameter (e.g., 0.1).
        delta (float | None): Privacy parameter (e.g., 1e-5).
            Defaults to None.
        select_cols (list[str] | None): List of columns to select.
            Defaults to None, which is sent as an empty list.
        synth_params (dict | None): Keyword arguments to pass to the synthesizer
            constructor.
            See https://docs.smartnoise.org/synth/synthesizers/index.html#, provide
            all parameters of the model except `epsilon` and `delta`.
            Defaults to None, which is sent as an empty dictionary.
        nullable (bool): True if some data cells may be null.
            Defaults to True.
        constraints (dict | None): Dictionary of custom table transformer
            constraints. Columns that are not specified will be inferred based
            on metadata.
            For further documentation on constraints, please see here:
            https://docs.smartnoise.org/synth/transforms/index.html.
            Note: lambda functions in `AnonymizationTransformer` are not supported.
            Defaults to None.

    Returns:
        CostResponse: The estimated cost.
    """
    if constraints is None:
        constraints = {}
    # NOTE(review): constraints are not forwarded at the moment. The call to
    # serialise_constraints is disabled, so the request always carries an
    # empty string whatever the caller passed.

    payload = {
        "dataset_name": self.http_client.config.dataset_name,
        "synth_name": synth_name,
        "epsilon": epsilon,
        "delta": delta,
        "select_cols": [] if select_cols is None else select_cols,
        "synth_params": {} if synth_params is None else synth_params,
        "nullable": nullable,
        "constraints": "",
    }
    request = SmartnoiseSynthRequestModel.model_validate(payload)
    response = self.http_client.post(
        "estimate_smartnoise_synth_cost", request, SMARTNOISE_SYNTH_READ_TIMEOUT
    )
    return validate_model_response(self.http_client, response, CostResponse)

query #

query(
    synth_name: str,
    epsilon: float,
    delta: float | None = None,
    select_cols: list[str] | None = None,
    synth_params: dict | None = None,
    nullable: bool = True,
    constraints: dict | None = None,
    dummy: bool = False,
    return_model: bool = False,
    condition: str = "",
    nb_samples: int = SNSYNTH_DEFAULT_SAMPLES_NB,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
) -> QueryResponse

This function executes a SmartNoise Synthetic query.

Parameters:

synth_name #
(str) –

name of the Synthesizer model to use. Available synthesizer are - "aim", - "mwem", - "dpctgan" with disabled_dp=False and warning (cryptographically secure random generator) - "patectgan" - "dpgan" with warning (cryptographically secure random generator)

Available under certain conditions: - "mst" if return_model=False - "pategan" if the dataset has enough rows

Not available: - "pacsynth" due to Rust panic error - "quail" currently unavailable in Smartnoise Synth

For further documentation on models, please see here: https://docs.smartnoise.org/synth/index.html#synthesizers-reference
epsilon #
(float) –

Privacy parameter (e.g., 0.1).
delta #
(float, default: None ) –

Privacy parameter (e.g., 1e-5).
select_cols #
(List[str], default: None ) –

List of columns to select. Defaults to None.
synth_params #
(dict, default: None ) –

Keyword arguments to pass to the synthesizer constructor. See https://docs.smartnoise.org/synth/synthesizers/index.html#, provide all parameters of the model except epsilon and delta. Defaults to None.
nullable #
(bool, default: True ) –

True if some data cells may be null Defaults to True.
constraints #
(dict | None, default: None ) –

Dictionary of custom table transformer constraints. Columns that are not specified will be inferred based on metadata. Defaults to {}. For further documentation on constraints, please see here: https://docs.smartnoise.org/synth/transforms/index.html. Note: lambda functions in AnonymizationTransformer are not supported.
return_model #
(bool, default: False ) –

True to get Synthesizer model, False to get samples Defaults to False
condition #
(Optional[str], default: '' ) –

sampling condition in model.sample (only relevant if return_model is False) Defaults to "".
nb_samples #
(Optional[int], default: SNSYNTH_DEFAULT_SAMPLES_NB ) –

number of samples to generate. (only relevant if return_model is False) Defaults to SNSYNTH_DEFAULT_SAMPLES_NB
dummy #
(bool, default: False ) –

Whether to use a dummy dataset. Defaults to False.
nb_rows #
(int, default: DUMMY_NB_ROWS ) –

The number of rows in the dummy dataset. Defaults to DUMMY_NB_ROWS.
seed #
(int, default: DUMMY_SEED ) –

The random seed for generating the dummy dataset. Defaults to DUMMY_SEED.

Returns:

QueryResponse ( QueryResponse ) –

A Pandas DataFrame containing the query results.

Source code in client/lomas_client/libraries/smartnoise_synth.py

def query(
    self,
    synth_name: str,
    epsilon: float,
    delta: float | None = None,
    select_cols: list[str] | None = None,
    synth_params: dict | None = None,
    nullable: bool = True,
    constraints: dict | None = None,
    dummy: bool = False,
    return_model: bool = False,
    condition: str = "",
    nb_samples: int = SNSYNTH_DEFAULT_SAMPLES_NB,
    nb_rows: int = DUMMY_NB_ROWS,
    seed: int = DUMMY_SEED,
) -> QueryResponse:
    """Execute a SmartNoise Synth query.

    The request is posted to ``dummy_smartnoise_synth_query`` when ``dummy`` is
    true and to ``smartnoise_synth_query`` otherwise, using the
    ``SMARTNOISE_SYNTH_READ_TIMEOUT`` timeout.

    Args:
        synth_name (str): Name of the Synthesizer model to use.
            Available synthesizers are:

            - "aim",
            - "mwem",
            - "dpctgan" with `disabled_dp=False` and warning
              (cryptographically secure random generator)
            - "patectgan"
            - "dpgan" with warning (cryptographically secure random generator)

            Available under certain conditions:

            - "mst" if `return_model=False`
            - "pategan" if the dataset has enough rows

            Not available:

            - "pacsynth" due to Rust panic error
            - "quail" currently unavailable in Smartnoise Synth

            For further documentation on models, please see here:
            https://docs.smartnoise.org/synth/index.html#synthesizers-reference
        epsilon (float): Privacy parameter (e.g., 0.1).
        delta (float | None): Privacy parameter (e.g., 1e-5).
            Defaults to None.
        select_cols (list[str] | None): List of columns to select.
            Defaults to None, which is sent as an empty list.
        synth_params (dict | None): Keyword arguments to pass to the synthesizer
            constructor.
            See https://docs.smartnoise.org/synth/synthesizers/index.html#, provide
            all parameters of the model except `epsilon` and `delta`.
            Defaults to None, which is sent as an empty dictionary.
        nullable (bool): True if some data cells may be null.
            Defaults to True.
        constraints (dict | None): Dictionary of custom table transformer
            constraints. Columns that are not specified will be inferred based
            on metadata.
            For further documentation on constraints, please see here:
            https://docs.smartnoise.org/synth/transforms/index.html.
            Note: lambda functions in `AnonymizationTransformer` are not supported.
            Defaults to None.
        dummy (bool, optional): Whether to use a dummy dataset.
            Defaults to False.
        return_model (bool): True to get Synthesizer model, False to get samples.
            Defaults to False.
        condition (str): Sampling condition in `model.sample`
            (only relevant if return_model is False).
            Defaults to "".
        nb_samples (int): Number of samples to generate
            (only relevant if return_model is False).
            Defaults to SNSYNTH_DEFAULT_SAMPLES_NB.
        nb_rows (int, optional): The number of rows in the dummy dataset.
            Only sent when ``dummy`` is true. Defaults to DUMMY_NB_ROWS.
        seed (int, optional): The random seed for generating the dummy dataset.
            Only sent when ``dummy`` is true. Defaults to DUMMY_SEED.

    Returns:
        QueryResponse: The validated server response containing the query results.
    """
    if constraints is None:
        constraints = {}
    # NOTE(review): constraints are not forwarded at the moment. The call to
    # serialise_constraints is disabled, so the request always carries an
    # empty string whatever the caller passed.

    payload = {
        "dataset_name": self.http_client.config.dataset_name,
        "synth_name": synth_name,
        "epsilon": epsilon,
        "delta": delta,
        "select_cols": [] if select_cols is None else select_cols,
        "synth_params": {} if synth_params is None else synth_params,
        "nullable": nullable,
        "constraints": "",
        "return_model": return_model,
        "condition": condition,
        "nb_samples": nb_samples,
    }

    model_cls: type[SmartnoiseSynthRequestModel]
    if not dummy:
        endpoint = "smartnoise_synth_query"
        model_cls = SmartnoiseSynthQueryModel
    else:
        # Dummy queries carry the extra parameters describing the dummy dataset.
        payload["dummy_nb_rows"] = nb_rows
        payload["dummy_seed"] = seed
        endpoint = "dummy_smartnoise_synth_query"
        model_cls = SmartnoiseSynthDummyQueryModel

    request = model_cls.model_validate(payload)
    response = self.http_client.post(endpoint, request, SMARTNOISE_SYNTH_READ_TIMEOUT)
    return validate_model_response(self.http_client, response, QueryResponse)

Classes:

ClientConfig –

Config model for the HTTP client.

ClientConfig #


              flowchart TD
              lomas_client.models.config.ClientConfig[ClientConfig]

              

              click lomas_client.models.config.ClientConfig href "" "lomas_client.models.config.ClientConfig"

Config model for the HTTP client.

Used by:

API Reference Client LomasHttpClient

Methods:

oidc_use_tls –

Using TLS for OIDC?
lomas_service_use_tls –

Using TLS for lomas service?

Attributes:

app_url (HttpUrl) –

The base URL for the API server.
dataset_name (str) –

The name of the dataset to be accessed or manipulated.
use_password_flow (bool) –

If true, uses the legacy password auth flow.
user_name (str | None) –

User name.
user_password (str | None) –

User password.
oidc_discovery_url (HttpUrl) –

The OIDC provider discovery URL.
telemetry (Telemetry) –

Telemetry Settings.
oidc_config (OIDCConfig) –

Returns the oidc provider config.

app_url `instance-attribute` #

app_url: HttpUrl

The base URL for the API server.

dataset_name `instance-attribute` #

dataset_name: str

The name of the dataset to be accessed or manipulated.

use_password_flow `class-attribute` `instance-attribute` #

use_password_flow: bool = False

If true, uses the legacy password auth flow.

user_name `class-attribute` `instance-attribute` #

user_name: str | None = None

User name.

user_password `class-attribute` `instance-attribute` #

user_password: str | None = None

User password.

oidc_discovery_url `instance-attribute` #

oidc_discovery_url: HttpUrl

The OIDC provider discovery URL.

telemetry `instance-attribute` #

telemetry: Telemetry

Telemetry Settings.

oidc_config `cached` `property` #

oidc_config: OIDCConfig

Returns the oidc provider config.

oidc_use_tls #

oidc_use_tls() -> bool

Using TLS for OIDC?

Source code in client/lomas_client/models/config.py

@computed_field
def oidc_use_tls(self) -> bool:
    """Whether the OIDC provider is reached over TLS.

    Returns:
        bool: True if the scheme of `oidc_discovery_url` is "https".
    """
    return self.oidc_discovery_url.scheme == "https"

lomas_service_use_tls #

lomas_service_use_tls() -> bool

Using TLS for lomas service?

Source code in client/lomas_client/models/config.py

@computed_field
def lomas_service_use_tls(self) -> bool:
    """Whether the lomas service is reached over TLS.

    Returns:
        bool: True if the scheme of `app_url` is "https".
    """
    return self.app_url.scheme == "https"

Functions:

get_client_notebook_files –

Returns a list of the client notebook file names (absolute paths).
run_notebook –

Runs the notebook in the given file.

get_client_notebook_files #

get_client_notebook_files() -> list[Path]

Returns a list of the client notebook file names (absolute paths).

Assumes the file layout is the same as in the code repository.

Source code in client/lomas_client/scripts/run_notebook.py

def get_client_notebook_files() -> list[Path]:
    """
    Collect the client notebook files.

    The notebooks are looked up in the ``notebooks`` directory located two
    levels above the directory containing this script, so the file layout is
    assumed to be the same as in the code repository.

    Returns:
        list[Path]: Resolved (absolute) paths of all ``*.ipynb`` files found.
    """
    script_dir = Path(__file__).parent
    notebooks = []
    for notebook in script_dir.glob("../../notebooks/*.ipynb"):
        notebooks.append(notebook.resolve())
    return notebooks

run_notebook #

run_notebook(
    notebook_file: Path,
    run_demo_setup: bool,
    save_output: bool = False,
    skip_smartnoise_synth: bool = True,
) -> None

Runs the notebook in the given file.

Assumes all services in the process compose are up and the file layout is same as in the code repository.

Parameters:

notebook_file #
(Path) –

Path of the notebook file to run.
run_demo_setup #
(bool) –

Runs the lomas_demo_setup before running the notebook.
save_output #
(bool, default: False ) –

Saves the output to the original file. Defaults to False.
skip_smartnoise_synth #
(bool, default: True ) –

Skip smartnoise synth demo notebook

Source code in client/lomas_client/scripts/run_notebook.py

def run_notebook(
    notebook_file: Path, run_demo_setup: bool, save_output: bool = False, skip_smartnoise_synth: bool = True
) -> None:
    """Execute the notebook stored in the given file.

    Assumes all services in the process compose are up and
    the file layout is same as in the code repository.

    Args:
        notebook_file (Path): Path of the notebook file to run. The notebook is
            executed with its parent directory as working path.
        run_demo_setup (bool): Wipes the database, restores the bootstrap and
            runs the lomas_demo_setup before running the notebook.
        save_output (bool, optional): Saves the executed notebook back to the
            original file. Defaults to False.
        skip_smartnoise_synth (bool, optional): Skip the smartnoise synth demo
            notebook. Defaults to True.

    Raises:
        ImportError: If ``run_demo_setup`` is true but the ``lomas_server``
            library cannot be found.
    """
    # TODO issue 423
    synth_notebook_name = "Demo_Client_Notebook_Smartnoise-Synth.ipynb"
    if skip_smartnoise_synth and notebook_file.name == synth_notebook_name:
        print("Skiping smartnoise synth notebook.")
        return

    if run_demo_setup:
        # The demo setup lives in the server package, which may not be installed.
        if importlib.util.find_spec("lomas_server") is None:
            raise ImportError("lomas_server library not found, cannot run lomas_demo_setup.")

        # Reset demo users and budgets before repopulating the demo data.
        server_config = ServerConfig()
        server_config.database.wipe()
        server_config.database.set_bootstrap(server_config.bootstrap)

        lomas_demo_setup()

    notebook = nbformat.read(notebook_file, as_version=4)
    execution_resources = {"metadata": {"path": str(notebook_file.parent)}}
    executor = NotebookClient(notebook, resources=execution_resources, timeout=60 * 5)
    executor.execute()

    if not save_output:
        return
    nbformat.write(notebook, notebook_file)

Functions:

dex_config –

Dex config.

dex_config #

dex_config()

Dex config.

Removes all dex users before yield.

Source code in client/lomas_client/tests/test_integrations.py

@pytest.fixture
def dex_config():
    """Provide the Dex config with no Dex users registered.

    All Dex users are removed before the config is yielded and once more after
    the test using the fixture is done.
    """
    config = AdminConfig().dex_config
    assert config is not None

    # Make sure the test starts without leftover users.
    del_all_dex_users(config)

    yield config

    # Leave nothing behind for the next test.
    del_all_dex_users(config)

Functions:

test_run_notebook –

Runs the notebook and fails if the notebook fails.

test_run_notebook #

test_run_notebook(notebook: Path) -> None

Runs the notebook and fails if the notebook fails.

Parameters:

notebook #
(Path) –

The notebook file path.

Source code in client/lomas_client/tests/test_run_notebooks.py

@pytest.mark.parametrize("notebook", mark_notebook(get_client_notebook_files()), ids=lambda file: file.name)
def test_run_notebook(notebook: Path) -> None:
    """Runs the notebook and fails if the notebook fails.

    Parametrized over the client notebook files; each test id is the notebook
    file name.

    Args:
        notebook (Path): The notebook file path.
    """
    # The demo setup is re-run for every notebook, outputs are not saved and
    # the smartnoise synth notebook is skipped.
    run_notebook(notebook, run_demo_setup=True, save_output=False, skip_smartnoise_synth=True)

Functions:

raise_error –

Raise error message based on the HTTP response.
validate_model_response_direct –

Validate and process a HTTP response.
validate_model_response –

Validate and process a HTTP response.

raise_error #

raise_error(response: Response) -> Never

Raise error message based on the HTTP response.

Parameters:

response #
(Response) –

The response object from an HTTP request.

Raises:

Server Error

Source code in client/lomas_client/utils.py

def raise_error(response: requests.Response) -> Never:
    """Turn an HTTP error response into a raised exception.

    The response body is parsed as JSON and validated against the server
    exception models; the resulting model is then handed to
    ``raise_error_from_model``.

    Args:
        response (requests.Response): The response object from an HTTP request.

    Raises:
        InternalServerException: If the body is not valid JSON or does not
            match any known server exception model.
    """
    try:
        payload = response.json()
        parsed_error = LomasServerExceptionTypeAdapter.validate_python(payload)
    except (ValidationError, JSONDecodeError) as exc:
        # Keep the raw body in the message so the unparsable payload is visible.
        raise InternalServerException(f"Could not parse server error: {response.content}") from exc

    raise_error_from_model(parsed_error)

validate_model_response_direct #

validate_model_response_direct(response: Response, response_model: Any) -> Any

Validate and process a HTTP response.

Parameters:

response #
(Response) –

The response object from an HTTP request.

Returns:

response_model ( Any ) –

The response body, parsed and validated as an instance of the given response model.

Source code in client/lomas_client/utils.py

def validate_model_response_direct(response: requests.Response, response_model: Any) -> Any:
    """Validate a synchronous HTTP response and parse its body.

    Args:
        response (requests.Response): The response object from an HTTP request.
        response_model (Any): Model class used to validate the JSON body
            (its ``model_validate_json`` method is called).

    Returns:
        The body of a 200 response, validated as ``response_model``.
        Any other status code is forwarded to ``raise_error``.
    """
    # Guard clause: anything but 200 is an error (raise_error never returns).
    if response.status_code != status.HTTP_200_OK:
        raise_error(response)

    body_text = response.content.decode("utf8")
    return response_model.model_validate_json(body_text)

validate_model_response #

validate_model_response(
    client: LomasHttpClient, response: Response, response_model: type[ResponseT]
) -> ResponseT

Validate and process a HTTP response.

Parameters:

response #
(Response) –

The response object from an HTTP request.

Returns:

response_model ( ResponseT ) –

The result of the completed job, validated as an instance of the given response model.

Source code in client/lomas_client/utils.py

def validate_model_response(
    client: LomasHttpClient, response: requests.Response, response_model: type[ResponseT]
) -> ResponseT:
    """Wait for the job announced by an HTTP response and validate its result.

    Args:
        client (LomasHttpClient): Client used to wait for the job to finish.
        response (requests.Response): The response object from an HTTP request;
            expected to be a 202 whose JSON body contains the job ``uid``.
        response_model (type[ResponseT]): Model class used to validate the
            job result.

    Returns:
        The job result validated as ``response_model``.
    """
    accepted = response.status_code == status.HTTP_202_ACCEPTED
    if not accepted:
        raise_error(response)

    # The 202 body carries the identifier of the job to wait for.
    job_uid = response.json()["uid"]
    finished_job = client.wait_for_job(job_uid)

    if finished_job.status == "failed":
        # A failed job must carry the error model to re-raise.
        assert finished_job.error is not None, f"job {job_uid} failed without error !"
        raise_error_from_model(finished_job.error)

    return response_model.model_validate(finished_job.result)

Client

Bound #

Client #

kwargs #

get_dataset_metadata #

get_column_metadata #

get_column_bounds #

get_diffprivlib_bounds #

get_dummy_dataset #

nb_rows #

seed #

lazy #

get_context #

epsilon #

delta #

rho #

get_initial_budget #

get_total_spent_budget #

get_remaining_budget #

get_previous_queries #

LomasHttpClient #

post #

endpoint #

body #

read_timeout #

wait_for_job #

DiffPrivLibClient #

cost #

pipeline #

feature_columns #

target_columns #

test_size #

test_train_split_seed #

imputer_strategy #

query #

pipeline #

feature_columns #

target_columns #

test_size #

test_train_split_seed #

imputer_strategy #

dummy #

nb_rows #

seed #

OpenDPClient #

cost #

opendp_pipeline #

epsilon #

delta #

rho #

approx_zcdp #

query #

opendp_pipeline #

epsilon #

delta #

rho #

approx_zcdp #

dummy #

nb_rows #

seed #

SmartnoiseSQLClient #

cost #

query #

epsilon #

delta #

query #

query #

epsilon #

delta #

mechanisms #

postprocess #

dummy #

nb_rows #

seed #

SmartnoiseSynthClient #

cost #

synth_name #

epsilon #

delta #

select_cols #

`kwargs` #

`nb_rows` #

`seed` #

`lazy` #

`epsilon` #

`delta` #

`rho` #

`endpoint` #

`body` #

`read_timeout` #

`pipeline` #

`feature_columns` #

`target_columns` #

`test_size` #

`test_train_split_seed` #

`imputer_strategy` #

`pipeline` #

`feature_columns` #

`target_columns` #

`test_size` #

`test_train_split_seed` #

`imputer_strategy` #

`dummy` #

`nb_rows` #

`seed` #

`opendp_pipeline` #

`epsilon` #

`delta` #

`rho` #

`approx_zcdp` #

`opendp_pipeline` #

`epsilon` #

`delta` #

`rho` #

`approx_zcdp` #

`dummy` #

`nb_rows` #

`seed` #

`query` #

`epsilon` #

`delta` #

`query` #

`epsilon` #

`delta` #

`mechanisms` #

`postprocess` #

`dummy` #

`nb_rows` #

`seed` #

`synth_name` #

`epsilon` #

`delta` #

`select_cols` #

`synth_params` #

`nullable` #

`constraints` #

`synth_name` #

`epsilon` #

`delta` #

`select_cols` #

`synth_params` #

`nullable` #

`constraints` #

`return_model` #

`condition` #

`nb_samples` #

`dummy` #

`nb_rows` #

`seed` #

app_url `instance-attribute` #

dataset_name `instance-attribute` #

use_password_flow `class-attribute` `instance-attribute` #

user_name `class-attribute` `instance-attribute` #

user_password `class-attribute` `instance-attribute` #

oidc_discovery_url `instance-attribute` #

telemetry `instance-attribute` #

oidc_config `cached` `property` #

`notebook_file` #

`run_demo_setup` #

`save_output` #