# Source code for lomas_server.routes.routes_dp

from fastapi import APIRouter, Body, Depends, Header, Request
from fastapi.responses import JSONResponse

from lomas_server.constants import DPLibraries
from lomas_server.routes.utils import (
    handle_cost_query,
    handle_query_on_dummy_dataset,
    handle_query_on_private_dataset,
    server_live,
)
from lomas_server.utils.query_examples import (
    example_diffprivlib,
    example_dummy_diffprivlib,
    example_dummy_opendp,
    example_dummy_smartnoise_sql,
    example_dummy_smartnoise_synth_query,
    example_opendp,
    example_smartnoise_sql,
    example_smartnoise_sql_cost,
    example_smartnoise_synth_cost,
    example_smartnoise_synth_query,
)
from lomas_server.utils.query_models import (
    DiffPrivLibDummyQueryModel,
    DiffPrivLibQueryModel,
    DiffPrivLibRequestModel,
    OpenDPDummyQueryModel,
    OpenDPQueryModel,
    OpenDPRequestModel,
    SmartnoiseSQLDummyQueryModel,
    SmartnoiseSQLQueryModel,
    SmartnoiseSQLRequestModel,
    SmartnoiseSynthDummyQueryModel,
    SmartnoiseSynthQueryModel,
    SmartnoiseSynthRequestModel,
)

router = APIRouter()


@router.post(
    "/smartnoise_sql_query",
    dependencies=[Depends(server_live)],
    tags=["USER_QUERY"],
)
def smartnoise_sql_handler(
    request: Request,
    query_json: SmartnoiseSQLQueryModel = Body(example_smartnoise_sql),
    user_name: str = Header(None),
) -> JSONResponse:
    """
    Handle a SmartNoise-SQL query on a private dataset.

    The work is delegated to ``handle_query_on_private_dataset`` with
    ``DPLibraries.SMARTNOISE_SQL``.

    Args:
        request (Request): Raw request object.
        query_json (SmartnoiseSQLQueryModel, optional): A JSON object
            containing:

            - query: The SQL query to execute.
              NOTE: the table name is "df", the query must end with
              "FROM df".
            - epsilon (float): Privacy parameter (e.g., 0.1).
            - delta (float): Privacy parameter (e.g., 1e-5).
            - mechanisms (dict, optional): Dictionary of mechanisms for
              the query (default: {}). See the Smartnoise-SQL mechanisms
              documentation at
              https://docs.smartnoise.org/sql/advanced.html#overriding-mechanisms.
            - postprocess (bool, optional): Whether to postprocess the
              query results (default: True). See the Smartnoise-SQL
              postprocessing documentation at
              https://docs.smartnoise.org/sql/advanced.html#postprocess.

            Defaults to Body(example_smartnoise_sql).
        user_name (str, optional): The user name.
            Defaults to Header(None).

    Raises:
        ExternalLibraryException: For exceptions from libraries
            external to this package.
        InternalServerException: For any other unforeseen exceptions.
        InvalidQueryException: If there is not enough budget or the
            dataset does not exist.
        UnauthorizedAccessException: A query is already ongoing for this
            user, the user does not exist or does not have access to the
            dataset.

    Returns:
        JSONResponse: A JSON object containing the following:

            - requested_by (str): The user name.
            - query_response (pd.DataFrame): A DataFrame containing
              the query response.
            - spent_epsilon (float): The amount of epsilon budget spent
              for the query.
            - spent_delta (float): The amount of delta budget spent
              for the query.
    """
    dp_library = DPLibraries.SMARTNOISE_SQL
    return handle_query_on_private_dataset(
        request, query_json, user_name, dp_library
    )
# Smartnoise SQL Dummy query
@router.post(
    "/dummy_smartnoise_sql_query",
    dependencies=[Depends(server_live)],
    tags=["USER_DUMMY"],
)
def dummy_smartnoise_sql_handler(
    request: Request,
    query_json: SmartnoiseSQLDummyQueryModel = Body(
        example_dummy_smartnoise_sql
    ),
    user_name: str = Header(None),
) -> JSONResponse:
    """
    Handle a SmartNoise-SQL query on a dummy dataset.

    The work is delegated to ``handle_query_on_dummy_dataset`` with
    ``DPLibraries.SMARTNOISE_SQL``.

    Args:
        request (Request): Raw request object.
        query_json (SmartnoiseSQLDummyQueryModel, optional): A JSON
            object containing:

            - query: The SQL query to execute.
              NOTE: the table name is "df", the query must end with
              "FROM df".
            - epsilon (float): Privacy parameter (e.g., 0.1).
            - delta (float): Privacy parameter (e.g., 1e-5).
            - mechanisms (dict, optional): Dictionary of mechanisms for
              the query (default: {}). See the Smartnoise-SQL mechanisms
              documentation at
              https://docs.smartnoise.org/sql/advanced.html#overriding-mechanisms.
            - postprocess (bool, optional): Whether to postprocess the
              query results (default: True). See the Smartnoise-SQL
              postprocessing documentation at
              https://docs.smartnoise.org/sql/advanced.html#postprocess.
            - nb_rows (int, optional): The number of rows in the dummy
              dataset (default: 100).
            - seed (int, optional): The random seed for generating the
              dummy dataset (default: 42).

            Defaults to Body(example_dummy_smartnoise_sql).
        user_name (str, optional): The user name.
            Defaults to Header(None).

    Raises:
        ExternalLibraryException: For exceptions from libraries
            external to this package.
        InternalServerException: For any other unforeseen exceptions.
        InvalidQueryException: If there is not enough budget or the
            dataset does not exist.

    Returns:
        JSONResponse: A JSON object containing:

            - query_response (pd.DataFrame): A DataFrame containing
              the query response.
    """
    dp_library = DPLibraries.SMARTNOISE_SQL
    return handle_query_on_dummy_dataset(
        request, query_json, user_name, dp_library
    )
@router.post(
    "/estimate_smartnoise_sql_cost",
    dependencies=[Depends(server_live)],
    tags=["USER_QUERY"],
)
def estimate_smartnoise_sql_cost(
    request: Request,
    query_json: SmartnoiseSQLRequestModel = Body(example_smartnoise_sql_cost),
    user_name: str = Header(None),
) -> JSONResponse:
    """
    Estimate the privacy loss budget cost of a SmartNoise-SQL query.

    The work is delegated to ``handle_cost_query`` with
    ``DPLibraries.SMARTNOISE_SQL``.

    Args:
        request (Request): Raw request object.
        query_json (SmartnoiseSQLRequestModel, optional): A JSON object
            containing the following:

            - query: The SQL query to estimate the cost for.
              NOTE: the table name is "df", the query must end with
              "FROM df".
            - epsilon (float): Privacy parameter (e.g., 0.1).
            - delta (float): Privacy parameter (e.g., 1e-5).
            - mechanisms (dict, optional): Dictionary of mechanisms for
              the query (default: {}). See the Smartnoise-SQL mechanisms
              documentation at
              https://docs.smartnoise.org/sql/advanced.html#overriding-mechanisms.

            Defaults to Body(example_smartnoise_sql_cost).
        user_name (str, optional): The user name.
            Defaults to Header(None).

    Raises:
        ExternalLibraryException: For exceptions from libraries
            external to this package.
        InternalServerException: For any other unforeseen exceptions.
        InvalidQueryException: The dataset does not exist.

    Returns:
        JSONResponse: A JSON object containing:

            - epsilon_cost (float): The estimated epsilon cost.
            - delta_cost (float): The estimated delta cost.
    """
    dp_library = DPLibraries.SMARTNOISE_SQL
    return handle_cost_query(request, query_json, user_name, dp_library)
@router.post(
    "/smartnoise_synth_query",
    dependencies=[Depends(server_live)],
    tags=["USER_QUERY"],
)
def smartnoise_synth_handler(
    request: Request,
    query_json: SmartnoiseSynthQueryModel = Body(
        example_smartnoise_synth_query
    ),
    user_name: str = Header(None),
) -> JSONResponse:
    """
    Handle a SmartNoise Synth query on a private dataset.

    The work is delegated to ``handle_query_on_private_dataset`` with
    ``DPLibraries.SMARTNOISE_SYNTH``.

    Args:
        request (Request): Raw request object.
        query_json (SmartnoiseSynthQueryModel, optional): A JSON object
            containing:

            - synth_name (str): Name of the Synthesizer model to use.
            - epsilon (float): Privacy parameter (e.g., 0.1).
            - delta (float): Privacy parameter (e.g., 1e-5).
            - mechanisms (dict[str, str], optional): Dictionary of
              mechanisms for the query. See the Smartnoise-SQL
              postprocessing documentation at
              https://docs.smartnoise.org/sql/advanced.html#postprocess.
            - select_cols (List[str]): List of columns to select.
            - synth_params (dict): Keyword arguments to pass to the
              synthesizer constructor. See
              https://docs.smartnoise.org/synth/synthesizers/index.html#,
              provide all parameters of the model except `epsilon` and
              `delta`.
            - nullable (bool): True if some data cells may be null.
            - constraints (dict): Dictionary for custom table
              transformer constraints. Columns that are not specified
              will be inferred based on metadata.
            - return_model (bool): True to get the Synthesizer model,
              False to get samples.
            - condition (Optional[str]): Sampling condition in
              `model.sample` (only relevant if return_model is False).
            - nb_samples (Optional[int]): Number of samples to generate
              (only relevant if return_model is False).

            Defaults to Body(example_smartnoise_synth_query).
        user_name (str, optional): The user name.
            Defaults to Header(None).

    Raises:
        ExternalLibraryException: For exceptions from libraries
            external to this package.
        InternalServerException: For any other unforeseen exceptions.
        InvalidQueryException: If there is not enough budget or the
            dataset does not exist.
        UnauthorizedAccessException: A query is already ongoing for this
            user, the user does not exist or does not have access to the
            dataset.

    Returns:
        JSONResponse: A JSON object containing the following:

            - requested_by (str): The user name.
            - query_response (pd.DataFrame): A DataFrame containing
              the query response.
            - spent_epsilon (float): The amount of epsilon budget spent
              for the query.
            - spent_delta (float): The amount of delta budget spent
              for the query.
    """
    dp_library = DPLibraries.SMARTNOISE_SYNTH
    return handle_query_on_private_dataset(
        request, query_json, user_name, dp_library
    )
@router.post(
    "/dummy_smartnoise_synth_query",
    dependencies=[Depends(server_live)],
    # Dummy endpoints are grouped under USER_DUMMY, like the other
    # dummy_* routes of this router (was USER_QUERY).
    tags=["USER_DUMMY"],
)
def dummy_smartnoise_synth_handler(
    request: Request,
    query_json: SmartnoiseSynthDummyQueryModel = Body(
        example_dummy_smartnoise_synth_query
    ),
    user_name: str = Header(None),
) -> JSONResponse:
    """
    Handle a SmartNoise Synth query on a dummy dataset.

    The work is delegated to ``handle_query_on_dummy_dataset`` with
    ``DPLibraries.SMARTNOISE_SYNTH``.

    Args:
        request (Request): Raw request object.
        query_json (SmartnoiseSynthDummyQueryModel, optional): A JSON
            object containing:

            - synth_name (str): Name of the Synthesizer model to use.
            - epsilon (float): Privacy parameter (e.g., 0.1).
            - delta (float): Privacy parameter (e.g., 1e-5).
            - mechanisms (dict[str, str], optional): Dictionary of
              mechanisms for the query. See the Smartnoise-SQL
              postprocessing documentation at
              https://docs.smartnoise.org/sql/advanced.html#postprocess.
            - select_cols (List[str]): List of columns to select.
            - synth_params (dict): Keyword arguments to pass to the
              synthesizer constructor. See
              https://docs.smartnoise.org/synth/synthesizers/index.html#,
              provide all parameters of the model except `epsilon` and
              `delta`.
            - nullable (bool): True if some data cells may be null.
            - constraints (dict): Dictionary for custom table
              transformer constraints. Columns that are not specified
              will be inferred based on metadata.
            - return_model (bool): True to get the Synthesizer model,
              False to get samples.
            - condition (Optional[str]): Sampling condition in
              `model.sample` (only relevant if return_model is False).
            - nb_samples (Optional[int]): Number of samples to generate
              (only relevant if return_model is False).
            - nb_rows (int, optional): The number of rows in the dummy
              dataset (default: 100).
            - seed (int, optional): The random seed for generating the
              dummy dataset (default: 42).

            Defaults to Body(example_dummy_smartnoise_synth_query).
        user_name (str, optional): The user name.
            Defaults to Header(None).

    Raises:
        ExternalLibraryException: For exceptions from libraries
            external to this package.
        InternalServerException: For any other unforeseen exceptions.
        InvalidQueryException: If there is not enough budget or the
            dataset does not exist.
        UnauthorizedAccessException: A query is already ongoing for this
            user, the user does not exist or does not have access to the
            dataset.

    Returns:
        JSONResponse: A JSON object containing:

            - query_response: The query response computed on the dummy
              dataset.
    """
    return handle_query_on_dummy_dataset(
        request, query_json, user_name, DPLibraries.SMARTNOISE_SYNTH
    )
@router.post(
    "/estimate_smartnoise_synth_cost",
    dependencies=[Depends(server_live)],
    tags=["USER_QUERY"],
)
def estimate_smartnoise_synth_cost(
    request: Request,
    query_json: SmartnoiseSynthRequestModel = Body(
        example_smartnoise_synth_cost
    ),
    user_name: str = Header(None),
) -> JSONResponse:
    """
    Estimate the privacy loss budget cost of a SmartNoise Synth query.

    The work is delegated to ``handle_cost_query`` with
    ``DPLibraries.SMARTNOISE_SYNTH``.

    Args:
        request (Request): Raw request object.
        query_json (SmartnoiseSynthRequestModel, optional): A JSON
            object containing:

            - synth_name (str): Name of the Synthesizer model to use.
            - epsilon (float): Privacy parameter (e.g., 0.1).
            - delta (float): Privacy parameter (e.g., 1e-5).
            - mechanisms (dict[str, str], optional): Dictionary of
              mechanisms for the query. See the Smartnoise-SQL
              postprocessing documentation at
              https://docs.smartnoise.org/sql/advanced.html#postprocess.
            - select_cols (List[str]): List of columns to select.
            - synth_params (dict): Keyword arguments to pass to the
              synthesizer constructor. See
              https://docs.smartnoise.org/synth/synthesizers/index.html#,
              provide all parameters of the model except `epsilon` and
              `delta`.
            - nullable (bool): True if some data cells may be null.
            - constraints (dict): Dictionary for custom table
              transformer constraints. Columns that are not specified
              will be inferred based on metadata.

            Defaults to Body(example_smartnoise_synth_cost).
        user_name (str, optional): The user name.
            Defaults to Header(None).

    Raises:
        ExternalLibraryException: For exceptions from libraries
            external to this package.
        InternalServerException: For any other unforeseen exceptions.
        InvalidQueryException: If there is not enough budget or the
            dataset does not exist.
        UnauthorizedAccessException: A query is already ongoing for this
            user, the user does not exist or does not have access to the
            dataset.

    Returns:
        JSONResponse: A JSON object containing:

            - epsilon_cost (float): The estimated epsilon cost.
            - delta_cost (float): The estimated delta cost.
    """
    dp_library = DPLibraries.SMARTNOISE_SYNTH
    return handle_cost_query(request, query_json, user_name, dp_library)
@router.post(
    "/opendp_query",
    dependencies=[Depends(server_live)],
    tags=["USER_QUERY"],
)
def opendp_query_handler(
    request: Request,
    query_json: OpenDPQueryModel = Body(example_opendp),
    user_name: str = Header(None),
) -> JSONResponse:
    """
    Handle an OpenDP query on a private dataset.

    The query is run through ``handle_query_on_private_dataset`` with
    ``DPLibraries.OPENDP`` and the result is wrapped in a ``JSONResponse``.

    Args:
        request (Request): Raw request object.
        query_json (OpenDPQueryModel, optional): A JSON object
            containing the following:

            - opendp_pipeline: The OpenDP pipeline for the query.
            - fixed_delta: If the pipeline measurement is of type
              "ZeroConcentratedDivergence" (e.g. with "make_gaussian")
              then it is converted to "SmoothedMaxDivergence" with
              "make_zCDP_to_approxDP" (see the opendp measurements
              documentation at
              https://docs.opendp.org/en/stable/api/python/opendp.combinators.html#opendp.combinators.make_zCDP_to_approxDP). # noqa # pylint: disable=C0301
              In that case a "fixed_delta" must be provided by the user.

            Defaults to Body(example_opendp).
        user_name (str, optional): The user name.
            Defaults to Header(None).

    Raises:
        ExternalLibraryException: For exceptions from libraries
            external to this package.
        InternalServerException: For any other unforeseen exceptions.
        InvalidQueryException: The pipeline does not contain a
            "measurement", there is not enough budget or the dataset
            does not exist.
        UnauthorizedAccessException: A query is already ongoing for this
            user, the user does not exist or does not have access to the
            dataset.

    Returns:
        JSONResponse: A JSON object containing the following:

            - requested_by (str): The user name.
            - query_response (pd.DataFrame): A DataFrame containing
              the query response.
            - spent_epsilon (float): The amount of epsilon budget spent
              for the query.
            - spent_delta (float): The amount of delta budget spent
              for the query.
    """
    # NOTE(review): the other handlers in this module return the helper's
    # result directly, whereas this one wraps it in a JSONResponse. Confirm
    # against handle_query_on_private_dataset's return type which form is
    # intended.
    return JSONResponse(
        content=handle_query_on_private_dataset(
            request, query_json, user_name, DPLibraries.OPENDP
        )
    )
@router.post(
    "/dummy_opendp_query",
    dependencies=[Depends(server_live)],
    tags=["USER_DUMMY"],
)
def dummy_opendp_query_handler(
    request: Request,
    query_json: OpenDPDummyQueryModel = Body(example_dummy_opendp),
    user_name: str = Header(None),
) -> JSONResponse:
    """
    Handle an OpenDP query on a dummy dataset.

    The work is delegated to ``handle_query_on_dummy_dataset`` with
    ``DPLibraries.OPENDP``.

    Args:
        request (Request): Raw request object.
        query_json (OpenDPDummyQueryModel, optional): A JSON object
            containing the following:

            - opendp_pipeline: The OpenDP pipeline for the query.
            - fixed_delta: If the pipeline measurement is of type
              "ZeroConcentratedDivergence" (e.g. with "make_gaussian")
              then it is converted to "SmoothedMaxDivergence" with
              "make_zCDP_to_approxDP" (see the opendp measurements
              documentation at
              https://docs.opendp.org/en/stable/api/python/opendp.combinators.html#opendp.combinators.make_zCDP_to_approxDP). # noqa # pylint: disable=C0301
              In that case a "fixed_delta" must be provided by the user.
            - nb_rows (int, optional): The number of rows in the dummy
              dataset (default: 100).
            - seed (int, optional): The random seed for generating the
              dummy dataset (default: 42).

            Defaults to Body(example_dummy_opendp).
        user_name (str, optional): The user name.
            Defaults to Header(None).

    Raises:
        ExternalLibraryException: For exceptions from libraries
            external to this package.
        InternalServerException: For any other unforeseen exceptions.
        InvalidQueryException: If there is not enough budget or the
            dataset does not exist.

    Returns:
        JSONResponse: A JSON object containing:

            - query_response (pd.DataFrame): A DataFrame containing
              the query response.
    """
    dp_library = DPLibraries.OPENDP
    return handle_query_on_dummy_dataset(
        request, query_json, user_name, dp_library
    )
@router.post(
    "/estimate_opendp_cost",
    dependencies=[Depends(server_live)],
    tags=["USER_QUERY"],
)
def estimate_opendp_cost(
    request: Request,
    query_json: OpenDPRequestModel = Body(example_opendp),
    user_name: str = Header(None),
) -> JSONResponse:
    """
    Estimate the privacy loss budget cost of an OpenDP query.

    The work is delegated to ``handle_cost_query`` with
    ``DPLibraries.OPENDP``.

    Args:
        request (Request): Raw request object.
        query_json (OpenDPRequestModel, optional): A JSON object
            containing the following:

            - "opendp_pipeline": The OpenDP pipeline for the query.

            Defaults to Body(example_opendp).
        user_name (str, optional): The user name.
            Defaults to Header(None).

    Raises:
        ExternalLibraryException: For exceptions from libraries
            external to this package.
        InternalServerException: For any other unforeseen exceptions.
        InvalidQueryException: The dataset does not exist or the
            pipeline does not contain a measurement.

    Returns:
        JSONResponse: A JSON object containing:

            - epsilon_cost (float): The estimated epsilon cost.
            - delta_cost (float): The estimated delta cost.
    """
    dp_library = DPLibraries.OPENDP
    return handle_cost_query(request, query_json, user_name, dp_library)
@router.post(
    "/diffprivlib_query",
    dependencies=[Depends(server_live)],
    tags=["USER_QUERY"],
)
def diffprivlib_query_handler(
    request: Request,
    query_json: DiffPrivLibQueryModel = Body(example_diffprivlib),
    user_name: str = Header(None),
) -> JSONResponse:
    """
    Handle a DiffPrivLib query on a private dataset.

    The work is delegated to ``handle_query_on_private_dataset`` with
    ``DPLibraries.DIFFPRIVLIB``.

    Args:
        request (Request): Raw request object.
        query_json (DiffPrivLibQueryModel, optional): A JSON object
            containing the following:

            - pipeline: The DiffPrivLib pipeline for the query.
            - feature_columns: The list of feature columns to train on.
            - target_columns: The list of target columns to predict.
            - test_size: Proportion of the test set.
            - test_train_split_seed: Seed for the random train/test
              split.
            - imputer_strategy: Imputation strategy.

            Defaults to Body(example_diffprivlib).
        user_name (str, optional): The user name.
            Defaults to Header(None).

    Raises:
        ExternalLibraryException: For exceptions from libraries
            external to this package.
        InternalServerException: For any other unforeseen exceptions.
        InvalidQueryException: If there is not enough budget or the
            dataset does not exist.
        UnauthorizedAccessException: A query is already ongoing for this
            user, the user does not exist or does not have access to the
            dataset.

    Returns:
        JSONResponse: A JSON object containing the following:

            - requested_by (str): The user name.
            - query_response (pd.DataFrame): A DataFrame containing
              the query response.
            - spent_epsilon (float): The amount of epsilon budget spent
              for the query.
            - spent_delta (float): The amount of delta budget spent
              for the query.
    """
    return handle_query_on_private_dataset(
        request, query_json, user_name, DPLibraries.DIFFPRIVLIB
    )
@router.post(
    "/dummy_diffprivlib_query",
    dependencies=[Depends(server_live)],
    tags=["USER_DUMMY"],
)
def dummy_diffprivlib_query_handler(
    request: Request,
    query_json: DiffPrivLibDummyQueryModel = Body(example_dummy_diffprivlib),
    user_name: str = Header(None),
) -> JSONResponse:
    """
    Handle a DiffPrivLib query on a dummy dataset.

    The work is delegated to ``handle_query_on_dummy_dataset`` with
    ``DPLibraries.DIFFPRIVLIB``.

    Args:
        request (Request): Raw request object.
        query_json (DiffPrivLibDummyQueryModel, optional): A JSON object
            containing the following:

            - pipeline: The DiffPrivLib pipeline for the query.
            - feature_columns: The list of feature columns to train on.
            - target_columns: The list of target columns to predict.
            - test_size: Proportion of the test set.
            - test_train_split_seed: Seed for the random train/test
              split.
            - imputer_strategy: Imputation strategy.
            - nb_rows (int, optional): The number of rows in the dummy
              dataset (default: 100).
            - seed (int, optional): The random seed for generating the
              dummy dataset (default: 42).

            Defaults to Body(example_dummy_diffprivlib).
        user_name (str, optional): The user name.
            Defaults to Header(None).

    Raises:
        ExternalLibraryException: For exceptions from libraries
            external to this package.
        InternalServerException: For any other unforeseen exceptions.
        InvalidQueryException: If there is not enough budget or the
            dataset does not exist.

    Returns:
        JSONResponse: A JSON object containing:

            - query_response (pd.DataFrame): A DataFrame containing
              the query response.
    """
    return handle_query_on_dummy_dataset(
        request, query_json, user_name, DPLibraries.DIFFPRIVLIB
    )
@router.post(
    "/estimate_diffprivlib_cost",
    dependencies=[Depends(server_live)],
    tags=["USER_QUERY"],
)
def estimate_diffprivlib_cost(
    request: Request,
    query_json: DiffPrivLibRequestModel = Body(example_diffprivlib),
    user_name: str = Header(None),
) -> JSONResponse:
    """
    Estimate the privacy loss budget cost of a DiffPrivLib query.

    The work is delegated to ``handle_cost_query`` with
    ``DPLibraries.DIFFPRIVLIB``.

    Args:
        request (Request): Raw request object.
        query_json (DiffPrivLibRequestModel, optional): A JSON object
            containing the following:

            - pipeline: The DiffPrivLib pipeline for the query.
            - feature_columns: The list of feature columns to train on.
            - target_columns: The list of target columns to predict.
            - test_size: Proportion of the test set.
            - test_train_split_seed: Seed for the random train/test
              split.
            - imputer_strategy: Imputation strategy.

            Defaults to Body(example_diffprivlib).
        user_name (str, optional): The user name.
            Defaults to Header(None).

    Raises:
        ExternalLibraryException: For exceptions from libraries
            external to this package.
        InternalServerException: For any other unforeseen exceptions.
        InvalidQueryException: The dataset does not exist.

    Returns:
        JSONResponse: A JSON object containing:

            - epsilon_cost (float): The estimated epsilon cost.
            - delta_cost (float): The estimated delta cost.
    """
    return handle_cost_query(
        request, query_json, user_name, DPLibraries.DIFFPRIVLIB
    )