# Source code for lomas_server.utils.query_examples

from constants import (
    DIFFPRIVLIB_PIPELINE,
    OPENDP_PIPELINE,
    SSynthGanSynthesizer,
)

# Privacy budget values for dummy queries
DUMMY_EPSILON = 100.0
DUMMY_DELTA = 0.99

# Size and seed used when generating a dummy dataset
DUMMY_SEED = 42
DUMMY_NB_ROWS = 100

# Values shared by the example queries
PENGUIN_DATASET = "PENGUIN"

# Privacy budget used in the example queries
QUERY_EPSILON = 0.1
QUERY_DELTA = 1e-05

# SQL example: query string and per-statistic mechanism mapping
SQL_QUERY = "SELECT COUNT(*) AS NB_ROW FROM df"
DP_MECHANISM = dict(count="gaussian")

# Column selection for the examples that take features and targets
TARGET_COLUMNS = ["species"]
FEATURE_COLUMNS = [
    f"{measure}_mm" for measure in ("bill_length", "bill_depth", "flipper_length")
] + ["body_mass_g"]

# Train/test split and imputation settings
TEST_SIZE = 0.2
SPLIT_SEED = 4
IMPUTER_STRATEGY = "drop"

# Number of samples requested in the synthetic-data example
SNSYNTH_NB_SAMPLES = 200


def make_dummy(example_query):
    """Build the dummy variant of an example query.

    Returns a new dict holding every entry of ``example_query`` plus the
    ``dummy_nb_rows`` and ``dummy_seed`` entries, set to ``DUMMY_NB_ROWS``
    and ``DUMMY_SEED``. The input is left untouched; the copy is shallow,
    so nested values are shared with the input.
    """
    return dict(
        example_query,
        dummy_nb_rows=DUMMY_NB_ROWS,
        dummy_seed=DUMMY_SEED,
    )
# Lomas logic
example_get_admin_db_data = {"dataset_name": PENGUIN_DATASET}
# Same dataset name, extended with the dummy row count and seed.
example_get_dummy_dataset = make_dummy(example_get_admin_db_data)

# Smartnoise-SQL
example_smartnoise_sql_cost = {
    "query_str": SQL_QUERY,
    "dataset_name": PENGUIN_DATASET,
    "epsilon": QUERY_EPSILON,
    "delta": QUERY_DELTA,
    "mechanisms": DP_MECHANISM,
}
# The query example is the cost example plus the "postprocess" flag.
example_smartnoise_sql = {
    **example_smartnoise_sql_cost,
    "postprocess": True,
}
example_dummy_smartnoise_sql = make_dummy(example_smartnoise_sql)

# Smartnoise-Synth
example_smartnoise_synth_cost = {
    "dataset_name": PENGUIN_DATASET,
    "synth_name": SSynthGanSynthesizer.DP_CTGAN,
    "epsilon": QUERY_EPSILON,
    "delta": QUERY_DELTA,
    "select_cols": [],
    "synth_params": {
        "embedding_dim": 128,
        "batch_size": 50,
        "epochs": 5,
    },
    "nullable": True,
    "constraints": "",
}
# The query example is the cost example plus three extra entries.
# The copy is shallow: nested values are shared with the cost example.
example_smartnoise_synth_query = {
    **example_smartnoise_synth_cost,
    "return_model": True,
    "condition": "",
    "nb_samples": SNSYNTH_NB_SAMPLES,
}
example_dummy_smartnoise_synth_query = make_dummy(
    example_smartnoise_synth_query
)

# OpenDP
example_opendp = {
    "dataset_name": PENGUIN_DATASET,
    "opendp_json": OPENDP_PIPELINE,
    "fixed_delta": QUERY_DELTA,
}
example_dummy_opendp = make_dummy(example_opendp)

# DiffPrivLib
example_diffprivlib = {
    "dataset_name": PENGUIN_DATASET,
    "diffprivlib_json": DIFFPRIVLIB_PIPELINE,
    "feature_columns": FEATURE_COLUMNS,
    "target_columns": TARGET_COLUMNS,
    "test_size": TEST_SIZE,
    "test_train_split_seed": SPLIT_SEED,
    "imputer_strategy": IMPUTER_STRATEGY,
}
example_dummy_diffprivlib = make_dummy(example_diffprivlib)