Source code for lomas_server.constants

import os
import string
from enum import StrEnum

import pkg_resources

# Config
# -----------------------------------------------------------------------------

# Get config and secrets from correct location
if "LOMAS_CONFIG_PATH" in os.environ:
    CONFIG_PATH = f"""{os.environ.get("LOMAS_CONFIG_PATH")}"""
    print(CONFIG_PATH)
else:
    CONFIG_PATH = "/usr/lomas_server/runtime.yaml"

if "LOMAS_SECRETS_PATH" in os.environ:
    SECRETS_PATH = f"""{os.environ.get("LOMAS_SECRETS_PATH")}"""
else:
    SECRETS_PATH = "/usr/lomas_server/secrets.yaml"


# Misc
# -----------------------------------------------------------------------------

# Server states
DB_NOT_LOADED = "User database not loaded"
CONFIG_NOT_LOADED = "Config not loaded"
SERVER_LIVE = "LIVE"

# General values
SECONDS_IN_A_DAY = 60 * 60 * 24

# DP constants (max budget per user per dataset)
EPSILON_LIMIT: float = 10.0
DELTA_LIMIT: float = 0.01

# Dummy dataset generation
DUMMY_NB_ROWS = 100
DUMMY_SEED = 42
RANDOM_STRINGS = list(string.ascii_lowercase + string.ascii_uppercase + string.digits)
NB_RANDOM_NONE = 5  # if nullable, how many random none to add

# Data preprocessing
NUMERICAL_DTYPES = ["int16", "int32", "int64", "float16", "float32", "float64"]


# DP Libraries
# -----------------------------------------------------------------------------

# Smartnoise sql
SSQL_STATS = ["count", "sum_int", "sum_large_int", "sum_float", "threshold"]
SSQL_MAX_ITERATION = 5


# Smartnoise synth
[docs] class SSynthTableTransStyle(StrEnum): """Transformer style for smartnoise synth.""" GAN = "gan" # for SSynthGanSynthesizer CUBE = "cube" # for SSynthMarginalSynthesizer
[docs] class SSynthColumnType(StrEnum): """Type of columns for SmartnoiseSynth transformer pre-processing.""" PRIVATE_ID = "private_id" CATEGORICAL = "categorical" CONTINUOUS = "continuous" DATETIME = "datetime"
SSYNTH_PRIVATE_COLUMN = "uuid4" SSYNTH_DEFAULT_BINS = 10 SSYNTH_MIN_ROWS_PATE_GAN = 1000 # OpenDP
[docs] class OpenDPMeasurement(StrEnum): """Type of divergence for opendp measurement. see https://docs.opendp.org/en/stable/api/python/opendp.measurements.html """ FIXED_SMOOTHED_MAX_DIVERGENCE = "fixed_smoothed_max_divergence" MAX_DIVERGENCE = "max_divergence" SMOOTHED_MAX_DIVERGENCE = "smoothed_max_divergence" ZERO_CONCENTRATED_DIVERGENCE = "zero_concentrated_divergence"
[docs] class OpenDPDatasetInputMetric(StrEnum): """Type of opendp input metric for datasets. see https://docs.opendp.org/en/stable/api/python/opendp.metrics.html see https://github.com/opendp/opendp/blob/main/rust/src/metrics/mod.rs """ SYMMETRIC_DISTANCE = "SymmetricDistance" INSERT_DELETE_DISTANCE = "InsertDeleteDistance" CHANGE_ONE_DISTANCE = "ChangeOneDistance" HAMMING_DISTANCE = "HammingDistance" INT_DISTANCE = "u32" # opendp type for distance between datasets
# Example pipeline inputs OPENDP_VERSION = pkg_resources.get_distribution("opendp").version DIFFPRIVLIB_VERSION = pkg_resources.get_distribution("diffprivlib").version # Example inputs # ----------------------------------------------------------------------------- OPENDP_PIPELINE = ( f'{{"version": "{OPENDP_VERSION}", ' '"ast": {' '"_type": "partial_chain", "lhs": {' '"_type": "partial_chain", "lhs": {' '"_type": "partial_chain", "lhs": {' '"_type": "partial_chain", "lhs": {' '"_type": "partial_chain", "lhs": {' '"_type": "constructor", ' '"func": "make_chain_tt", ' '"module": "combinators", ' '"args": [' "{" '"_type": "constructor", ' '"func": "make_select_column", ' '"module": "transformations", ' '"kwargs": {"key": "bill_length_mm", "TOA": "String"}' "}, {" '"_type": "constructor", ' '"func": "make_split_dataframe", ' '"module": "transformations", ' '"kwargs": {"separator": ",", "col_names": {"_type": ' '"list", "_items": ["species", "island", ' '"bill_length_mm", "bill_depth_mm", "flipper_length_' 'mm", "body_mass_g", "sex"]}}' "}]}, " '"rhs": {' '"_type": "constructor", ' '"func": "then_cast_default", ' '"module": "transformations", ' '"kwargs": {"TOA": "f64"}' "}}, " '"rhs": {' '"_type": "constructor", ' '"func": "then_clamp", ' '"module": "transformations", ' '"kwargs": {"bounds": [30.0, 65.0]}' "}}, " '"rhs": {' '"_type": "constructor", ' '"func": "then_resize", ' '"module": "transformations", ' '"kwargs": {"size": 346, "constant": 43.61}' "}}, " '"rhs": {' '"_type": "constructor", ' '"func": "then_variance", ' '"module": "transformations"' "}}, " '"rhs": {' '"_type": "constructor", ' '"func": "then_laplace", ' '"module": "measurements", ' '"kwargs": {"scale": 5.0}' "}}}" ) DIFFPRIVLIB_PIPELINE = ( '{"module": "diffprivlib", ' f'"version": "{DIFFPRIVLIB_VERSION}", ' '"pipeline": [' "{" '"type": "_dpl_type:StandardScaler", ' '"name": "scaler", ' '"params": {' '"with_mean": true, ' '"with_std": true, ' '"copy": true, ' '"epsilon": 0.5, ' '"bounds": {' '"_tuple": true, ' '"_items": [[30.0, 13.0, 150.0, 2000.0], [65.0, 23.0, 250.0, 7000.0]]' "}, " '"random_state": null, ' '"accountant": "_dpl_instance:BudgetAccountant"' "}" "}, " "{" '"type": "_dpl_type:LogisticRegression", ' '"name": "classifier", ' '"params": {' '"tol": 0.0001, ' '"C": 1.0, ' '"fit_intercept": true, ' '"random_state": null, ' '"max_iter": 100, ' '"verbose": 0, ' '"warm_start": false, ' '"n_jobs": null, ' '"epsilon": 1.0, ' '"data_norm": 83.69469642643347, ' '"accountant": "_dpl_instance:BudgetAccountant"' "}" "}" "]" "}" )