import json
import warnings
from diffprivlib import models
from diffprivlib.utils import (
DiffprivlibCompatibilityWarning,
PrivacyLeakWarning,
)
from diffprivlib_logger import serialise_pipeline
from fastapi import status
from fastapi.testclient import TestClient
from sklearn.pipeline import Pipeline
from lomas_server.app import app
from lomas_server.constants import DPLibraries
from lomas_server.tests.test_api import TestRootAPIEndpoint
from lomas_server.utils.query_examples import (
example_diffprivlib,
example_dummy_diffprivlib,
)
[docs]
def validate_pipeline(response):
"""Validate that the pipeline ran successfully
Returns a model and a score.
"""
assert response.status_code == status.HTTP_200_OK
response_dict = json.loads(response.content.decode("utf8"))
assert "score" in response_dict["query_response"] # might be 0
assert response_dict["query_response"]["model"]
[docs]
class TestDiffPrivLibEndpoint(TestRootAPIEndpoint): # pylint: disable=R0904
"""
Test DiffPrivLib Endpoint with different models
"""
[docs]
def test_diffprivlib_query(self) -> None:
"""Test diffprivlib query"""
with TestClient(app, headers=self.headers) as client:
# Expect to work
response = client.post(
"/diffprivlib_query",
json=example_diffprivlib,
headers=self.headers,
)
assert response.status_code == status.HTTP_200_OK
response_dict = json.loads(response.content.decode("utf8"))
assert response_dict["requested_by"] == self.user_name
assert response_dict["query_response"]["score"] >= 0
assert response_dict["query_response"]["model"]
assert response_dict["spent_epsilon"] > 0
assert response_dict["spent_delta"] == 0
# # Should work for different imputation strategy (but does not yet #255)
def test_imputation(diffprivlib_body, imputer_strategy):
diffprivlib_body = dict(diffprivlib_body)
diffprivlib_body["imputer_strategy"] = imputer_strategy
response = client.post(
"/diffprivlib_query",
json=diffprivlib_body,
headers=self.headers,
)
return response
response = test_imputation(example_diffprivlib, "mean")
response_dict = json.loads(response.content.decode("utf8"))
assert response.status_code == status.HTTP_200_OK
response = test_imputation(example_diffprivlib, "median")
assert response.status_code == status.HTTP_200_OK
response = test_imputation(example_diffprivlib, "most_frequent")
assert response.status_code == status.HTTP_200_OK
# Should not work unknow imputation strategy
response = test_imputation(example_diffprivlib, "i_do_not_exist")
assert response.status_code == status.HTTP_400_BAD_REQUEST
assert response.json() == {
"InvalidQueryException": ""
+ "Imputation strategy i_do_not_exist not supported."
}
# Should not work: Privacy Leak Warning
warnings.simplefilter("error", PrivacyLeakWarning)
diffprivlib_body = dict(example_diffprivlib)
dpl_pipeline = Pipeline(
[
("scaler", models.StandardScaler(epsilon=0.5)),
("classifier", models.LogisticRegression(epsilon=1.0)),
]
)
dpl_string = serialise_pipeline(dpl_pipeline)
diffprivlib_body["diffprivlib_json"] = dpl_string
response = client.post(
"/diffprivlib_query",
json=diffprivlib_body,
headers=self.headers,
)
assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY
assert response.json() == {
"ExternalLibraryException": "PrivacyLeakWarning: "
+ "Bounds parameter hasn't been specified, so falling back to "
+ "determining bounds from the data.\n "
+ "This will result in additional privacy leakage. "
+ "To ensure differential privacy with no additional privacy "
+ "loss, specify `bounds` for each valued returned by "
+ "np.mean().. "
+ "Lomas server cannot fit pipeline on data, "
+ "PrivacyLeakWarning is a blocker.",
"library": DPLibraries.DIFFPRIVLIB,
}
# Should not work: Compatibility Warning
warnings.simplefilter("error", DiffprivlibCompatibilityWarning)
with self.assertRaises(DiffprivlibCompatibilityWarning):
Pipeline(
[
("scaler", models.StandardScaler(epsilon=0.5)),
(
"classifier",
models.LogisticRegression(
epsilon=1.0, svd_solver="full"
),
),
]
)
[docs]
def test_logistic_regression_models(self) -> None:
"""Test diffprivlib query: Logistic Regression"""
with TestClient(app, headers=self.headers) as client:
bounds = ([30.0, 13.0, 150.0, 2000.0], [65.0, 23.0, 250.0, 7000.0])
# Test Logistic Regression
pipeline = Pipeline(
[
(
"scaler",
models.StandardScaler(epsilon=0.5, bounds=bounds),
),
(
"classifier",
models.LogisticRegression(
epsilon=1.0, data_norm=83.69
),
),
]
)
diffprivlib_body = dict(example_diffprivlib)
diffprivlib_body["diffprivlib_json"] = serialise_pipeline(pipeline)
response = client.post(
"/diffprivlib_query",
json=diffprivlib_body,
headers=self.headers,
)
validate_pipeline(response)
[docs]
def test_linear_regression_models(self) -> None:
"""Test diffprivlib query: Linear Regression"""
with TestClient(app, headers=self.headers) as client:
# Test Linear Regression
pipeline = Pipeline(
[
(
"lr",
models.LinearRegression(
epsilon=2.0,
bounds_X=(30.0, 65.0),
bounds_y=(13.0, 23.0),
),
),
]
)
diffprivlib_body = dict(example_diffprivlib)
diffprivlib_body["diffprivlib_json"] = serialise_pipeline(pipeline)
diffprivlib_body["feature_columns"] = ["bill_length_mm"]
diffprivlib_body["target_columns"] = ["bill_length_mm"]
response = client.post(
"/diffprivlib_query",
json=diffprivlib_body,
headers=self.headers,
)
validate_pipeline(response)
[docs]
def test_naives_bayes_model(self) -> None:
"""Test diffprivlib query: Gaussian Naives Bayes"""
with TestClient(app, headers=self.headers) as client:
bounds = ([30.0, 13.0, 150.0, 2000.0], [65.0, 23.0, 250.0, 7000.0])
# Test Gaussian Naives Bayes
pipeline = Pipeline(
[
(
"scaler",
models.StandardScaler(epsilon=0.5, bounds=bounds),
),
(
"gaussian",
models.GaussianNB(
epsilon=1.0, bounds=bounds, priors=(0.3, 0.3, 0.4)
),
),
]
)
diffprivlib_body = dict(example_diffprivlib)
diffprivlib_body["diffprivlib_json"] = serialise_pipeline(pipeline)
response = client.post(
"/diffprivlib_query",
json=diffprivlib_body,
headers=self.headers,
)
validate_pipeline(response)
[docs]
def test_trees_models(self) -> None:
"""Test diffprivlib query: Random Forest, Decision Tree"""
with TestClient(app, headers=self.headers) as client:
bounds = ([30.0, 13.0, 150.0, 2000.0], [65.0, 23.0, 250.0, 7000.0])
# Test Random Forest
pipeline = Pipeline(
[
(
"rf",
models.RandomForestClassifier(
n_estimators=10,
epsilon=2.0,
bounds=bounds,
classes=["Adelie", "Chinstrap", "Gentoo"],
),
),
]
)
diffprivlib_body = dict(example_diffprivlib)
diffprivlib_body["diffprivlib_json"] = serialise_pipeline(pipeline)
response = client.post(
"/diffprivlib_query",
json=diffprivlib_body,
headers=self.headers,
)
validate_pipeline(response)
# Test Decision Tree Classifier
pipeline = Pipeline(
[
(
"dtc",
models.DecisionTreeClassifier(
epsilon=2.0,
bounds=bounds,
classes=["Adelie", "Chinstrap", "Gentoo"],
),
),
]
)
diffprivlib_body = dict(example_diffprivlib)
diffprivlib_body["diffprivlib_json"] = serialise_pipeline(pipeline)
response = client.post(
"/diffprivlib_query",
json=diffprivlib_body,
headers=self.headers,
)
validate_pipeline(response)
[docs]
def test_clustering_models(self) -> None:
"""Test diffprivlib query: K-Means"""
with TestClient(app, headers=self.headers) as client:
bounds = ([30.0, 13.0, 150.0, 2000.0], [65.0, 23.0, 250.0, 7000.0])
# Test K-MEANS
pipeline = Pipeline(
[
(
"kmeans",
models.KMeans(
n_clusters=8, epsilon=2.0, bounds=bounds
),
),
]
)
diffprivlib_body = dict(example_diffprivlib)
diffprivlib_body["diffprivlib_json"] = serialise_pipeline(pipeline)
response = client.post(
"/diffprivlib_query",
json=diffprivlib_body,
headers=self.headers,
)
validate_pipeline(response)
diffprivlib_body["target_columns"] = None
response = client.post(
"/diffprivlib_query",
json=diffprivlib_body,
headers=self.headers,
)
validate_pipeline(response)
[docs]
def test_dimension_reduction_models(self) -> None:
"""Test diffprivlib query: PCA"""
with TestClient(app, headers=self.headers) as client:
bounds = ([30.0, 13.0, 150.0, 2000.0], [65.0, 23.0, 250.0, 7000.0])
# Test PCA
pipeline = Pipeline(
[
(
"pca",
models.PCA(
n_components=8,
epsilon=2.0,
bounds=bounds,
data_norm=100,
),
),
]
)
diffprivlib_body = dict(example_diffprivlib)
diffprivlib_body["diffprivlib_json"] = serialise_pipeline(pipeline)
response = client.post(
"/diffprivlib_query",
json=diffprivlib_body,
headers=self.headers,
)
validate_pipeline(response)
[docs]
def test_dummy_diffprivlib_query(self) -> None:
"""test_dummy_diffprivlib_query"""
with TestClient(app) as client:
# Expect to work
response = client.post(
"/dummy_diffprivlib_query",
json=example_dummy_diffprivlib,
headers=self.headers,
)
assert response.status_code == status.HTTP_200_OK
response_dict = json.loads(response.content.decode("utf8"))
assert response_dict["query_response"]["score"] > 0
assert response_dict["query_response"]["model"]
# Expect to fail: user does have access to dataset
body = dict(example_dummy_diffprivlib)
body["dataset_name"] = "IRIS"
response = client.post(
"/dummy_diffprivlib_query",
json=body,
headers=self.headers,
)
assert response.status_code == status.HTTP_403_FORBIDDEN
assert response.json() == {
"UnauthorizedAccessException": ""
+ f"{self.user_name} does not have access to IRIS."
}
[docs]
def test_diffprivlib_cost(self) -> None:
"""test_diffprivlib_cost"""
with TestClient(app) as client:
# Expect to work
response = client.post(
"/estimate_diffprivlib_cost",
json=example_diffprivlib,
headers=self.headers,
)
assert response.status_code == status.HTTP_200_OK
response_dict = json.loads(response.content.decode("utf8"))
assert response_dict["epsilon_cost"] == 1.5
assert response_dict["delta_cost"] == 0
# Expect to fail: user does have access to dataset
body = dict(example_diffprivlib)
body["dataset_name"] = "IRIS"
response = client.post(
"/estimate_diffprivlib_cost",
json=body,
headers=self.headers,
)
assert response.status_code == status.HTTP_403_FORBIDDEN
assert response.json() == {
"UnauthorizedAccessException": ""
+ f"{self.user_name} does not have access to IRIS."
}