{ "cells": [ { "cell_type": "markdown", "id": "3f18d338", "metadata": {}, "source": [ "# S3 example" ] }, { "cell_type": "markdown", "id": "01ae30d2", "metadata": {}, "source": [ "## Step 1: Install the library\n", "To interact with the secure server on which the data is stored, one first needs to install the library `lomas-client` on her local developping environment. \n", "\n", "It can be installed via the pip command:" ] }, { "cell_type": "code", "execution_count": 1, "id": "28fbdd79-8c15-49a9-bcf9-fcdeac09d2b5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: lomas-client in /usr/local/lib/python3.12/site-packages (0.3.3)\n", "Requirement already satisfied: diffprivlib>=0.6.4 in /usr/local/lib/python3.12/site-packages (from lomas-client) (0.6.4)\n", "Requirement already satisfied: diffprivlib-logger>=0.0.3 in /usr/local/lib/python3.12/site-packages (from lomas-client) (0.0.3)\n", "Requirement already satisfied: numpy>=1.26.2 in /usr/local/lib/python3.12/site-packages (from lomas-client) (1.26.2)\n", "Requirement already satisfied: opendp==0.10.0 in /usr/local/lib/python3.12/site-packages (from lomas-client) (0.10.0)\n", "Requirement already satisfied: opendp-logger==0.3.0 in /usr/local/lib/python3.12/site-packages (from lomas-client) (0.3.0)\n", "Requirement already satisfied: pandas>=2.2.2 in /usr/local/lib/python3.12/site-packages (from lomas-client) (2.2.2)\n", "Requirement already satisfied: requests>=2.32.0 in /usr/local/lib/python3.12/site-packages (from lomas-client) (2.32.0)\n", "Requirement already satisfied: scikit-learn==1.4.0 in /usr/local/lib/python3.12/site-packages (from lomas-client) (1.4.0)\n", "Requirement already satisfied: smartnoise-synth==1.0.4 in /usr/local/lib/python3.12/site-packages (from lomas-client) (1.0.4)\n", "Requirement already satisfied: smartnoise-synth-logger==0.0.3 in /usr/local/lib/python3.12/site-packages (from lomas-client) (0.0.3)\n", "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.12/site-packages (from scikit-learn==1.4.0->lomas-client) (1.14.1)\n", "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.12/site-packages (from scikit-learn==1.4.0->lomas-client) (1.4.2)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.12/site-packages (from scikit-learn==1.4.0->lomas-client) (3.5.0)\n", "Requirement already satisfied: Faker>=17.0.0 in /usr/local/lib/python3.12/site-packages (from smartnoise-synth==1.0.4->lomas-client) (30.1.0)\n", "Requirement already satisfied: opacus<0.15.0,>=0.14.0 in /usr/local/lib/python3.12/site-packages (from smartnoise-synth==1.0.4->lomas-client) (0.14.0)\n", "Requirement already satisfied: pac-synth<0.0.9,>=0.0.8 in /usr/local/lib/python3.12/site-packages (from smartnoise-synth==1.0.4->lomas-client) (0.0.8)\n", "Requirement already satisfied: smartnoise-sql<2.0.0,>=1.0.4 in /usr/local/lib/python3.12/site-packages (from smartnoise-synth==1.0.4->lomas-client) (1.0.4)\n", "Requirement already satisfied: torch>=2.2.0 in /usr/local/lib/python3.12/site-packages (from smartnoise-synth==1.0.4->lomas-client) (2.4.1)\n", "Requirement already satisfied: setuptools>=49.0.0 in /usr/local/lib/python3.12/site-packages (from diffprivlib>=0.6.4->lomas-client) (75.1.0)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/site-packages (from pandas>=2.2.2->lomas-client) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/site-packages (from pandas>=2.2.2->lomas-client) (2024.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/site-packages (from pandas>=2.2.2->lomas-client) (2024.2)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.12/site-packages (from requests>=2.32.0->lomas-client) (3.4.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/site-packages (from requests>=2.32.0->lomas-client) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/site-packages (from requests>=2.32.0->lomas-client) (2.2.3)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/site-packages (from requests>=2.32.0->lomas-client) (2024.8.30)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.12/site-packages (from Faker>=17.0.0->smartnoise-synth==1.0.4->lomas-client) (4.12.2)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas>=2.2.2->lomas-client) (1.16.0)\n", "Requirement already satisfied: PyYAML<7.0.0,>=6.0.1 in /usr/local/lib/python3.12/site-packages (from smartnoise-sql<2.0.0,>=1.0.4->smartnoise-synth==1.0.4->lomas-client) (6.0.2)\n", "Requirement already satisfied: antlr4-python3-runtime==4.9.3 in /usr/local/lib/python3.12/site-packages (from smartnoise-sql<2.0.0,>=1.0.4->smartnoise-synth==1.0.4->lomas-client) (4.9.3)\n", "Requirement already satisfied: graphviz<0.18,>=0.17 in /usr/local/lib/python3.12/site-packages (from smartnoise-sql<2.0.0,>=1.0.4->smartnoise-synth==1.0.4->lomas-client) (0.17)\n", "Requirement already satisfied: sqlalchemy<3.0.0,>=2.0.0 in /usr/local/lib/python3.12/site-packages (from smartnoise-sql<2.0.0,>=1.0.4->smartnoise-synth==1.0.4->lomas-client) (2.0.35)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (3.16.1)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (1.13.3)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (3.3)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (3.1.4)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (2024.9.0)\n", "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (12.1.105)\n", "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (12.1.105)\n", "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (12.1.105)\n", "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (9.1.0.70)\n", "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (12.1.3.1)\n", "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (11.0.2.54)\n", "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (10.3.2.106)\n", "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (11.4.5.107)\n", "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (12.1.0.106)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (2.20.5)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (12.1.105)\n", "Requirement already satisfied: triton==3.0.0 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (3.0.0)\n", "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.12/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (12.6.77)\n", "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.12/site-packages (from sqlalchemy<3.0.0,>=2.0.0->smartnoise-sql<2.0.0,>=1.0.4->smartnoise-synth==1.0.4->lomas-client) (3.1.1)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/site-packages (from jinja2->torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (2.1.5)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/site-packages (from sympy->torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (1.3.0)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable.It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n", "\u001b[0m" ] } ], "source": [ "!pip install lomas-client" ] }, { "cell_type": "code", "execution_count": 1, "id": "6fb569fc", "metadata": {}, "outputs": [], "source": [ "from lomas_client.client import Client\n", "import numpy as np" ] }, { "cell_type": "markdown", "id": "9c63718b", "metadata": {}, "source": [ "## Step 2: Initialise the client\n", "\n", "Once the library is installed, a Client object must be created. It is responsible for sending sending requests to the server and processing responses in the local environment. It enables a seamless interaction with the server. \n", "\n", "To create the client, one needs to give it a few parameters:\n", "- a url: the root application endpoint to the remote secure server.\n", "- user_name: her name as registered in the database (Jack)\n", "- dataset_name: the name of the dataset that we want to query (TITANIC)" ] }, { "cell_type": "code", "execution_count": null, "id": "4ec2f52f", "metadata": {}, "outputs": [], "source": [ "DATASET_NAME = \"TITANIC\"" ] }, { "cell_type": "code", "execution_count": null, "id": "74fdc85e", "metadata": {}, "outputs": [], "source": [ "# The following would usually be set in the environment by a system administrator\n", "# and be tranparent to lomas users.\n", "APP_URL = \"http://localhost:48080\" # For local devenv setup\n", "# APP_URL = \"http://lomas_server:48080\" # For local docker compose setup\n", "# APP_URL = \"http://lomas-server.example.com:80\" # For Kubernetes deployment\n", "USER_NAME = \"Jack\"\n", "\n", "import os\n", "os.environ[\"LOMAS_CLIENT_ID\"] = USER_NAME\n", "os.environ[\"LOMAS_CLIENT_SECRET\"] = USER_NAME.lower()\n", "os.environ[\"LOMAS_KEYCLOAK_ADDRESS\"] = \"localhost\" # For local devenv setup\n", "# os.environ[\"LOMAS_KEYCLOAK_ADDRESS\"] = \"keycloak\" # For local docker compose setup\n", "# os.environ[\"LOMAS_KEYCLOAK_ADDRESS\"] = \"lomas-keycloak.example.com\" # For Kubernetes deployment \n", "os.environ[\"LOMAS_KEYCLOAK_PORT\"] = \"80\" # For local deployments\n", "# os.environ[\"LOMAS_KEYCLOAK_PORT\"] = \"443\" # For Kubernetes deployment\n", "os.environ[\"LOMAS_KEYCLOAK_USE_TLS\"] = \"0\" # For local deployments\n", "# os.environ[\"LOMAS_KEYCLOAK_USE_TLS\"] = \"1\" # For Kubernetes deployments\n", "os.environ[\"LOMAS_REALM\"] = \"lomas\"" ] }, { "cell_type": "code", "execution_count": null, "id": "e629463f", "metadata": {}, "outputs": [], "source": [ "client = Client(url=APP_URL, dataset_name=DATASET_NAME)" ] }, { "cell_type": "markdown", "id": "9b9a5f13", "metadata": {}, "source": [ "## Step 3: Understand the functionnalities of the library" ] }, { "cell_type": "markdown", "id": "c7cb5531", "metadata": {}, "source": [ "### Getting dataset metadata" ] }, { "cell_type": "code", "execution_count": 3, "id": "d15cbe39", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'max_ids': 1,\n", " 'rows': 887,\n", " 'row_privacy': True,\n", " 'censor_dims': False,\n", " 'columns': {'Pclass': {'private_id': False,\n", " 'nullable': False,\n", " 'max_partition_length': None,\n", " 'max_influenced_partitions': None,\n", " 'max_partition_contributions': None,\n", " 'type': 'int',\n", " 'precision': 32,\n", " 'lower': 1,\n", " 'upper': 3},\n", " 'Name': {'private_id': False,\n", " 'nullable': False,\n", " 'max_partition_length': None,\n", " 'max_influenced_partitions': None,\n", " 'max_partition_contributions': None,\n", " 'type': 'string'},\n", " 'Sex': {'private_id': False,\n", " 'nullable': False,\n", " 'max_partition_length': None,\n", " 'max_influenced_partitions': None,\n", " 'max_partition_contributions': None,\n", " 'type': 'string',\n", " 'cardinality': 2,\n", " 'categories': ['male', 'female']},\n", " 'Age': {'private_id': False,\n", " 'nullable': False,\n", " 'max_partition_length': None,\n", " 'max_influenced_partitions': None,\n", " 'max_partition_contributions': None,\n", " 'type': 'float',\n", " 'precision': 64,\n", " 'lower': 0.1,\n", " 'upper': 100.0},\n", " 'SibSp': {'private_id': False,\n", " 'nullable': False,\n", " 'max_partition_length': None,\n", " 'max_influenced_partitions': None,\n", " 'max_partition_contributions': None,\n", " 'type': 'int',\n", " 'precision': 32,\n", " 'lower': 0,\n", " 'upper': 10},\n", " 'Parch': {'private_id': False,\n", " 'nullable': False,\n", " 'max_partition_length': None,\n", " 'max_influenced_partitions': None,\n", " 'max_partition_contributions': None,\n", " 'type': 'int',\n", " 'precision': 32,\n", " 'lower': 0,\n", " 'upper': 10},\n", " 'Ticket': {'private_id': False,\n", " 'nullable': False,\n", " 'max_partition_length': None,\n", " 'max_influenced_partitions': None,\n", " 'max_partition_contributions': None,\n", " 'type': 'string'},\n", " 'Fare': {'private_id': False,\n", " 'nullable': False,\n", " 'max_partition_length': None,\n", " 'max_influenced_partitions': None,\n", " 'max_partition_contributions': None,\n", " 'type': 'float',\n", " 'precision': 64,\n", " 'lower': 0.0,\n", " 'upper': 1000.0},\n", " 'Cabin': {'private_id': False,\n", " 'nullable': False,\n", " 'max_partition_length': None,\n", " 'max_influenced_partitions': None,\n", " 'max_partition_contributions': None,\n", " 'type': 'string'},\n", " 'Embarked': {'private_id': False,\n", " 'nullable': False,\n", " 'max_partition_length': None,\n", " 'max_influenced_partitions': None,\n", " 'max_partition_contributions': None,\n", " 'type': 'string',\n", " 'cardinality': 3,\n", " 'categories': ['C', 'Q', 'S']},\n", " 'Survived': {'private_id': False,\n", " 'nullable': False,\n", " 'max_partition_length': None,\n", " 'max_influenced_partitions': None,\n", " 'max_partition_contributions': None,\n", " 'type': 'boolean'}}}" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "titanic_metadata = client.get_dataset_metadata()\n", "titanic_metadata" ] }, { "cell_type": "markdown", "id": "5a3c899d", "metadata": {}, "source": [ "### Get a dummy dataset" ] }, { "cell_type": "code", "execution_count": 4, "id": "01f4365a", "metadata": {}, "outputs": [], "source": [ "NB_ROWS = 200\n", "SEED = 0" ] }, { "cell_type": "code", "execution_count": 5, "id": "3f553b29", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(200, 11)\n" ] }, { "data": { "text/html": [ "
\n", " | Pclass | \n", "Name | \n", "Sex | \n", "Age | \n", "SibSp | \n", "Parch | \n", "Ticket | \n", "Fare | \n", "Cabin | \n", "Embarked | \n", "Survived | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "3 | \n", "o | \n", "female | \n", "89.690443 | \n", "6 | \n", "6 | \n", "2 | \n", "858.435326 | \n", "U | \n", "S | \n", "True | \n", "
1 | \n", "2 | \n", "D | \n", "male | \n", "58.373673 | \n", "0 | \n", "0 | \n", "Z | \n", "620.908898 | \n", "a | \n", "C | \n", "True | \n", "
2 | \n", "2 | \n", "u | \n", "female | \n", "4.117800 | \n", "2 | \n", "4 | \n", "h | \n", "193.917948 | \n", "G | \n", "S | \n", "True | \n", "
3 | \n", "1 | \n", "o | \n", "male | \n", "71.177534 | \n", "9 | \n", "7 | \n", "a | \n", "687.914521 | \n", "Z | \n", "Q | \n", "True | \n", "
4 | \n", "1 | \n", "3 | \n", "male | \n", "56.945683 | \n", "4 | \n", "10 | \n", "1 | \n", "758.999002 | \n", "W | \n", "S | \n", "True | \n", "