{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "3f18d338",
   "metadata": {},
   "source": [
    "# S3 example"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "01ae30d2",
   "metadata": {},
   "source": [
    "## Step 1: Install the library\n",
    "To interact with the secure server on which the data is stored, one first needs to install the library `lomas-client` on her local developping environment. \n",
    "\n",
    "It can be installed via the pip command:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "28fbdd79-8c15-49a9-bcf9-fcdeac09d2b5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: lomas-client in /usr/local/lib/python3.12/site-packages (0.3.3)\n",
      "Requirement already satisfied: diffprivlib>=0.6.4 in /usr/local/lib/python3.12/site-packages (from lomas-client) (0.6.4)\n",
      "Requirement already satisfied: diffprivlib-logger>=0.0.3 in /usr/local/lib/python3.12/site-packages (from lomas-client) (0.0.3)\n",
      "Requirement already satisfied: numpy>=1.26.2 in /usr/local/lib/python3.12/site-packages (from lomas-client) (1.26.2)\n",
      "Requirement already satisfied: opendp==0.10.0 in /usr/local/lib/python3.12/site-packages (from lomas-client) (0.10.0)\n",
      "Requirement already satisfied: opendp-logger==0.3.0 in /usr/local/lib/python3.12/site-packages (from lomas-client) (0.3.0)\n",
      "Requirement already satisfied: pandas>=2.2.2 in /usr/local/lib/python3.12/site-packages (from lomas-client) (2.2.2)\n",
      "Requirement already satisfied: requests>=2.32.0 in /usr/local/lib/python3.12/site-packages (from lomas-client) (2.32.0)\n",
      "Requirement already satisfied: scikit-learn==1.4.0 in /usr/local/lib/python3.12/site-packages (from lomas-client) (1.4.0)\n",
      "Requirement already satisfied: smartnoise-synth==1.0.4 in /usr/local/lib/python3.12/site-packages (from lomas-client) (1.0.4)\n",
      "Requirement already satisfied: smartnoise-synth-logger==0.0.3 in /usr/local/lib/python3.12/site-packages (from lomas-client) (0.0.3)\n",
      "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.12/site-packages (from scikit-learn==1.4.0->lomas-client) (1.14.1)\n",
      "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.12/site-packages (from scikit-learn==1.4.0->lomas-client) (1.4.2)\n",
      "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.12/site-packages (from scikit-learn==1.4.0->lomas-client) (3.5.0)\n",
      "Requirement already satisfied: Faker>=17.0.0 in /usr/local/lib/python3.12/site-packages (from smartnoise-synth==1.0.4->lomas-client) (30.1.0)\n",
      "Requirement already satisfied: opacus<0.15.0,>=0.14.0 in /usr/local/lib/python3.12/site-packages (from smartnoise-synth==1.0.4->lomas-client) (0.14.0)\n",
      "Requirement already satisfied: pac-synth<0.0.9,>=0.0.8 in /usr/local/lib/python3.12/site-packages (from smartnoise-synth==1.0.4->lomas-client) (0.0.8)\n",
      "Requirement already satisfied: smartnoise-sql<2.0.0,>=1.0.4 in /usr/local/lib/python3.12/site-packages (from smartnoise-synth==1.0.4->lomas-client) (1.0.4)\n",
      "Requirement already satisfied: torch>=2.2.0 in /usr/local/lib/python3.12/site-packages (from smartnoise-synth==1.0.4->lomas-client) (2.4.1)\n",
      "Requirement already satisfied: setuptools>=49.0.0 in /usr/local/lib/python3.12/site-packages (from diffprivlib>=0.6.4->lomas-client) (75.1.0)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/site-packages (from pandas>=2.2.2->lomas-client) (2.9.0.post0)\n",
      "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/site-packages (from pandas>=2.2.2->lomas-client) (2024.2)\n",
      "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/site-packages (from pandas>=2.2.2->lomas-client) (2024.2)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.12/site-packages (from requests>=2.32.0->lomas-client) (3.4.0)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/site-packages (from requests>=2.32.0->lomas-client) (3.10)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/site-packages (from requests>=2.32.0->lomas-client) (2.2.3)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/site-packages (from requests>=2.32.0->lomas-client) (2024.8.30)\n",
      "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.12/site-packages (from Faker>=17.0.0->smartnoise-synth==1.0.4->lomas-client) (4.12.2)\n",
      "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas>=2.2.2->lomas-client) (1.16.0)\n",
      "Requirement already satisfied: PyYAML<7.0.0,>=6.0.1 in /usr/local/lib/python3.12/site-packages (from smartnoise-sql<2.0.0,>=1.0.4->smartnoise-synth==1.0.4->lomas-client) (6.0.2)\n",
      "Requirement already satisfied: antlr4-python3-runtime==4.9.3 in /usr/local/lib/python3.12/site-packages (from smartnoise-sql<2.0.0,>=1.0.4->smartnoise-synth==1.0.4->lomas-client) (4.9.3)\n",
      "Requirement already satisfied: graphviz<0.18,>=0.17 in /usr/local/lib/python3.12/site-packages (from smartnoise-sql<2.0.0,>=1.0.4->smartnoise-synth==1.0.4->lomas-client) (0.17)\n",
      "Requirement already satisfied: sqlalchemy<3.0.0,>=2.0.0 in /usr/local/lib/python3.12/site-packages (from smartnoise-sql<2.0.0,>=1.0.4->smartnoise-synth==1.0.4->lomas-client) (2.0.35)\n",
      "Requirement already satisfied: filelock in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (3.16.1)\n",
      "Requirement already satisfied: sympy in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (1.13.3)\n",
      "Requirement already satisfied: networkx in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (3.3)\n",
      "Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (3.1.4)\n",
      "Requirement already satisfied: fsspec in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (2024.9.0)\n",
      "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (9.1.0.70)\n",
      "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (12.1.3.1)\n",
      "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (11.0.2.54)\n",
      "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (10.3.2.106)\n",
      "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (11.4.5.107)\n",
      "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (12.1.0.106)\n",
      "Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (2.20.5)\n",
      "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (12.1.105)\n",
      "Requirement already satisfied: triton==3.0.0 in /usr/local/lib/python3.12/site-packages (from torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (3.0.0)\n",
      "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.12/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (12.6.77)\n",
      "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.12/site-packages (from sqlalchemy<3.0.0,>=2.0.0->smartnoise-sql<2.0.0,>=1.0.4->smartnoise-synth==1.0.4->lomas-client) (3.1.1)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/site-packages (from jinja2->torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (2.1.5)\n",
      "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/site-packages (from sympy->torch>=2.2.0->smartnoise-synth==1.0.4->lomas-client) (1.3.0)\n",
      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable.It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n",
      "\u001b[0m"
     ]
    }
   ],
   "source": [
    "!pip install lomas-client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6fb569fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "from lomas_client.client import Client\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9c63718b",
   "metadata": {},
   "source": [
    "## Step 2: Initialise the client\n",
    "\n",
    "Once the library is installed, a Client object must be created. It is responsible for sending sending requests to the server and processing responses in the local environment. It enables a seamless interaction with the server. \n",
    "\n",
    "To create the client, one needs to give it a few parameters:\n",
    "- a url: the root application endpoint to the remote secure server.\n",
    "- user_name: her name as registered in the database (Jack)\n",
    "- dataset_name: the name of the dataset that we want to query (TITANIC)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ec2f52f",
   "metadata": {},
   "outputs": [],
   "source": [
    "DATASET_NAME = \"TITANIC\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "74fdc85e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The following would usually be set in the environment by a system administrator\n",
    "# and be tranparent to lomas users.\n",
    "APP_URL = \"http://localhost:48080\"                                    # For local devenv setup\n",
    "# APP_URL = \"http://lomas_server:48080\"                               # For local docker compose setup\n",
    "# APP_URL = \"http://lomas-server.example.com:80\"                 # For Kubernetes deployment\n",
    "USER_NAME = \"Jack\"\n",
    "\n",
    "import os\n",
    "os.environ[\"LOMAS_CLIENT_ID\"] = USER_NAME\n",
    "os.environ[\"LOMAS_CLIENT_SECRET\"] = USER_NAME.lower()\n",
    "os.environ[\"LOMAS_KEYCLOAK_ADDRESS\"] = \"localhost\"                    # For local devenv setup\n",
    "# os.environ[\"LOMAS_KEYCLOAK_ADDRESS\"] = \"keycloak\"                   # For local docker compose setup\n",
    "# os.environ[\"LOMAS_KEYCLOAK_ADDRESS\"] = \"lomas-keycloak.example.com\" # For Kubernetes deployment \n",
    "os.environ[\"LOMAS_KEYCLOAK_PORT\"] = \"80\"                              # For local deployments\n",
    "# os.environ[\"LOMAS_KEYCLOAK_PORT\"] = \"443\"                           # For Kubernetes deployment\n",
    "os.environ[\"LOMAS_KEYCLOAK_USE_TLS\"] = \"0\"                            # For local deployments\n",
    "# os.environ[\"LOMAS_KEYCLOAK_USE_TLS\"] = \"1\"                          # For Kubernetes deployments\n",
    "os.environ[\"LOMAS_REALM\"] = \"lomas\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e629463f",
   "metadata": {},
   "outputs": [],
   "source": [
    "client = Client(url=APP_URL, dataset_name=DATASET_NAME)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9b9a5f13",
   "metadata": {},
   "source": [
    "## Step 3: Understand the functionnalities of the library"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c7cb5531",
   "metadata": {},
   "source": [
    "### Getting dataset metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d15cbe39",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'max_ids': 1,\n",
       " 'rows': 887,\n",
       " 'row_privacy': True,\n",
       " 'censor_dims': False,\n",
       " 'columns': {'Pclass': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'int',\n",
       "   'precision': 32,\n",
       "   'lower': 1,\n",
       "   'upper': 3},\n",
       "  'Name': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'string'},\n",
       "  'Sex': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'string',\n",
       "   'cardinality': 2,\n",
       "   'categories': ['male', 'female']},\n",
       "  'Age': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'float',\n",
       "   'precision': 64,\n",
       "   'lower': 0.1,\n",
       "   'upper': 100.0},\n",
       "  'SibSp': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'int',\n",
       "   'precision': 32,\n",
       "   'lower': 0,\n",
       "   'upper': 10},\n",
       "  'Parch': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'int',\n",
       "   'precision': 32,\n",
       "   'lower': 0,\n",
       "   'upper': 10},\n",
       "  'Ticket': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'string'},\n",
       "  'Fare': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'float',\n",
       "   'precision': 64,\n",
       "   'lower': 0.0,\n",
       "   'upper': 1000.0},\n",
       "  'Cabin': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'string'},\n",
       "  'Embarked': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'string',\n",
       "   'cardinality': 3,\n",
       "   'categories': ['C', 'Q', 'S']},\n",
       "  'Survived': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'boolean'}}}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic_metadata = client.get_dataset_metadata()\n",
    "titanic_metadata"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5a3c899d",
   "metadata": {},
   "source": [
    "### Get a dummy dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "01f4365a",
   "metadata": {},
   "outputs": [],
   "source": [
    "NB_ROWS = 200\n",
    "SEED = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "3f553b29",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(200, 11)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "      <th>Survived</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>o</td>\n",
       "      <td>female</td>\n",
       "      <td>89.690443</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>858.435326</td>\n",
       "      <td>U</td>\n",
       "      <td>S</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>D</td>\n",
       "      <td>male</td>\n",
       "      <td>58.373673</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Z</td>\n",
       "      <td>620.908898</td>\n",
       "      <td>a</td>\n",
       "      <td>C</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>u</td>\n",
       "      <td>female</td>\n",
       "      <td>4.117800</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>h</td>\n",
       "      <td>193.917948</td>\n",
       "      <td>G</td>\n",
       "      <td>S</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>o</td>\n",
       "      <td>male</td>\n",
       "      <td>71.177534</td>\n",
       "      <td>9</td>\n",
       "      <td>7</td>\n",
       "      <td>a</td>\n",
       "      <td>687.914521</td>\n",
       "      <td>Z</td>\n",
       "      <td>Q</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>male</td>\n",
       "      <td>56.945683</td>\n",
       "      <td>4</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>758.999002</td>\n",
       "      <td>W</td>\n",
       "      <td>S</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Pclass Name     Sex        Age  SibSp  Parch Ticket        Fare Cabin  \\\n",
       "0       3    o  female  89.690443      6      6      2  858.435326     U   \n",
       "1       2    D    male  58.373673      0      0      Z  620.908898     a   \n",
       "2       2    u  female   4.117800      2      4      h  193.917948     G   \n",
       "3       1    o    male  71.177534      9      7      a  687.914521     Z   \n",
       "4       1    3    male  56.945683      4     10      1  758.999002     W   \n",
       "\n",
       "  Embarked  Survived  \n",
       "0        S      True  \n",
       "1        C      True  \n",
       "2        S      True  \n",
       "3        Q      True  \n",
       "4        S      True  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_dummy = client.get_dummy_dataset(\n",
    "    nb_rows = NB_ROWS, \n",
    "    seed = SEED\n",
    ")\n",
    "\n",
    "print(df_dummy.shape)\n",
    "df_dummy.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "98e6fda2-dde7-4f8b-a787-c9a1e3571ebe",
   "metadata": {},
   "source": [
    "### Query on dummy dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "243c73e3-daec-45d6-a3c8-ae1d60439ec4",
   "metadata": {},
   "source": [
    "#### Average and number of rows with smartnoise-sql library on remote dummy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "3946425d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Average Age\n",
    "QUERY = \"SELECT COUNT(*) AS nb_passengers, \\\n",
    "        AVG(Age) AS avg_age \\\n",
    "        FROM df\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "90cf2a6d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# On the remote server dummy dataframe\n",
    "dummy_res = client.smartnoise_sql.query(\n",
    "    query = QUERY,  \n",
    "    epsilon = 100.0, # make sure to select high values of epsilon and delta to have small differences\n",
    "    delta = 2.0,     # make sure to select high values of epsilon and delta to have small differences\n",
    "    dummy = True, \n",
    "    nb_rows = NB_ROWS,\n",
    "    seed = SEED\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "a30f277e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Average age in remote dummy: 51.7 years old.\n",
      "Number of rows in remote dummy: 199.\n"
     ]
    }
   ],
   "source": [
    "print(f\"Average age in remote dummy: {np.round(dummy_res.result.df['avg_age'][0], 2)} years old.\")\n",
    "print(f\"Number of rows in remote dummy: {np.round(dummy_res.result.df['nb_passengers'][0], 2)}.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "324454ed",
   "metadata": {},
   "source": [
    "### Get current budget"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "61a467f3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "InitialBudgetResponse(initial_epsilon=45.0, initial_delta=0.2)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "client.get_initial_budget()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "afd22f84",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SpentBudgetResponse(total_spent_epsilon=0.0, total_spent_delta=0.0)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "client.get_total_spent_budget()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "05daf5a4",
   "metadata": {},
   "source": [
    "It will also be useful to know what the remaining budget is. Therefore, we call the function `get_remaining_budget`. It just substarcts the total spent budget from the initial budget."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "6260cf54",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RemainingBudgetResponse(remaining_epsilon=45.0, remaining_delta=0.2)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "client.get_remaining_budget()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "20298e00",
   "metadata": {},
   "source": [
    "As expected, for now the remaining budget is equal to the inital budget."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b746374c",
   "metadata": {},
   "source": [
    "### Estimate cost of a query\n",
    "Another safeguard is the functionnality to estimate the cost of a query. As in OpenDP and SmartnoiseSQL, the budget that will by used by a query might be slightly different than what is asked by the user. The `estimate cost` function returns the estimated real cost of any query.\n",
    "\n",
    "Again, of course, this will not impact the user's budget."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "fd5ed08a",
   "metadata": {},
   "outputs": [],
   "source": [
    "EPSILON = 0.5\n",
    "DELTA = 1e-4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "133020c6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CostResponse(epsilon=1.5, delta=0.00014999500000001387)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "client.smartnoise_sql.cost(\n",
    "    query = QUERY, \n",
    "    epsilon = EPSILON, \n",
    "    delta = DELTA\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e5379edf",
   "metadata": {},
   "source": [
    "### Query on real private dataset with smartnoise-sql."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "19e60263",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RemainingBudgetResponse(remaining_epsilon=45.0, remaining_delta=0.2)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "client.get_remaining_budget()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "69767fac",
   "metadata": {},
   "outputs": [],
   "source": [
    "response = client.smartnoise_sql.query(\n",
    "    query = QUERY,  \n",
    "    epsilon = EPSILON, \n",
    "    delta = DELTA,\n",
    "    dummy = False # Optionnal\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a7a4e2d3-2922-4f95-bdc9-a35c160f157c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "6dbbdf93",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of passengers in real data: 887.\n",
      "Average age in real data: 29.34.\n"
     ]
    }
   ],
   "source": [
    "nb_passengers = response.result.df['nb_passengers'].iloc[0]\n",
    "print(f\"Number of passengers in real data: {nb_passengers}.\")\n",
    "\n",
    "avg_age = np.round(response.result.df['avg_age'].iloc[0], 2)\n",
    "print(f\"Average age in real data: {avg_age}.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b2767e65",
   "metadata": {},
   "source": [
    "After each query on the real dataset, the budget informations are also returned to the researcher. It is possible possible to check the remaining budget again afterwards:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "39701fe5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RemainingBudgetResponse(remaining_epsilon=43.5, remaining_delta=0.199850005)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "client.get_remaining_budget()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e37c587f",
   "metadata": {},
   "source": [
    "As can be seen in `get_total_spent_budget()`, it is the budget estimated with `estimate_smartnoise_sql_cost()` that was spent."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "487f835f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SpentBudgetResponse(total_spent_epsilon=1.5, total_spent_delta=0.00014999500000001387)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "client.get_total_spent_budget()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "04929993",
   "metadata": {},
   "source": [
    "## Step 4: Titanic statistics with opendp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "b9685226",
   "metadata": {},
   "outputs": [],
   "source": [
    "import opendp as dp\n",
    "import opendp.transformations as trans\n",
    "import opendp.measurements as meas"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bbbca191",
   "metadata": {},
   "source": [
    "### Confidence intervals for age over the whole population"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "4331d86f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'max_ids': 1,\n",
       " 'rows': 887,\n",
       " 'row_privacy': True,\n",
       " 'censor_dims': False,\n",
       " 'columns': {'Pclass': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'int',\n",
       "   'precision': 32,\n",
       "   'lower': 1,\n",
       "   'upper': 3},\n",
       "  'Name': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'string'},\n",
       "  'Sex': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'string',\n",
       "   'cardinality': 2,\n",
       "   'categories': ['male', 'female']},\n",
       "  'Age': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'float',\n",
       "   'precision': 64,\n",
       "   'lower': 0.1,\n",
       "   'upper': 100.0},\n",
       "  'SibSp': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'int',\n",
       "   'precision': 32,\n",
       "   'lower': 0,\n",
       "   'upper': 10},\n",
       "  'Parch': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'int',\n",
       "   'precision': 32,\n",
       "   'lower': 0,\n",
       "   'upper': 10},\n",
       "  'Ticket': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'string'},\n",
       "  'Fare': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'float',\n",
       "   'precision': 64,\n",
       "   'lower': 0.0,\n",
       "   'upper': 1000.0},\n",
       "  'Cabin': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'string'},\n",
       "  'Embarked': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'string',\n",
       "   'cardinality': 3,\n",
       "   'categories': ['C', 'Q', 'S']},\n",
       "  'Survived': {'private_id': False,\n",
       "   'nullable': False,\n",
       "   'max_partition_length': None,\n",
       "   'max_influenced_partitions': None,\n",
       "   'max_partition_contributions': None,\n",
       "   'type': 'boolean'}}}"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic_metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "ff8cb7b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "columns = [\"PassengerId\", \"Pclass\", \"Name\", \"Sex\", \"Age\", \"SibSp\", \"Parch\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "70b2bdb1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.1, 100.0)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "age_min = titanic_metadata['columns']['Age']['lower']\n",
    "age_max = titanic_metadata['columns']['Age']['upper']\n",
    "age_min, age_max"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "75e4933b",
   "metadata": {},
   "outputs": [],
   "source": [
    "age_transformation_pipeline = (\n",
    "    trans.make_split_dataframe(separator=\",\", col_names=columns) >>\n",
    "    trans.make_select_column(key=\"Age\", TOA=str) >>\n",
    "    trans.then_cast_default(TOA=float) >>\n",
    "    trans.then_clamp(bounds=(age_min, age_max)) >>\n",
    "    trans.then_resize(size=nb_passengers.tolist(), constant=avg_age) >>\n",
    "    trans.then_variance()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "8041a647",
   "metadata": {},
   "outputs": [
    {
     "ename": "InvalidQueryException",
     "evalue": "The pipeline provided is not a measurement. It cannot be processed in this server.",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mInvalidQueryException\u001b[0m                     Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[24], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;66;03m# Expect to fail !!!\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopendp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      3\u001b[0m \u001b[43m    \u001b[49m\u001b[43mopendp_pipeline\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mage_transformation_pipeline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      4\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdummy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\n\u001b[1;32m      5\u001b[0m \u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/code/lomas_client/libraries/opendp.py:105\u001b[0m, in \u001b[0;36mOpenDPClient.query\u001b[0;34m(self, opendp_pipeline, fixed_delta, dummy, nb_rows, seed)\u001b[0m\n\u001b[1;32m    102\u001b[0m body \u001b[38;5;241m=\u001b[39m request_model\u001b[38;5;241m.\u001b[39mmodel_validate(body_dict)\n\u001b[1;32m    103\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhttp_client\u001b[38;5;241m.\u001b[39mpost(endpoint, body)\n\u001b[0;32m--> 105\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mvalidate_model_response\u001b[49m\u001b[43m(\u001b[49m\u001b[43mres\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mQueryResponse\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/code/lomas_client/utils.py:83\u001b[0m, in \u001b[0;36mvalidate_model_response\u001b[0;34m(response, response_model)\u001b[0m\n\u001b[1;32m     80\u001b[0m     r_model \u001b[38;5;241m=\u001b[39m response_model\u001b[38;5;241m.\u001b[39mmodel_validate_json(data)\n\u001b[1;32m     81\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m r_model\n\u001b[0;32m---> 83\u001b[0m \u001b[43mraise_error\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     84\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
      "File \u001b[0;32m/code/lomas_client/utils.py:26\u001b[0m, in \u001b[0;36mraise_error\u001b[0;34m(response)\u001b[0m\n\u001b[1;32m     24\u001b[0m error_message \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mjson()\n\u001b[1;32m     25\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m status\u001b[38;5;241m.\u001b[39mHTTP_400_BAD_REQUEST:\n\u001b[0;32m---> 26\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m InvalidQueryException(error_message[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalidQueryException\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m     27\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m status\u001b[38;5;241m.\u001b[39mHTTP_422_UNPROCESSABLE_ENTITY:\n\u001b[1;32m     28\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m ExternalLibraryException(\n\u001b[1;32m     29\u001b[0m         error_message[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlibrary\u001b[39m\u001b[38;5;124m\"\u001b[39m], error_message[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExternalLibraryException\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m     30\u001b[0m     )\n",
      "\u001b[0;31mInvalidQueryException\u001b[0m: The pipeline provided is not a measurement. It cannot be processed in this server."
     ]
    }
   ],
   "source": [
    "# Expect to fail !!!\n",
    "client.opendp.query(\n",
    "    opendp_pipeline = age_transformation_pipeline,\n",
    "    dummy=True\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d06c59dc",
   "metadata": {},
   "source": [
    "This is because the server will only allow measurement pipeline with differentially private results. We add Laplacian noise to the pipeline and should be able to instantiate the pipeline."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "b8162859",
   "metadata": {},
   "outputs": [],
   "source": [
    "var_age_transformation_pipeline = (\n",
    "    age_transformation_pipeline >>\n",
    "    meas.then_laplace(scale=5.0)\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fc7e0ecd",
   "metadata": {},
   "source": [
    "Now that there is a measurement, one is able to apply the pipeline on the dummy dataset of the server."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "df61bce0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dummy result for variance: 59.75\n"
     ]
    }
   ],
   "source": [
    "dummy_var_res = client.opendp.query(\n",
    "    opendp_pipeline = var_age_transformation_pipeline, \n",
    "    dummy=True\n",
    ")\n",
    "print(f\"Dummy result for variance: {np.round(dummy_var_res.result.value, 2)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ded11ac4",
   "metadata": {},
   "source": [
    "With opendp, the function `estimate_opendp_cost` is particularly useful to estimate the used `epsilon` and `delta` based on the `scale` value."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "7ae7f735",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CostResponse(epsilon=2.2502841037292076, delta=0.0)"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cost_res = client.opendp.cost(\n",
    "    opendp_pipeline = var_age_transformation_pipeline\n",
    ")\n",
    "cost_res"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1c791d36",
   "metadata": {},
   "source": [
    "One can now execute the query on the real dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "085555a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "var_res = client.opendp.query(\n",
    "    opendp_pipeline = var_age_transformation_pipeline, \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "674332e7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of passengers: 887 (from previous smartnoise-sql query).\n",
      "Average age: 29.34 (from previous smartnoise-sql query).\n",
      "Variance of age: 182.132 (from opendp query).\n"
     ]
    }
   ],
   "source": [
    "print(f\"Number of passengers: {nb_passengers} (from previous smartnoise-sql query).\")\n",
    "\n",
    "print(f\"Average age: {np.round(avg_age, 2)} (from previous smartnoise-sql query).\")\n",
    "\n",
    "var_age = var_res.result.value\n",
    "print(f\"Variance of age: {np.round(var_age, 3)} (from opendp query).\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "f72b19d0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Standard error of age: 0.45.\n"
     ]
    }
   ],
   "source": [
    "# Get standard error\n",
    "standard_error = np.sqrt(var_age/nb_passengers)\n",
    "print(f\"Standard error of age: {np.round(standard_error, 2)}.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "62630a03",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The 95% confidence interval of the age of all passengers is [28.45, 30.23].\n"
     ]
    }
   ],
   "source": [
    " # Compute the 95% confidence interval\n",
    "ZSCORE = 1.96\n",
    "lower_bound = np.round(avg_age - ZSCORE*standard_error, 2)\n",
    "upper_bound = np.round(avg_age + ZSCORE*standard_error, 2)\n",
    "print(f\"The 95% confidence interval of the age of all passengers is [{lower_bound}, {upper_bound}].\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3bcb4225-59c3-4e58-9be5-b9fae115b99d",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}