{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "3f5c5368",
   "metadata": {},
   "source": [
    "# CassIO quickstart"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "871486f5",
   "metadata": {},
   "source": [
    "In this notebook, you will use CassIO to connect to the database, create a few tables, insert data in them, and run queries to read data."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c0763b09",
   "metadata": {},
   "source": [
    "## Dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "459cce82-d115-47ba-8246-024834480801",
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip (rather than !pip) installs into the environment of the running kernel\n",
    "%pip install cassio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a61e4821",
   "metadata": {},
   "outputs": [],
   "source": [
    "import cassio"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4578a87b",
   "metadata": {},
   "source": [
    "## Connect to the database\n",
    "\n",
    "Edit and run the cells in the appropriate section here, depending on your target database.\n",
    "\n",
    "_Note: in a real application, **do not hardcode the database secrets in the program**._"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f28b524e-93b2-42b1-b221-2a5cdae584eb",
   "metadata": {},
   "source": [
    "### If your database is Cassandra ..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "9515d4af-9fc2-4c29-b9f2-5c90cd48cc1b",
   "metadata": {},
   "outputs": [],
   "source": [
    "CASSANDRA_CONTACT_POINTS = \"172.17.0.2\"  # e.g. \"143.11.15.207,143.11.15.221\"\n",
    "CASSANDRA_KEYSPACE = \"cassio_tutorial\"  # the keyspace must exist on DB\n",
    "# CASSANDRA_USERNAME = None  # or \"my_username\"\n",
    "# CASSANDRA_PASSWORD = None  # or \"my_secret\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2c0f1a09-4f0a-4502-85a5-28fcc5f7a38b",
   "metadata": {},
   "outputs": [],
   "source": [
    "cassio.init(\n",
    "    contact_points=CASSANDRA_CONTACT_POINTS,\n",
    "    keyspace=CASSANDRA_KEYSPACE,\n",
    "    # Uncomment these parameters if needed:\n",
    "    # username=CASSANDRA_USERNAME,\n",
    "    # password=CASSANDRA_PASSWORD,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3f7c7b37-f2e3-461b-81d8-5a38e9fd69bc",
   "metadata": {},
   "source": [
    "### If your database is Astra DB ..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff2c54c2-abda-4519-b494-2388a63be9d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace both values with your own. The endpoint is expected to look like\n",
    "# \"https://<database-id>-<region>.apps.astra.datastax.com\"\n",
    "ASTRA_DB_API_ENDPOINT = \"https://-.apps.astra.datastax.com\"\n",
    "ASTRA_DB_APPLICATION_TOKEN = \"AstraCS:...\"\n",
    "# ASTRA_DB_KEYSPACE = \"cassandra_tutorial\"  # optional"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5c436ebe-2d48-4ec7-9e00-ab704d7e6ffe",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "\n",
    "# pull the Database ID out of the API Endpoint (the ID looks like \"8-4-4-4-12\" hex digits)\n",
    "# The pattern is anchored at the scheme, so a malformed endpoint fails here\n",
    "# with a clear message instead of reaching cassio.init with a bogus ID.\n",
    "_endpoint_match = re.match(\n",
    "    r\"https://([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\",\n",
    "    ASTRA_DB_API_ENDPOINT.lower(),\n",
    ")\n",
    "if _endpoint_match is None:\n",
    "    raise ValueError(\"Cannot extract a valid database ID from the endpoint.\")\n",
    "_database_id = _endpoint_match.group(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d84f6330-9825-412f-990f-9a6ed238ae8b",
   "metadata": {},
   "outputs": [],
   "source": [
    "cassio.init(\n",
    "    database_id=_database_id,\n",
    "    token=ASTRA_DB_APPLICATION_TOKEN,\n",
    "    # Uncomment these parameters if needed:\n",
    "    # keyspace=ASTRA_DB_KEYSPACE,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8ea05735-0a26-4692-b95b-f1ec94464b66",
   "metadata": {},
   "source": [
    "### Verify the connection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "918e12d6-b97c-4f0e-b4bc-a411bccb0151",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Connected.\n",
      "* Session = \n",
      "* Keyspace = cassio_tutorial\n"
     ]
    }
   ],
   "source": [
    "print(\"Connected.\\n* Session =\", cassio.config.resolve_session())\n",
    "print(\"* Keyspace =\", cassio.config.resolve_keyspace())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "19c8acd6-f58c-4f26-bedd-96be254c1ee4",
   "metadata": {},
   "source": [
    "## Table with body and metadata"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ce9ec9bd-fb12-4b6e-89ac-ef3da37727ff",
   "metadata": {},
   "source": [
    "### Create the table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "ede6820c-04d1-4835-9bc2-14fccf367a6e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from cassio.table.tables import MetadataCassandraTable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "1d9e0c65-5f41-459d-8902-efc0fcc124b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "md_table = MetadataCassandraTable(\n",
    "    table=\"test_mct\",\n",
    "    primary_key_type=\"TEXT\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0ef96a6c-3148-4d30-af68-e7ab548dda40",
   "metadata": {},
   "source": [
    "### Insert a few rows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "c044505e-4b6e-4969-9784-dc0cc2da2ff0",
   "metadata": {},
   "outputs": [],
   "source": [
    "md_table.put(row_id=\"Ag\", body_blob=\"Silver\", metadata={\"metal\": \"Y\", \"solid\": \"Y\"})\n",
    "md_table.put(row_id=\"Hg\", body_blob=\"Mercury\", metadata={\"metal\": \"Y\", \"solid\": \"N\"})\n",
    "md_table.put(row_id=\"Ca\", body_blob=\"Calcium\", metadata={\"metal\": \"N\", \"solid\": \"Y\"})\n",
    "md_table.put(row_id=\"He\", body_blob=\"Helium\", metadata={\"metal\": \"N\", \"solid\": \"N\"})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3e19df21-a4cc-4b7e-a27f-5fca45ce3603",
   "metadata": {},
   "source": [
    "### Get a row"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "261bb4d1-077c-4ff9-baf6-b086875b884d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'metadata': {'metal': 'N', 'solid': 'Y'}, 'row_id': 'Ca', 'body_blob': 'Calcium'}\n"
     ]
    }
   ],
   "source": [
    "row = md_table.get(row_id=\"Ca\")\n",
    "print(row)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2281d2fa-72eb-4896-961f-da7c6cc71a55",
   "metadata": {},
   "source": [
    "### Delete a row"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "d8c0d742-2f88-4cf2-bcfc-ce58debde893",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "None\n"
     ]
    }
   ],
   "source": [
    "md_table.delete(row_id=\"Ca\")\n",
    "print(md_table.get(row_id=\"Ca\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9c7ee292-21d0-42e5-940a-b3cca18d4af6",
   "metadata": {},
   "source": [
    "### Search by metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "3f9ead78-882f-4178-9a05-727a6090cd2e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'metadata': {'metal': 'N', 'solid': 'N'}, 'row_id': 'He', 'body_blob': 'Helium'}\n",
      "{'metadata': {'metal': 'Y', 'solid': 'N'}, 'row_id': 'Hg', 'body_blob': 'Mercury'}\n"
     ]
    }
   ],
   "source": [
    "non_solids = md_table.find_entries(metadata={\"solid\": \"N\"}, n=5)\n",
    "for n_s in non_solids:\n",
    "    print(n_s)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a1a046d2-66da-408d-80c1-63b39196c0ff",
   "metadata": {},
   "source": [
    "## Table with a vector"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "41e06210-f16d-461d-85bc-0525db55c7cb",
   "metadata": {},
   "source": [
    "### Create the table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "99905a39-ed9b-4c9a-80d6-5a3a88656459",
   "metadata": {},
   "outputs": [],
   "source": [
    "from cassio.table.tables import MetadataVectorCassandraTable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "f6e6bc22-85d0-47e1-844a-3dde0e77cb23",
   "metadata": {},
   "outputs": [],
   "source": [
    "vec_md_table = MetadataVectorCassandraTable(\n",
    "    table=\"test_vmct\",\n",
    "    vector_dimension=5,\n",
    "    primary_key_type=\"INT\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4668f01e-a520-4c29-bd34-f06e9d5cde61",
   "metadata": {},
   "source": [
    "### Insert a few rows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "7d647fa9-7d77-43a7-9d7c-b3d591e8230d",
   "metadata": {},
   "outputs": [],
   "source": [
    "vec_md_table.put(\n",
    "    row_id=100,\n",
    "    body_blob=\"Shoes\",\n",
    "    vector=[0.1, 0.4, 0.0, 0.4, 0.2],\n",
    "    metadata={\"cooking\": \"N\", \"wearable\": \"Y\"},\n",
    ")\n",
    "vec_md_table.put(\n",
    "    row_id=87,\n",
    "    body_blob=\"Pan\",\n",
    "    vector=[0.3, -0.1, 0.2, -0.3, -0.1],\n",
    "    metadata={\"cooking\": \"Y\", \"wearable\": \"N\"},\n",
    ")\n",
    "vec_md_table.put(\n",
    "    row_id=191,\n",
    "    body_blob=\"Kitten\",\n",
    "    vector=[0.0, 0.3, -0.1, -0.1, 0.3],\n",
    "    metadata={\"cooking\": \"N\", \"wearable\": \"N\"},\n",
    ")\n",
    "vec_md_table.put(\n",
    "    row_id=1,\n",
    "    body_blob=\"Oven Mitt\",\n",
    "    vector=[0.4, 0.2, -0.3, -0.2, 0.0],\n",
    "    metadata={\"cooking\": \"Y\", \"wearable\": \"Y\"},\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ca2532c3-e5c2-427a-b828-f5f20b2a2b33",
   "metadata": {},
   "source": [
    "### Run a vector ANN search"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "57d3392e-b942-481b-82f7-85927c62da12",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[id=191] Kitten       (sim=0.51)\n",
      "[id= 87] Pan          (sim=0.46)\n"
     ]
    }
   ],
   "source": [
    "query_v = [0.1, 0.2, 0.1, -0.1, 0.0]\n",
    "results = vec_md_table.metric_ann_search(vector=query_v, n=2, metric=\"cos\")\n",
    "for hit in results:\n",
    "    print(f'[id={hit[\"row_id\"]:3}] {hit[\"body_blob\"]:12s} (sim={hit[\"distance\"]:0.2f})')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "91b2c22f-de21-4820-aa15-1c54b28075e2",
   "metadata": {},
   "source": [
    "### Run an ANN+metadata search"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "f142f8d3-56e4-485a-b79c-9ec4c7a3c369",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[id=191] Kitten       (sim=0.51)\n",
      "[id=100] Shoes        (sim=0.31)\n"
     ]
    }
   ],
   "source": [
    "results = vec_md_table.metric_ann_search(vector=query_v, metadata={\"cooking\": \"N\"}, n=2, metric=\"cos\")\n",
    "for hit in results:\n",
    "    print(f'[id={hit[\"row_id\"]:3}] {hit[\"body_blob\"]:12s} (sim={hit[\"distance\"]:0.2f})')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}