
CassIO quickstart¶
In this notebook, you will use CassIO to connect to the database, create a few tables, insert data in them, and run queries to read data.
Dependencies¶
In [1]:
Copied!
!pip install cassio
!pip install cassio
In [2]:
Copied!
import cassio
import cassio
Connect to the database¶
Edit and run the cells in the appropriate section here, depending on your target database.
Note: in a real application, do not hardcode the database secrets in the program; read them from environment variables or a secrets manager instead.
If your database is Cassandra ...¶
In [3]:
Copied!
# Connection settings for a Cassandra cluster: edit these to match your environment.
CASSANDRA_CONTACT_POINTS = "172.17.0.2"  # e.g. "143.11.15.207,143.11.15.221"
CASSANDRA_KEYSPACE = "cassio_tutorial"  # the keyspace must exist on DB

# Optional credentials: uncomment (here and in the cassio.init call) if needed.
# CASSANDRA_USERNAME = None  # or "my_username"
# CASSANDRA_PASSWORD = None  # or "my_secret"
In [4]:
Copied!
# Initialise CassIO once with the Cassandra settings defined above.
# Later cells read the session and keyspace back through cassio.config.
cassio.init(
    contact_points=CASSANDRA_CONTACT_POINTS,
    keyspace=CASSANDRA_KEYSPACE,
    # Uncomment these parameters if needed:
    # username=CASSANDRA_USERNAME,
    # password=CASSANDRA_PASSWORD,
)
If your database is Astra DB ...¶
In [ ]:
Copied!
# Connection settings for Astra DB: replace the placeholders with your own values.
# The endpoint must keep the form "https://<database_id>-<region>.apps.astra.datastax.com",
# because the next cell extracts the database ID from it.
ASTRA_DB_API_ENDPOINT = "https://<database_id>-<region>.apps.astra.datastax.com"
ASTRA_DB_APPLICATION_TOKEN = "AstraCS:..."

# Optional: uncomment (here and in the cassio.init call) to set a keyspace.
# ASTRA_DB_KEYSPACE = "cassandra_tutorial"  # optional
In [ ]:
Copied!
# Pull the Database ID out of the API Endpoint (the ID looks like "8-4-4-4-12" hex digits).
# The slice skips the 8-character "https://" prefix and keeps the following 36 characters.
_database_id = ASTRA_DB_API_ENDPOINT[8:8+36].lower()

# Fail fast if the slice is too short or holds anything other than hex digits and dashes,
# e.g. when the endpoint still contains the "<database_id>" placeholder.
if len(_database_id) != 36 or set(_database_id) - set('1234567890abcdef-'):
    raise ValueError("Cannot extract a valid database ID from the endpoint.")
In [ ]:
Copied!
# Initialise CassIO once with the Astra DB database ID and token from the cells above.
cassio.init(
    database_id=_database_id,
    token=ASTRA_DB_APPLICATION_TOKEN,
    # Uncomment these parameters if needed:
    # keyspace=ASTRA_DB_KEYSPACE,
)
Verify the connection¶
In [5]:
Copied!
# Show the session and keyspace that cassio.init() registered, as a connection check.
print("Connected.\n* Session =", cassio.config.resolve_session())
print("* Keyspace =", cassio.config.resolve_keyspace())
Connected.
* Session = <cassandra.cluster.Session object at 0x7f6728f25df0>
* Keyspace = cassio_tutorial
Table with body and metadata¶
Create the table¶
In [6]:
Copied!
from cassio.table.tables import MetadataCassandraTable
from cassio.table.tables import MetadataCassandraTable
In [7]:
Copied!
# Table abstraction with a text row ID, used below with a body and a metadata dict.
# No session/keyspace is passed here; presumably the ones set by cassio.init() are used.
md_table = MetadataCassandraTable(
    table="test_mct",
    primary_key_type="TEXT",
)
Insert a few rows¶
In [8]:
Copied!
# Insert four elements, each tagged with two metadata flags ("metal" and "solid").
md_table.put(row_id="Ag", body_blob="Silver", metadata={"metal": "Y", "solid": "Y"})
md_table.put(row_id="Hg", body_blob="Mercury", metadata={"metal": "Y", "solid": "N"})
md_table.put(row_id="Ca", body_blob="Calcium", metadata={"metal": "N", "solid": "Y"})
md_table.put(row_id="He", body_blob="Helium", metadata={"metal": "N", "solid": "N"})
Get a row¶
In [9]:
Copied!
# Read a single row back by its ID; the result prints as a dict of the row's fields.
row = md_table.get(row_id="Ca")
print(row)
{'metadata': {'metal': 'N', 'solid': 'Y'}, 'row_id': 'Ca', 'body_blob': 'Calcium'}
Delete a row¶
In [10]:
Copied!
# Delete the row, then read it again: the lookup now prints None.
md_table.delete(row_id="Ca")
print(md_table.get(row_id="Ca"))
None
Search by metadata¶
In [11]:
Copied!
# Look up rows by metadata: every entry whose "solid" flag is "N".
# n is presumably the maximum number of entries returned - confirm in the CassIO docs.
non_solids = md_table.find_entries(metadata={"solid": "N"}, n=5)
for n_s in non_solids:
    print(n_s)
{'metadata': {'metal': 'N', 'solid': 'N'}, 'row_id': 'He', 'body_blob': 'Helium'}
{'metadata': {'metal': 'Y', 'solid': 'N'}, 'row_id': 'Hg', 'body_blob': 'Mercury'}
Table with a vector¶
Create the table¶
In [12]:
Copied!
from cassio.table.tables import MetadataVectorCassandraTable
from cassio.table.tables import MetadataVectorCassandraTable
In [13]:
Copied!
# Table abstraction with an integer row ID; used below with a body, metadata and a vector.
# vector_dimension must match the length of every vector stored and queried (5 here).
vec_md_table = MetadataVectorCassandraTable(
    table="test_vmct",
    vector_dimension=5,
    primary_key_type="INT",
)
Insert a few rows¶
In [14]:
Copied!
# Insert four items, each with a 5-component vector and two metadata flags
# ("cooking" and "wearable") that the filtered search below relies on.
vec_md_table.put(
    row_id=100,
    body_blob="Shoes",
    vector=[0.1, 0.4, 0.0, 0.4, 0.2],
    metadata={"cooking": "N", "wearable": "Y"},
)
vec_md_table.put(
    row_id=87,
    body_blob="Pan",
    vector=[0.3, -0.1, 0.2, -0.3, -0.1],
    metadata={"cooking": "Y", "wearable": "N"},
)
vec_md_table.put(
    row_id=191,
    body_blob="Kitten",
    vector=[0.0, 0.3, -0.1, -0.1, 0.3],
    metadata={"cooking": "N", "wearable": "N"},
)
vec_md_table.put(
    row_id=1,
    body_blob="Oven Mitt",
    vector=[0.4, 0.2, -0.3, -0.2, 0.0],
    metadata={"cooking": "Y", "wearable": "Y"},
)
Run a vector ANN search¶
In [15]:
Copied!
# Query vector; it has the same 5 components as the stored vectors.
query_v = [0.1, 0.2, 0.1, -0.1, 0.0]

# Ask for the 2 nearest entries under the "cos" metric and print one line per hit.
# NOTE(review): the value stored under "distance" is labelled "sim" in the output;
# the recorded results come highest-first, which suggests a similarity - confirm.
results = vec_md_table.metric_ann_search(vector=query_v, n=2, metric="cos")
for hit in results:
    print(f'[id={hit["row_id"]:3}] {hit["body_blob"]:12s} (sim={hit["distance"]:0.2f})')
[id=191] Kitten       (sim=0.51)
[id= 87] Pan          (sim=0.46)
Run an ANN+metadata search¶
In [16]:
Copied!
# Same ANN search as before (reuses query_v from the previous cell), now restricted
# to rows whose metadata has "cooking" == "N".
results = vec_md_table.metric_ann_search(vector=query_v, metadata={"cooking": "N"}, n=2, metric="cos")
for hit in results:
    print(f'[id={hit["row_id"]:3}] {hit["body_blob"]:12s} (sim={hit["distance"]:0.2f})')
[id=191] Kitten       (sim=0.51)
[id=100] Shoes        (sim=0.31)