Provisioning a new dataset
This doc gives an example of how to provision a dataset to a Media Data Clean Room (DCR), using a direct upload from a local source.
Setup Script
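If you want to test this functionality and don't have a clean room set up yet, you can use this script to create a suitable environment for trying out the rest of this guide. The later examples reuse the `dcr_id` variable it defines; if you already have a Media DCR, set `dcr_id` to its ID instead.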
import decentriq_platform as dq
from decentriq_platform.media import MediaDcrBuilder
# Fill in the advertiser's credentials and the publisher participant's email.
advertiser_email = "@@ YOUR EMAIL HERE @@"
advertiser_api_token = "@@ YOUR TOKEN HERE @@"
publisher_email = "@@ EMAIL OF PUBLISHER PARTICIPANT @@"

advertiser_client = dq.create_client(advertiser_email, advertiser_api_token)

# Describe the Media DCR: name, insights/lookalike/retargeting options,
# matching ID format and the participants' email addresses.
# NOTE(review): publisher/advertiser emails are passed as plain strings while
# agency/observer emails are lists -- confirm against the MediaDcrBuilder API.
builder = MediaDcrBuilder(client=advertiser_client)
dcr_definition = (
    builder.with_name("My DCR")
    .with_insights()
    .with_lookalike()
    .with_retargeting()
    .with_matching_id_format(dq.types.MatchingId.STRING)
    .with_publisher_emails(publisher_email)
    .with_advertiser_emails(advertiser_email)
    .with_agency_emails(["test@agency.com"])
    .with_observer_emails(["test@observer.com"])
    .build()
)

# Publish the DCR and keep its ID; the examples below refer to `dcr_id`.
media_dcr = advertiser_client.publish_media_dcr(dcr_definition)
dcr_id = media_dcr.id
Direct Upload
Advertisers can use the `Session` function `publish_dataset` to provision their data.
import decentriq_platform as dq
user_email = "@@ YOUR EMAIL HERE @@"
api_token = "@@ YOUR TOKEN HERE @@"
dataset_name = "audiences.csv"
dataset_path = "/path/to/advertiser_data.csv"

client = dq.create_client(user_email, api_token)

# `dcr_id` is the ID of the target Media DCR (the setup script above stores
# it in `dcr_id`). Look up that DCR's description and open a session for it.
data_room_descriptions = {
    description["id"]: description
    for description in client.get_data_room_descriptions()
}
data_room_description = data_room_descriptions[dcr_id]
session = client.create_session_from_data_room_description(data_room_description)

# The same key object is used for the upload and for publishing below.
key = dq.Key()

# Upload the local file; the `with` block closes it once the upload returns.
with open(dataset_path, "rb") as f:
    dataset_id = client.upload_dataset(
        f,
        key,
        dataset_name,
    )

# Provision the uploaded dataset to the DCR's "audiences" node.
session.publish_dataset(dcr_id, dataset_id, "audiences", key)
Upload using stored key
This example provisions an existing dataset to a Media DCR by retrieving the dataset's stored key with `get_dataset_key`. You will not need to remember the key yourself to reprovision this data.
import decentriq_platform as dq
# Credentials and the ID of the existing dataset to provision.
user_email = "@@ YOUR EMAIL HERE @@"
api_token = "@@ YOUR TOKEN HERE @@"
dataset_id = "@@ YOUR DATASET ID HERE @@"

client = dq.create_client(user_email, api_token)

# Retrieve the key for this dataset rather than supplying one by hand.
key = client.get_dataset_key(dataset_id)

# Index the available data room descriptions by ID and pick the target DCR
# (`dcr_id` must hold the ID of your Media DCR).
descriptions_by_id = {
    entry["id"]: entry
    for entry in client.get_data_room_descriptions()
}
target_description = descriptions_by_id[dcr_id]
session = client.create_session_from_data_room_description(target_description)

# Provision the dataset to the DCR's "audiences" node.
session.publish_dataset(dcr_id, dataset_id, "audiences", key)

# Alternative way to publish without a session:
# dcr = client.retrieve_media_dcr(dcr_id)
# dcr.get_node("audiences").publish_dataset(
#     dataset_id,
#     dq.Key(key.value)
# )
No stored key
For completeness, this example upload script performs the same operation without retrieving the encryption key from the secure enclave. You will need to use the same key again to reprovision this data.
import decentriq_platform as dq
user_email = "@@ YOUR EMAIL HERE @@"
api_token = "@@ YOUR TOKEN HERE @@"
dataset_name = "audiences.csv"
dataset_path = "/path/to/advertiser_data.csv"

client = dq.create_client(user_email, api_token)

# `dcr_id` must hold the ID of the target Media DCR (the setup script at the
# top of this guide stores it in `dcr_id`).
data_room_descriptions = {
    description["id"]: description
    for description in client.get_data_room_descriptions()
}
data_room_description = data_room_descriptions[dcr_id]
session = client.create_session_from_data_room_description(data_room_description)

# Keep this key yourself: it is needed again to reprovision the dataset.
key = dq.Key()

# Upload the local file; the `with` block closes it once the upload returns.
with open(dataset_path, "rb") as f:
    dataset_id = client.upload_dataset(f, key, dataset_name)

# Provision the uploaded dataset to the DCR's "audiences" node.
session.publish_dataset(dcr_id, dataset_id, "audiences", key)