Provisioning data labs to an Audience Builder Media DCR
This guide will show you how to create and provision a data lab to an Audience Builder Media DCR as a publisher.
The purpose of a data lab is to check for internal consistency in your data before that data is provisioned to an Audience Builder Media DCR.
Creating a data lab
A data lab is created through the use of the DataLabBuilder
class. This class provides a means for customising the data that is required. Matching datasets are always required, but all other datasets are optional.
import decentriq_platform as dq
# Create a client for the publisher. Replace both placeholders with real values.
publisher_email = "@@ EMAIL OF PUBLISHER PARTICIPANT @@"
publisher_api_token = "@@ PUBLISHER TOKEN HERE @@"
publisher_client = dq.create_client(publisher_email, publisher_api_token)
# Configure the data lab step by step on a builder bound to that client.
builder = dq.data_lab.DataLabBuilder(publisher_client)
builder.with_name("tutorial-data-lab")
builder.with_matching_id_format(dq.types.MatchingIdFormat.STRING)
# The three calls below opt in to the optional (non-matching) datasets.
# NOTE(review): 50 is presumably the number of embedding columns - confirm
# against the DataLabBuilder documentation.
builder.with_embeddings(50)
builder.with_demographics()
builder.with_segments()
# `data_lab` is the object used by every later step of this guide.
data_lab = builder.build()
Uploading new datasets to a data lab
A data lab can be provisioned with datasets from a local file system. This is done through the use of the provision_local_datasets
method.
# Locations of the formatted CSV files on the local file system.
match_data_path = "/path/to/matching_data.csv"
segments_data_path = "/path/to/segments_data.csv"
demographics_data_path = "/path/to/demographics_data.csv"
embeddings_data_path = "/path/to/embeddings_data.csv"

# A fresh key is created here and handed to the data lab together with the files.
key = dq.Key()

# The matching dataset is passed positionally; the remaining datasets are
# passed by keyword.
data_lab.provision_local_datasets(
    key,
    match_data_path,
    segments_data_path=segments_data_path,
    demographics_data_path=demographics_data_path,
    embeddings_data_path=embeddings_data_path,
)
Once complete, the data lab can be validated to check for errors.
# Run the data lab, then fetch the validation report it produced.
data_lab.run()
validation_report = data_lab.get_validation_report()

# Stop here if validation failed; the report is attached to the exception
# so the cause can be inspected.
if not data_lab.is_validation_passed(validation_report):
    raise Exception("DataLab validation failed", validation_report)
Provisioning existing datasets from the dataset portal to a data lab
The following example shows how to provision an existing dataset from the dataset portal to a data lab.
from decentriq_platform.types import DataLabDatasetType
datasets = publisher_client.get_available_datasets()

# Get the latest version of each dataset.
# Maps dataset name -> (created_at, manifest_hash) of the newest entry seen so far.
# NOTE(review): assumes `createdAt` values compare chronologically with `>`
# (e.g. ISO-8601 strings) - confirm against the API response format.
datasets_name_lookup = {}
for dataset in datasets:
    name = dataset["name"]
    created_at = dataset["createdAt"]
    manifest_hash = dataset["manifestHash"]
    is_latest = (
        name not in datasets_name_lookup
        or created_at > datasets_name_lookup[name][0]
    )
    if is_latest:  # first occurrence, or replace with latest version
        datasets_name_lookup[name] = (created_at, manifest_hash)

# Pick the dataset to use for each data lab slot by its name in the portal.
# A name that is not present in the portal raises KeyError here.
datasets_to_provision = {
    DataLabDatasetType.MATCH: datasets_name_lookup["pub_match"],
    DataLabDatasetType.SEGMENTS: datasets_name_lookup["pub_segment"],
    DataLabDatasetType.EMBEDDINGS: datasets_name_lookup["pub_attribute"],
    DataLabDatasetType.DEMOGRAPHICS: datasets_name_lookup["pub_demo"],
}

# Each dataset is provisioned with the key retrieved for its manifest hash.
for dataset_type, (_created_at, manifest_hash) in datasets_to_provision.items():
    key = publisher_client.get_dataset_key(manifest_hash)
    data_lab.provision_dataset(manifest_hash, key, dataset_type)
Updating an existing data lab with new datasets
First retrieve the existing data lab.
# Re-create a handle to the already existing data lab from its ID.
datalab_id = data_lab.data_lab_id
builder = dq.data_lab.DataLabBuilder(publisher_client)
builder.from_existing(datalab_id)
# Bind the result to `data_lab` (not `datalab`): the statements that follow
# operate on `data_lab`, so the retrieved data lab must carry that name.
data_lab = builder.build()
Deprovision the existing dataset from the data lab.
# Remove the currently provisioned segments dataset from the data lab.
data_lab.deprovision_dataset(DataLabDatasetType.SEGMENTS)
Provision the new dataset to the data lab.
# Upload the new dataset to the dataset portal.
# The file is opened in binary mode and closed again once the upload returns.
with open(segments_data_path, "rb") as file:
    name = "new_segments.csv"
    dataset_id = publisher_client.upload_dataset(file, key, name)

# Provision the new dataset to the data lab and run the data lab.
# The same `key` that was passed to `upload_dataset` is passed here.
data_lab.provision_dataset(dataset_id, key, DataLabDatasetType.SEGMENTS)
data_lab.run()

# Get the statistics report to ensure the data lab has completed successfully.
statistics_report = data_lab.get_statistics_report()
Provisioning a data lab to an Audience Builder Media DCR
from decentriq_platform.ab_media import (
AbMediaDcrBuilder,
MatchingId,
)
advertiser_email = "@@ YOUR EMAIL HERE @@"

# Mandatory fields are supplied as constructor arguments.
builder = AbMediaDcrBuilder(
    name="audience-builder-dcr",
    publisher_emails=[publisher_email],
    advertiser_emails=[advertiser_email],
    matching_id_format=MatchingId.STRING,
    client=publisher_client,
)

# Optional settings are chained onto the builder; `build()` ends the chain
# and yields the DCR definition.
dcr_definition = (
    builder.with_insights()
    .with_lookalike()
    .with_remarketing()
    .with_rule_based()
    .with_agency_emails(["test@agency.com"])
    .with_observer_emails(["test@observer.com"])
    .build()
)

# Publish the definition and keep the ID of the resulting DCR.
dcr = publisher_client.publish_ab_media_dcr(dcr_definition=dcr_definition)
dcr_id = dcr.id
To provision a data lab to an Audience Builder Media DCR, retrieve the DCR by its ID and call provision_from_data_lab with the ID of the data lab:
# Fetch the published DCR by the ID saved when it was published.
ab_media_dcr = publisher_client.retrieve_ab_media_dcr(dcr_id)
# Provision the data lab, identified by its ID, through the DCR's publisher handle.
ab_media_dcr.publisher.provision_from_data_lab(data_lab.data_lab_id)