Provisioning Datalabs to a Media DCR
This guide will show you how to create and provision a Datalab to a Media DCR as a publisher.
The purpose of a Datalab is to check for internal consistency in your data before that data is provisioned to a Media DCR.
Creating a Datalab
A Datalab is created through the use of the DataLabBuilder class. This class provides a means for customizing the data that is required. Matching datasets are always required, but all other datasets are optional.
import decentriq_platform as dq
# Placeholders: replace with the publisher's account email and API token.
publisher_email = "@@ EMAIL OF PUBLISHER PARTICIPANT @@"
publisher_api_token = "@@ PUBLISHER TOKEN HERE @@"
# Client created from the publisher's credentials; the later steps reuse it.
publisher_client = dq.create_client(publisher_email, publisher_api_token)
# Configure the Datalab through its builder before building it.
builder = dq.data_lab.DataLabBuilder(publisher_client)
builder.with_name("tutorial-data-lab")
# Matching data is always required, so only its ID format is chosen here.
builder.with_matching_id_format(dq.types.MatchingIdFormat.STRING)
# The remaining datasets are optional; enable only the ones you will provide.
# NOTE(review): 50 is presumably the number of embedding columns - confirm against the SDK.
builder.with_embeddings(50)
builder.with_demographics()
builder.with_segments()
# Build the configured Datalab; the following steps operate on `data_lab`.
data_lab = builder.build()
Uploading new datasets to a Datalab
A Datalab can be provisioned with datasets from a local file system. This is done through the use of the provision_local_datasets method.
# Locations of the formatted CSV datasets on the local file system.
match_data_path = "/path/to/matching_data.csv"
segments_data_path = "/path/to/segments_data.csv"
demographics_data_path = "/path/to/demographics_data.csv"
embeddings_data_path = "/path/to/embeddings_data.csv"

# Key handed to the provisioning call together with the dataset paths.
key = dq.Key()

# The matching dataset is passed positionally; the optional datasets are
# passed by keyword.
data_lab.provision_local_datasets(
    key,
    match_data_path,
    segments_data_path=segments_data_path,
    demographics_data_path=demographics_data_path,
    embeddings_data_path=embeddings_data_path,
)
Once complete, the Datalab can be validated to check for errors.
# Run the Datalab, then fetch the validation report it produced.
data_lab.run()
validation_report = data_lab.get_validation_report()

# Stop with the full report attached when validation did not pass.
validation_passed = data_lab.is_validation_passed(validation_report)
if not validation_passed:
    raise Exception("DataLab validation failed", validation_report)
Provisioning existing datasets from the dataset portal to a Datalab
The following example shows how to provision an existing dataset from the dataset portal to a Datalab.
from decentriq_platform.types import DataLabDatasetType

datasets = publisher_client.get_available_datasets()

# Keep only the newest (createdAt, manifestHash) pair for each dataset name.
datasets_name_lookup = {}
for dataset in datasets:
    name = dataset["name"]
    candidate = (dataset["createdAt"], dataset["manifestHash"])
    current = datasets_name_lookup.get(name)
    # First sighting of this name, or a more recent version of it.
    if current is None or candidate[0] > current[0]:
        datasets_name_lookup[name] = candidate

# Map each Datalab dataset type to the portal dataset (looked up by name)
# that should be provisioned for it.
datasets_to_provision = {
    DataLabDatasetType.MATCH: datasets_name_lookup["pub_match"],
    DataLabDatasetType.SEGMENTS: datasets_name_lookup["pub_segment"],
    DataLabDatasetType.EMBEDDINGS: datasets_name_lookup["pub_attribute"],
    DataLabDatasetType.DEMOGRAPHICS: datasets_name_lookup["pub_demo"],
}

# Fetch each dataset's key via its manifest hash and provision the dataset.
for dataset_type, (_created_at, manifest_hash) in datasets_to_provision.items():
    key = publisher_client.get_dataset_key(manifest_hash)
    data_lab.provision_dataset(manifest_hash, key, dataset_type)
Updating an existing Datalab with new datasets
First retrieve the existing Datalab.
# Rebuild the Datalab object from the id of the existing Datalab.
datalab_id = data_lab.data_lab_id
builder = dq.data_lab.DataLabBuilder(publisher_client)
builder.from_existing(datalab_id)
# Bind the result to `data_lab`: that is the name the following steps use,
# so the retrieved Datalab is the one that actually gets updated.
data_lab = builder.build()
Deprovision the existing dataset from the Datalab.
# Remove the segments dataset from the Datalab so a new one can be provisioned in its place.
data_lab.deprovision_dataset(DataLabDatasetType.SEGMENTS)
Provision the new dataset to the Datalab.
# Upload the new segments file to the dataset portal under a new name.
with open(segments_data_path, "rb") as segments_file:
    name = "new_segments.csv"
    dataset_id = publisher_client.upload_dataset(segments_file, key, name)

# Provision the uploaded dataset as the Datalab's segments data, then run
# the Datalab again.
data_lab.provision_dataset(dataset_id, key, DataLabDatasetType.SEGMENTS)
data_lab.run()

# Fetch the statistics report to check that the Datalab completed successfully.
statistics_report = data_lab.get_statistics_report()
Provisioning a Datalab to a Media DCR
import decentriq_platform as dq
from decentriq_platform.media import (
    MediaDcr,
    Participant,
    CollaborationType,
    Permission,
)

# Placeholders: replace with the advertiser's account email and API token.
advertiser_email = "@@ YOUR EMAIL HERE @@"
advertiser_api_token = "@@ YOUR TOKEN HERE @@"
advertiser_client = dq.create_client(advertiser_email, advertiser_api_token)

# The Media DCR is created with the advertiser's client defined just above
# (no variable named `client` exists in this guide).
media_dcr = MediaDcr(
    client=advertiser_client,
    name="my_media_dcr",
    participants=[
        # The publisher is granted PROVIDE_BASE_AUDIENCE.
        Participant(
            role="Publisher",
            emails=[publisher_email],
            permissions=[
                Permission.VIEW_OVERLAP,
                Permission.VIEW_INSIGHTS,
                Permission.PROVIDE_BASE_AUDIENCE,
                Permission.EXPORT_AUDIENCE,
            ],
        ),
        # The advertiser is granted PROVIDE_SEED_AUDIENCE and
        # CREATE_CUSTOM_AUDIENCE.
        Participant(
            role="Advertiser",
            emails=[advertiser_email],
            permissions=[
                Permission.VIEW_OVERLAP,
                Permission.VIEW_INSIGHTS,
                Permission.PROVIDE_SEED_AUDIENCE,
                Permission.EXPORT_AUDIENCE,
                Permission.CREATE_CUSTOM_AUDIENCE,
            ],
        ),
    ],
    collaboration_types=[
        CollaborationType.INSIGHTS,
        CollaborationType.LOOKALIKE,
        CollaborationType.REMARKETING,
        CollaborationType.RULE_BASED,
    ],
    # Same matching ID format as the one configured for the Datalab.
    matching_ids=[dq.types.MatchingIdFormat.STRING],
)

# Keep the id so the Media DCR can be retrieved again later.
dcr_id = media_dcr.id
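To provision a Datalab to a Media DCR, retrieve the Media DCR with the publisher's client and provision the Datalab to it as the base audience: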
# Retrieve the Media DCR by id using the publisher's client.
media_dcr = MediaDcr.from_existing(dcr_id, publisher_client)
# Provision the Datalab, referenced by its id, as the base audience of the Media DCR.
media_dcr.provision_base_audience(data_lab.data_lab_id)