Exporting to Azure Blob Storage
Setup
First, import the necessary modules:
import io
from decentriq_platform import create_client, Key
from decentriq_platform.analytics import (
AnalyticsDcrBuilder,
RawDataNodeDefinition,
PythonComputeNodeDefinition,
)
from decentriq_platform.data_connectors import (
AzureBlobStorageExportConnectorDefinition,
AzureBlobStorageCredentials,
ExportNodeDependency,
)
Then, create the Client instance with which you can communicate with the
Decentriq platform:
user_email = "@@ YOUR EMAIL HERE @@"
api_token = "@@ YOUR TOKEN HERE @@"
client = create_client(user_email, api_token)
# `dq` was referenced below without ever being imported, which would raise a
# NameError — import the package under that alias first.
import decentriq_platform as dq
# NOTE(review): `json` is imported but never used anywhere in this guide —
# consider removing it.
import json
# NOTE(review): `enclave_specs` is not used by the examples below; kept for
# parity with other guides that pass enclave specifications explicitly.
enclave_specs = dq.enclave_specifications.latest()
Example 1: Export a raw data file to Azure Blob Storage
This example shows how to export a raw file that you upload directly to Azure Blob Storage.
# Build the Data Clean Room
builder = AnalyticsDcrBuilder(client=client)

# Declare the three nodes up front: the data to export, the Azure
# credentials, and the connector that wires them together.
nodes = [
    # Holds the raw file that will be pushed to Azure Blob Storage.
    RawDataNodeDefinition(
        name="raw-export-data",
        is_required=True,
    ),
    # Holds the Azure Blob Storage credentials.
    RawDataNodeDefinition(
        name="azure_blob_storage_credentials",
        is_required=True,
    ),
    # Performs the export itself, reading both nodes above.
    AzureBlobStorageExportConnectorDefinition(
        name="azure_blob_storage_export",
        credentials_dependency="azure_blob_storage_credentials",
        node_dependency=ExportNodeDependency.raw(
            name="raw-export-data",
            object_key="exported-data.txt",
        ),
    ),
]

builder = (
    builder.with_name("Azure Export DCR - Raw File")
    .with_owner(user_email)
    .with_description("Export a raw file to Azure Blob Storage")
    .add_node_definitions(nodes)
    .add_participant(
        user_email,
        analyst_of=["azure_blob_storage_export"],
        data_owner_of=["azure_blob_storage_credentials", "raw-export-data"],
    )
)
dcr_definition = builder.build()
# Publish the Data Clean Room
dcr = client.publish_analytics_dcr(dcr_definition)

# Upload Azure Blob Storage credentials to the credentials node.
# The blob_name here determines where the exported file lands in Azure.
credentials = AzureBlobStorageCredentials(
    storage_account="@@ AZURE STORAGE ACCOUNT HERE @@",
    storage_container="@@ AZURE CONTAINER NAME HERE @@",
    blob_name="exported-data.txt",
    sas_token="@@ AZURE SAS TOKEN HERE @@",
)
credentials_node = dcr.get_node("azure_blob_storage_credentials")
credentials_node.upload_and_publish_dataset(
    credentials.as_binary_io(),
    Key(),
    "credentials.txt",
)

# Upload the data to export into the raw data node.
data_node = dcr.get_node("raw-export-data")
data_node.upload_and_publish_dataset(
    io.BytesIO(b"your data here"),
    Key(),
    "raw-file.txt",
)

# Execute the export.
export_node = dcr.get_node("azure_blob_storage_export")
export_node.run_computation_and_get_results_as_bytes()
The file will now be available in your Azure Blob Storage container at the specified blob name.
Example 2: Export a single file from a computation
This example shows how to export a specific file from a Python computation's output.
# Python script that generates output files.
# NOTE: the body of the `with` block must be indented — without the leading
# spaces before `f.write`, the script is invalid Python and the compute node
# would fail with an IndentationError.
script = """with open("/output/results.txt", 'w+') as f:
    f.write("computation results")"""
builder = AnalyticsDcrBuilder(client=client)

# Declare the nodes: the computation that produces output, the Azure
# credentials, and the connector that exports one file of that output.
nodes = [
    # Computation node that generates the output files.
    PythonComputeNodeDefinition(
        name="python-node",
        script=script,
    ),
    # Holds the Azure Blob Storage credentials.
    RawDataNodeDefinition(
        name="azure_blob_storage_credentials",
        is_required=True,
    ),
    # Exports exactly one file ("results.txt") from the computation output.
    AzureBlobStorageExportConnectorDefinition(
        name="azure_blob_storage_export",
        credentials_dependency="azure_blob_storage_credentials",
        node_dependency=ExportNodeDependency.file(
            name="python-node",
            file="results.txt",
            object_key="exported-results.txt",
        ),
    ),
]

builder = (
    builder.with_name("Azure Export DCR - Single File")
    .with_owner(user_email)
    .with_description("Export a single file from computation to Azure Blob Storage")
    .add_node_definitions(nodes)
    .add_participant(
        user_email,
        analyst_of=["azure_blob_storage_export", "python-node"],
        data_owner_of=["azure_blob_storage_credentials"],
    )
)
dcr_definition = builder.build()
# Publish the Data Clean Room
dcr = client.publish_analytics_dcr(dcr_definition)

# Upload Azure Blob Storage credentials.
# The blob_name here determines where the exported file lands in Azure.
credentials = AzureBlobStorageCredentials(
    storage_account="@@ AZURE STORAGE ACCOUNT HERE @@",
    storage_container="@@ AZURE CONTAINER NAME HERE @@",
    blob_name="exported-results.txt",
    sas_token="@@ AZURE SAS TOKEN HERE @@",
)
credentials_node = dcr.get_node("azure_blob_storage_credentials")
credentials_node.upload_and_publish_dataset(
    credentials.as_binary_io(),
    Key(),
    "credentials.txt",
)

# Execute the export (this will run the computation and export the file).
export_node = dcr.get_node("azure_blob_storage_export")
export_node.run_computation_and_get_results_as_bytes()
ExportNodeDependency options
The ExportNodeDependency class provides three methods for different export scenarios:
- .raw(name, object_key): Export a raw data node with the specified object key (blob name).
- .file(name, file, object_key): Export a specific file from a computation's output with the specified object key (blob name).
- .all(name): Not supported for the Azure export connector.