Exporting to AWS

Setup

First, import the necessary modules:

import io

import decentriq_platform as dq
from decentriq_platform import create_client, Key
from decentriq_platform.analytics import (
    AnalyticsDcrBuilder,
    RawDataNodeDefinition,
    PythonComputeNodeDefinition,
)
from decentriq_platform.data_connectors import (
    AwsExportConnectorDefinition,
    AwsCredentials,
    ExportNodeDependency,
)

Then create the Client instance used to communicate with the Decentriq platform:

user_email = "@@ YOUR EMAIL HERE @@"
api_token = "@@ YOUR TOKEN HERE @@"

client = create_client(user_email, api_token)
enclave_specs = dq.enclave_specifications.latest()

Example 1: Export a raw data file to S3

This example shows how to export a file that you upload to the Data Clean Room as a raw dataset, sending it to AWS S3.

# Build the Data Clean Room
builder = AnalyticsDcrBuilder(client=client)

dcr_definition = (
    builder.with_name("AWS Export DCR - Raw File")
    .with_owner(user_email)
    .with_description("Export a raw file to AWS S3")
    .add_node_definitions([
        # Node to hold the data you want to export
        RawDataNodeDefinition(
            name="raw-export-data",
            is_required=True,
        ),
        # Node to hold AWS credentials
        RawDataNodeDefinition(
            name="aws-credentials",
            is_required=True,
        ),
        # Export connector node
        AwsExportConnectorDefinition(
            name="aws-export",
            bucket="@@ AWS BUCKET NAME HERE @@",
            region="@@ AWS REGION HERE @@",
            credentials_dependency="aws-credentials",
            node_dependency=ExportNodeDependency.raw(
                name="raw-export-data",
                object_key="exported-data.txt",
            ),
        ),
    ])
    .add_participant(
        user_email,
        analyst_of=["aws-export"],
        data_owner_of=["aws-credentials", "raw-export-data"],
    )
    .build()
)

# Publish the Data Clean Room
dcr = client.publish_analytics_dcr(dcr_definition)

# Upload AWS credentials
aws_credentials = dcr.get_node("aws-credentials")
aws_credentials.upload_and_publish_dataset(
    AwsCredentials(
        access_key="@@ AWS ACCESS KEY HERE @@",
        secret_key="@@ AWS SECRET HERE @@",
    ).as_binary_io(),
    Key(),
    "credentials.txt",
)

# Upload the data to export
raw_data_node = dcr.get_node("raw-export-data")
raw_data_node.upload_and_publish_dataset(
    io.BytesIO(b"your data here"),
    Key(),
    "raw-file.txt",
)

# Execute the export
aws_export_connector = dcr.get_node("aws-export")
aws_export_connector.run_computation_and_get_results_as_bytes()

The file will now be available in your S3 bucket at the specified object key.
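If you want to confirm the export from outside the platform, you could check the bucket with boto3. This is not part of the Decentriq SDK; it is a minimal sketch assuming the same bucket, region, and credentials you configured in the connector:

import boto3

# Hypothetical verification step: fetch metadata for the exported object.
s3 = boto3.client(
    "s3",
    region_name="@@ AWS REGION HERE @@",
    aws_access_key_id="@@ AWS ACCESS KEY HERE @@",
    aws_secret_access_key="@@ AWS SECRET HERE @@",
)
response = s3.head_object(
    Bucket="@@ AWS BUCKET NAME HERE @@",
    Key="exported-data.txt",
)
print(response["ContentLength"])  # size of the exported file in bytes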

Example 2: Export a single file from a computation

This example shows how to export a specific file from a Python computation's output.

# Python script that generates output files
script = """with open("/output/results.txt", 'w+') as f:
f.write("computation results")"""

builder = AnalyticsDcrBuilder(client=client)

dcr_definition = (
    builder.with_name("AWS Export DCR - Single File")
    .with_owner(user_email)
    .with_description("Export a single file from computation to AWS S3")
    .add_node_definitions([
        # Computation node that generates output
        PythonComputeNodeDefinition(
            name="python-node",
            script=script,
        ),
        # Node to hold AWS credentials
        RawDataNodeDefinition(
            name="aws-credentials",
            is_required=True,
        ),
        # Export connector node
        AwsExportConnectorDefinition(
            name="aws-export",
            bucket="@@ AWS BUCKET NAME HERE @@",
            region="@@ AWS REGION HERE @@",
            credentials_dependency="aws-credentials",
            node_dependency=ExportNodeDependency.file(
                name="python-node",
                file="results.txt",
                object_key="exported-results.txt",
            ),
        ),
    ])
    .add_participant(
        user_email,
        analyst_of=["aws-export", "python-node"],
        data_owner_of=["aws-credentials"],
    )
    .build()
)

# Publish the Data Clean Room
dcr = client.publish_analytics_dcr(dcr_definition)

# Upload AWS credentials
aws_credentials = dcr.get_node("aws-credentials")
aws_credentials.upload_and_publish_dataset(
    AwsCredentials(
        access_key="@@ AWS ACCESS KEY HERE @@",
        secret_key="@@ AWS SECRET HERE @@",
    ).as_binary_io(),
    Key(),
    "credentials.txt",
)

# Execute the export (this will run the computation and export the file)
aws_export_connector = dcr.get_node("aws-export")
aws_export_connector.run_computation_and_get_results_as_bytes()

Example 3: Export all files from a computation

This example shows how to export all output files from a computation to S3.

# Python script that generates multiple output files
script = """with open("/output/file1.txt", 'w+') as f:
f.write("first file")
with open("/output/file2.txt", 'w+') as f:
f.write("second file")"""

builder = AnalyticsDcrBuilder(client=client)

dcr_definition = (
    builder.with_name("AWS Export DCR - All Files")
    .with_owner(user_email)
    .with_description("Export all files from computation to AWS S3")
    .add_node_definitions([
        # Computation node that generates output
        PythonComputeNodeDefinition(
            name="python-node",
            script=script,
        ),
        # Node to hold AWS credentials
        RawDataNodeDefinition(
            name="aws-credentials",
            is_required=True,
        ),
        # Export connector node
        AwsExportConnectorDefinition(
            name="aws-export",
            bucket="@@ AWS BUCKET NAME HERE @@",
            region="@@ AWS REGION HERE @@",
            credentials_dependency="aws-credentials",
            node_dependency=ExportNodeDependency.all(name="python-node"),
        ),
    ])
    .add_participant(
        user_email,
        analyst_of=["aws-export", "python-node"],
        data_owner_of=["aws-credentials"],
    )
    .build()
)

# Publish the Data Clean Room
dcr = client.publish_analytics_dcr(dcr_definition)

# Upload AWS credentials
aws_credentials = dcr.get_node("aws-credentials")
aws_credentials.upload_and_publish_dataset(
    AwsCredentials(
        access_key="@@ AWS ACCESS KEY HERE @@",
        secret_key="@@ AWS SECRET HERE @@",
    ).as_binary_io(),
    Key(),
    "credentials.txt",
)

# Execute the export (this will run the computation and export all files)
aws_export_connector = dcr.get_node("aws-export")
aws_export_connector.run_computation_and_get_results_as_bytes()

When you use ExportNodeDependency.all(), every file in the computation's output is exported to S3, with its original filename as the object key.
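To double-check the result from outside the platform, you could list the bucket contents with boto3. Again, this is a sketch outside the Decentriq SDK, assuming the connector's bucket, region, and credentials:

import boto3

# Hypothetical check: after the export, the bucket should contain
# file1.txt and file2.txt under their original names.
s3 = boto3.client(
    "s3",
    region_name="@@ AWS REGION HERE @@",
    aws_access_key_id="@@ AWS ACCESS KEY HERE @@",
    aws_secret_access_key="@@ AWS SECRET HERE @@",
)
response = s3.list_objects_v2(Bucket="@@ AWS BUCKET NAME HERE @@")
for obj in response.get("Contents", []):
    print(obj["Key"])  # expected: file1.txt, file2.txt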

ExportNodeDependency options

The ExportNodeDependency class provides three methods for different export scenarios, shown together in the sketch after this list:

  • .raw(name, object_key): Export a raw data node with the specified object key
  • .file(name, file, object_key): Export a specific file from a computation's output with the specified object key
  • .all(name): Export all files from a computation's output, using their original filenames as object keys
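As a side-by-side reference, here is how each variant is constructed. The node names and object keys are the illustrative values used in the examples above:

# Export the contents of a raw data node under a fixed object key
ExportNodeDependency.raw(name="raw-export-data", object_key="exported-data.txt")

# Export one named file from a computation's output
ExportNodeDependency.file(
    name="python-node",
    file="results.txt",
    object_key="exported-results.txt",
)

# Export every file from a computation's output, keeping original filenames
ExportNodeDependency.all(name="python-node")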