Exporting to GCS

Setup

First, import the necessary modules:

import io
from decentriq_platform import create_client, Key
from decentriq_platform.analytics import (
    AnalyticsDcrBuilder,
    RawDataNodeDefinition,
    PythonComputeNodeDefinition,
)
from decentriq_platform.data_connectors import (
    GcsExportConnectorDefinition,
    GcsCredentials,
    ExportNodeDependency,
)

Then, create the Client instance with which you can communicate with the Decentriq platform:

user_email = "@@ YOUR EMAIL HERE @@"
api_token = "@@ YOUR TOKEN HERE @@"

client = create_client(user_email, api_token)
import decentriq_platform as dq

# Fetch the latest enclave specifications supported by the platform
enclave_specs = dq.enclave_specifications.latest()

Example 1: Export a raw data file to GCS

This example shows how to upload a raw file to the Data Clean Room and export it directly to Google Cloud Storage.

# Build the Data Clean Room
builder = AnalyticsDcrBuilder(client=client)

dcr_definition = (
    builder.with_name("GCS Export DCR - Raw File")
    .with_owner(user_email)
    .with_description("Export a raw file to GCS")
    .add_node_definitions([
        # Node to hold the data you want to export
        RawDataNodeDefinition(
            name="raw-export-data",
            is_required=True,
        ),
        # Node to hold GCS credentials
        RawDataNodeDefinition(
            name="gcs-credentials",
            is_required=True,
        ),
        # Export connector node
        GcsExportConnectorDefinition(
            name="gcs-export",
            bucket="@@ GCS BUCKET NAME HERE @@",
            credentials_dependency="gcs-credentials",
            node_dependency=ExportNodeDependency.raw(
                name="raw-export-data",
                object_key="exported-data.txt",
            ),
        ),
    ])
    .add_participant(
        user_email,
        analyst_of=["gcs-export"],
        data_owner_of=["gcs-credentials", "raw-export-data"],
    )
    .build()
)

# Publish the Data Clean Room
dcr = client.publish_analytics_dcr(dcr_definition)

# Upload GCS credentials
gcs_credentials = dcr.get_node("gcs-credentials")
gcs_credentials.upload_and_publish_dataset(
    GcsCredentials(
        credentials_json="@@ GCS SERVICE ACCOUNT JSON HERE @@",
    ).as_binary_io(),
    Key(),
    "credentials.txt",
)
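
If you prefer not to paste the service account JSON inline, you can read it from the key file downloaded from the Google Cloud console. A minimal sketch, assuming the key is stored locally as "service-account.json" (the filename is an assumption):

# Sketch: load the service account key from disk instead of pasting it inline.
# "service-account.json" is an assumed local path to the key file.
with open("service-account.json") as f:
    service_account_json = f.read()

gcs_credentials.upload_and_publish_dataset(
    GcsCredentials(credentials_json=service_account_json).as_binary_io(),
    Key(),
    "credentials.txt",
)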

# Upload the data to export
raw_data_node = dcr.get_node("raw-export-data")
raw_data_node.upload_and_publish_dataset(
    io.BytesIO(b"your data here"),
    Key(),
    "raw-file.txt",
)

# Execute the export
gcs_export_connector = dcr.get_node("gcs-export")
gcs_export_connector.run_computation_and_get_results_as_bytes()

The file will now be available in your GCS bucket at the specified object key.
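
To verify the export from outside the Data Clean Room, you can inspect the bucket with the official google-cloud-storage client (not part of the Decentriq SDK). A minimal sketch, assuming the google-cloud-storage package is installed and the same service account key is used:

from google.cloud import storage

# Sketch: check that the exported object exists and download its contents.
# "service-account.json" is an assumed local path to the service account key.
storage_client = storage.Client.from_service_account_json("service-account.json")
bucket = storage_client.bucket("@@ GCS BUCKET NAME HERE @@")
blob = bucket.blob("exported-data.txt")

print(blob.exists())             # True once the export has completed
print(blob.download_as_bytes())  # b"your data here"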

Example 2: Export a single file from a computation

This example shows how to export a specific file from a Python computation's output.

# Python script that generates output files
script = """with open("/output/results.txt", 'w+') as f:
f.write("computation results")"""

builder = AnalyticsDcrBuilder(client=client)

dcr_definition = (
    builder.with_name("GCS Export DCR - Single File")
    .with_owner(user_email)
    .with_description("Export a single file from computation to GCS")
    .add_node_definitions([
        # Computation node that generates output
        PythonComputeNodeDefinition(
            name="python-node",
            script=script,
        ),
        # Node to hold GCS credentials
        RawDataNodeDefinition(
            name="gcs-credentials",
            is_required=True,
        ),
        # Export connector node
        GcsExportConnectorDefinition(
            name="gcs-export",
            bucket="@@ GCS BUCKET NAME HERE @@",
            credentials_dependency="gcs-credentials",
            node_dependency=ExportNodeDependency.file(
                name="python-node",
                file="results.txt",
                object_key="exported-results.txt",
            ),
        ),
    ])
    .add_participant(
        user_email,
        analyst_of=["gcs-export", "python-node"],
        data_owner_of=["gcs-credentials"],
    )
    .build()
)

# Publish the Data Clean Room
dcr = client.publish_analytics_dcr(dcr_definition)

# Upload GCS credentials
gcs_credentials = dcr.get_node("gcs-credentials")
gcs_credentials.upload_and_publish_dataset(
    GcsCredentials(
        credentials_json="@@ GCS SERVICE ACCOUNT JSON HERE @@",
    ).as_binary_io(),
    Key(),
    "credentials.txt",
)

# Execute the export (this will run the computation and export the file)
gcs_export_connector = dcr.get_node("gcs-export")
gcs_export_connector.run_computation_and_get_results_as_bytes()

Example 3: Export all files from a computation

This example shows how to export all output files from a computation to GCS.

# Python script that generates multiple output files
script = """with open("/output/file1.txt", 'w+') as f:
f.write("first file")
with open("/output/file2.txt", 'w+') as f:
f.write("second file")"""

builder = AnalyticsDcrBuilder(client=client)

dcr_definition = (
    builder.with_name("GCS Export DCR - All Files")
    .with_owner(user_email)
    .with_description("Export all files from computation to GCS")
    .add_node_definitions([
        # Computation node that generates output
        PythonComputeNodeDefinition(
            name="python-node",
            script=script,
        ),
        # Node to hold GCS credentials
        RawDataNodeDefinition(
            name="gcs-credentials",
            is_required=True,
        ),
        # Export connector node
        GcsExportConnectorDefinition(
            name="gcs-export",
            bucket="@@ GCS BUCKET NAME HERE @@",
            credentials_dependency="gcs-credentials",
            node_dependency=ExportNodeDependency.all(name="python-node"),
        ),
    ])
    .add_participant(
        user_email,
        analyst_of=["gcs-export", "python-node"],
        data_owner_of=["gcs-credentials"],
    )
    .build()
)

# Publish the Data Clean Room
dcr = client.publish_analytics_dcr(dcr_definition)

# Upload GCS credentials
gcs_credentials = dcr.get_node("gcs-credentials")
gcs_credentials.upload_and_publish_dataset(
    GcsCredentials(
        credentials_json="@@ GCS SERVICE ACCOUNT JSON HERE @@",
    ).as_binary_io(),
    Key(),
    "credentials.txt",
)

# Execute the export (this will run the computation and export all files)
gcs_export_connector = dcr.get_node("gcs-export")
gcs_export_connector.run_computation_and_get_results_as_bytes()

When using ExportNodeDependency.all(), all files from the computation output will be exported to GCS with their original filenames as the object keys.
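
To see which objects were created, you can list the bucket with the google-cloud-storage client. A minimal sketch, assuming the same bucket and service account key as above; the two output files written by the script should appear under their original names:

from google.cloud import storage

# Sketch: list the exported objects. "service-account.json" is an assumed
# local path to the service account key used in the examples above.
storage_client = storage.Client.from_service_account_json("service-account.json")
for blob in storage_client.list_blobs("@@ GCS BUCKET NAME HERE @@"):
    print(blob.name)  # expected: "file1.txt" and "file2.txt"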

ExportNodeDependency options

The ExportNodeDependency class provides three methods for different export scenarios, compared side by side in the sketch after this list:

  • .raw(name, object_key): Export a raw data node with the specified object key
  • .file(name, file, object_key): Export a specific file from a computation's output with the specified object key
  • .all(name): Export all files from a computation's output, using their original filenames as object keys
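
For reference, the three variants as used in the examples above:

# Example 1: export a raw data node under a chosen object key
ExportNodeDependency.raw(name="raw-export-data", object_key="exported-data.txt")

# Example 2: export one named file from a computation's output
ExportNodeDependency.file(
    name="python-node",
    file="results.txt",
    object_key="exported-results.txt",
)

# Example 3: export every output file under its original filename
ExportNodeDependency.all(name="python-node")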