Skip to main content
A common task is downloading transcripts alongside their agent run metadata for local analysis. This page shows how to export both using DQL.

Exporting with DQL

Export all transcripts and metadata

from docent import Docent
import json

client = Docent()
collection_id = "your-collection-id"

# Download all transcripts joined with agent run metadata.
# Each result row is one transcript plus the id, name, and metadata of the
# agent run it belongs to.
result = client.execute_dql(
    collection_id,
    """SELECT
  t.id AS transcript_id,
  t.name AS transcript_name,
  t.messages,
  t.metadata_json AS transcript_metadata,
  ar.id AS agent_run_id,
  ar.name AS agent_run_name,
  ar.metadata_json AS agent_run_metadata
FROM transcripts t
JOIN agent_runs ar ON ar.id = t.agent_run_id"""
)

rows = client.dql_result_to_dicts(result)

# Parse the messages JSON from each transcript. Values that are not strings
# are left untouched (presumably already decoded by the client).
for row in rows:
    if isinstance(row["messages"], str):
        row["messages"] = json.loads(row["messages"])

print(f"Downloaded {len(rows)} transcripts")
# Guard the peek at the first row: an empty collection (or a query that
# matches nothing) would otherwise raise IndexError on rows[0].
if rows:
    print(f"First transcript has {len(rows[0]['messages'])} messages")

Paginating large collections

DQL caps results at 10,000 rows. If your collection has more transcripts, use LIMIT and OFFSET to paginate, together with an ORDER BY so that each page continues where the previous one ended:
page_size = 1000
all_rows = []
offset = 0

# Fetch one page per iteration, advancing the offset by a full page each
# time, and stop at the first page that comes back empty.
# ORDER BY t.id gives the pages a deterministic order.
while True:
    rows = client.dql_result_to_dicts(
        client.execute_dql(
            collection_id,
            f"""SELECT
  t.id AS transcript_id,
  t.name AS transcript_name,
  t.messages,
  ar.id AS agent_run_id,
  ar.name AS agent_run_name,
  ar.metadata_json AS agent_run_metadata
FROM transcripts t
JOIN agent_runs ar ON ar.id = t.agent_run_id
ORDER BY t.id
LIMIT {page_size} OFFSET {offset}""",
        )
    )
    if len(rows) == 0:
        break
    all_rows += rows
    offset = offset + page_size

print(f"Downloaded {len(all_rows)} total transcripts")

Filtering by metadata

You can narrow the export to specific runs using metadata filters:
# Select transcripts whose agent run metadata has environment = 'prod',
# and surface the run's 'model' metadata value as its own column.
prod_query = """SELECT
  t.id AS transcript_id,
  t.messages,
  ar.name AS agent_run_name,
  ar.metadata_json->>'model' AS model
FROM transcripts t
JOIN agent_runs ar ON ar.id = t.agent_run_id
WHERE ar.metadata_json->>'environment' = 'prod'"""

result = client.execute_dql(collection_id, prod_query)

See also