Agent Run
An `AgentRun` represents a complete agent run. It contains a collection of `Transcript` objects, as well as metadata (scores, experiment info, etc.).
- In single-agent (most common) settings, each `AgentRun` contains a single `Transcript`.
- In multi-agent settings, an `AgentRun` may contain multiple `Transcript` objects. For example, in a two-agent debate setting, you’ll have one `Transcript` per agent in the same `AgentRun`.
- Docent’s LLM search features operate over complete `AgentRun` objects. Runs are passed to LLMs in their `.text` form.
Usage
`AgentRun` objects require a list of `Transcript` objects (passing a dictionary is still accepted but deprecated), as well as a metadata dictionary whose keys are strings. The metadata should be JSON-serializable.
Copy
Ask AI
from docent.data_models import AgentRun, Transcript
from docent.data_models.chat import UserMessage, AssistantMessage
# One transcript holding the whole conversation (the single-agent case).
transcripts = [
    Transcript(
        messages=[UserMessage(content="Hello, what's 1 + 1?"), AssistantMessage(content="2")],
    ),
]

# Metadata keys are strings; the values should be JSON-serializable.
agent_run = AgentRun(
    transcripts=transcripts,
    metadata={"scores": {"correct": True, "reward": 1.0}},
)
Rendering
To see how your `AgentRun` is being rendered to an LLM, you can `print(agent_run.text)`. This might be useful for validating that your metadata is being included properly.
AgentRun
Bases: `BaseModel`
Represents a complete run of an agent with transcripts and metadata.
An AgentRun encapsulates the execution of an agent, storing all communication
transcripts and associated metadata. It must contain at least one transcript.
Attributes:
| Name | Type | Description |
|---|---|---|
| id | `str` | Unique identifier for the agent run, auto-generated by default. |
| name | `str \| None` | Optional human-readable name for the agent run. |
| description | `str \| None` | Optional description of the agent run. |
| transcripts | `list[Transcript]` | List of Transcript objects. |
| transcript_groups | `list[TranscriptGroup]` | List of TranscriptGroup objects. |
| metadata | `dict[str, Any]` | Additional structured metadata about the agent run as a JSON-serializable dictionary. |
docent/data_models/agent_run.py
docent/data_models/agent_run.py
Copy
Ask AI
# NOTE(review): this listing has lost its indentation. The code below is left
# exactly as listed; only `#` comments were added.
class AgentRun(BaseModel):
"""Represents a complete run of an agent with transcripts and metadata.
An AgentRun encapsulates the execution of an agent, storing all communication
transcripts and associated metadata. It must contain at least one transcript.
Attributes:
id: Unique identifier for the agent run, auto-generated by default.
name: Optional human-readable name for the agent run.
description: Optional description of the agent run.
transcripts: List of Transcript objects.
transcript_groups: List of TranscriptGroup objects.
metadata: Additional structured metadata about the agent run as a JSON-serializable dictionary.
"""
# Defaults: `id` is a fresh uuid4 string, `transcript_groups` an empty list,
# `metadata` an empty dict. `transcripts` has no default and must be supplied.
id: str = Field(default_factory=lambda: str(uuid4()))
name: str | None = None
description: str | None = None
transcripts: list[Transcript]
transcript_groups: list[TranscriptGroup] = Field(default_factory=list)
metadata: dict[str, Any] = Field(default_factory=dict)
##############
# Validators #
##############
# Backward compatibility: a dict passed for `transcripts` is logged as deprecated
# and replaced by a list of its values, each run through Transcript.model_validate.
# Any non-dict input is returned unchanged for normal field validation.
@field_validator("transcripts", mode="before")
@classmethod
def _validate_transcripts_type(cls, v: Any) -> Any:
if isinstance(v, dict):
logger.warning(
"dict[str, Transcript] for transcripts is deprecated. Use list[Transcript] instead."
)
v = cast(dict[str, Transcript], v)
return [Transcript.model_validate(t) for t in v.values()]
return v
# Same backward-compatibility handling as above, for `transcript_groups`.
@field_validator("transcript_groups", mode="before")
@classmethod
def _validate_transcript_groups_type(cls, v: Any) -> Any:
if isinstance(v, dict):
logger.warning(
"dict[str, TranscriptGroup] for transcript_groups is deprecated. Use list[TranscriptGroup] instead."
)
v = cast(dict[str, TranscriptGroup], v)
return [TranscriptGroup.model_validate(tg) for tg in v.values()]
return v
@model_validator(mode="after")
def _validate_transcripts_not_empty(self):
"""Validates that the agent run contains at least one transcript.
Raises:
ValueError: If the transcripts list is empty.
Returns:
AgentRun: The validated AgentRun instance.
"""
if len(self.transcripts) == 0:
raise ValueError("AgentRun must have at least one transcript")
return self
# The two lookup properties below rebuild their dict on every access.
@property
def transcript_dict(self) -> dict[str, Transcript]:
"""Returns a dictionary mapping transcript IDs to Transcript objects."""
return {t.id: t for t in self.transcripts}
@property
def transcript_group_dict(self) -> dict[str, TranscriptGroup]:
"""Returns a dictionary mapping transcript group IDs to TranscriptGroup objects."""
return {tg.id: tg for tg in self.transcript_groups}
# Render this run as text: `children_text` (the already-rendered children), plus
# optional metadata and metadata-comment sections, is wrapped in
# <|agent run ALIAS|> ... </|agent run ALIAS|> tags.
#   children_text: rendered text of the run's children.
#   agent_run_alias: used in the tags; a non-string value N becomes "RN".
#   indent: number of spaces passed to textwrap.indent when > 0.
#   render_metadata: whether to append the output of dump_metadata(self.metadata).
#   agent_run_metadata_comments: optional comments rendered via render_metadata_comments.
# NOTE(review): because indentation is missing here, the nesting of the statements
# below cannot be determined from this listing — in particular whether the
# metadata-comments section sits inside the `render_metadata` /
# `metadata_text is not None` checks. Confirm against the source file.
def to_text(
self,
children_text: str,
agent_run_alias: int | str = 0,
indent: int = 0,
render_metadata: bool = True,
agent_run_metadata_comments: list[Comment] | None = None,
) -> str:
if not isinstance(agent_run_alias, str):
agent_run_alias = f"R{agent_run_alias}"
if render_metadata:
metadata_text = dump_metadata(self.metadata)
# The metadata block is skipped when dump_metadata returns None.
if metadata_text is not None:
if indent > 0:
metadata_text = textwrap.indent(metadata_text, " " * indent)
# The metadata block's alias is the run alias with an "M" suffix.
metadata_alias = f"{agent_run_alias}M"
children_text += f"\n<|agent run metadata {metadata_alias}|>\n{metadata_text}\n</|agent run metadata {metadata_alias}|>"
# Add agent run metadata comments right underneath the metadata block
if agent_run_metadata_comments:
metadata_comments_text = render_metadata_comments(agent_run_metadata_comments)
if metadata_comments_text:
if indent > 0:
metadata_comments_text = textwrap.indent(
metadata_comments_text, " " * indent
)
children_text += f"\n<|agent run metadata comments|>\n{metadata_comments_text}\n</|agent run metadata comments|>"
# The accumulated text is indented as a whole before being wrapped in the run tags.
if indent > 0:
children_text = textwrap.indent(children_text, " " * indent)
return (
f"<|agent run {agent_run_alias}|>\n{children_text}\n</|agent run {agent_run_alias}|>\n"
)
transcript_dict property
Copy
Ask AI
transcript_dict: dict[str, Transcript]
transcript_group_dict property
Copy
Ask AI
transcript_group_dict: dict[str, TranscriptGroup]
AgentRunTree
Bases: `BaseModel`
docent/data_models/agent_run.py
docent/data_models/agent_run.py
Copy
Ask AI
class AgentRunTree(BaseModel):
    """Tree of an AgentRun's transcript groups and transcripts under a single root.

    Attributes:
        nodes: Every tree node keyed by its ID, including the global root node.
        transcript_id_to_idx: Index given to each transcript reachable from the
            global root, assigned in depth-first order.
        parent_map: Maps a child node ID to the ID of its parent node.
    """

    nodes: dict[str, AgentRunTreeNode]
    transcript_id_to_idx: dict[str, int]
    parent_map: dict[str, str]  # child_id -> parent_id

    @property
    def nodes_pruned(self):
        """Nodes with a transcript somewhere in their subtree, plus the global root."""
        return self._prune_transcriptless_nodes(self.nodes)

    @classmethod
    def from_agent_run(cls, agent_run: AgentRun) -> AgentRunTree:
        """Build the tree for ``agent_run``.

        Transcript groups are attached to their stated parent group (or to the
        global root when none is stated), transcripts are attached to their
        group, each node's children are ordered by ``created_at``, and finally
        a depth-first pass marks which nodes contain a transcript and numbers
        the transcripts.

        Args:
            agent_run: The run whose transcripts and transcript groups form the tree.

        Returns:
            The constructed AgentRunTree.
        """
        transcripts_by_id = agent_run.transcript_dict
        groups_by_id = agent_run.transcript_group_dict

        def _new_node(node_id: str, node_type: NodeType) -> AgentRunTreeNode:
            # Every node starts out childless; children are appended below.
            return AgentRunTreeNode(id=node_id, node_type=node_type, children_ids=[])

        # The tree always contains the root AgentRun node.
        nodes: dict[str, AgentRunTreeNode] = {
            GLOBAL_ROOT_ID: _new_node(GLOBAL_ROOT_ID, NodeType.AGENT_RUN)
        }
        parent_map: dict[str, str] = {}

        # Pass 1: transcript groups.
        for group_id, group in groups_by_id.items():
            # The group may already exist if an earlier group named it as its parent.
            if group_id not in nodes:
                nodes[group_id] = _new_node(group_id, NodeType.TRANSCRIPT_GROUP)

            # A missing parent ID means the group hangs off the global root.
            parent_id = group.parent_transcript_group_id or GLOBAL_ROOT_ID
            if parent_id not in nodes:
                parent_type = (
                    NodeType.AGENT_RUN if parent_id == GLOBAL_ROOT_ID else NodeType.TRANSCRIPT_GROUP
                )
                nodes[parent_id] = _new_node(parent_id, parent_type)

            nodes[parent_id].children_ids.append(group_id)
            parent_map[group_id] = parent_id

        # Pass 2: transcripts.
        for t_id, transcript in transcripts_by_id.items():
            nodes[t_id] = _new_node(t_id, NodeType.TRANSCRIPT)

            par_id = transcript.transcript_group_id or GLOBAL_ROOT_ID
            # Not expected to happen; fall back to the global root rather than fail.
            if par_id not in nodes:
                logger.error(
                    f"Parent {par_id} not found for transcript {t_id}. Assigning to global root as a fallback"
                )
                par_id = GLOBAL_ROOT_ID

            nodes[par_id].children_ids.append(t_id)
            parent_map[t_id] = par_id

        # Sort key: the child's created_at timestamp. Children with no timestamp
        # (or, unexpectedly, no backing object) use datetime.max and so sort last.
        # NOTE(review): datetime.max is naive; presumably created_at values are
        # naive too — confirm, since naive and aware datetimes cannot be compared.
        def _cmp(obj_id: str) -> datetime:
            obj_type = nodes[obj_id].node_type
            if obj_type == NodeType.TRANSCRIPT_GROUP:
                if obj_id in groups_by_id:
                    return groups_by_id[obj_id].created_at or datetime.max
                # This should never happen, but check anyways for safety
                logger.error(f"Transcript group {obj_id} not found")
                return datetime.max
            if obj_type == NodeType.TRANSCRIPT:
                if obj_id in transcripts_by_id:
                    return transcripts_by_id[obj_id].created_at or datetime.max
                # This should never happen, but check anyways for safety
                logger.error(f"Transcript {obj_id} not found")
                return datetime.max
            raise ValueError(f"Unknown node type: {obj_type}")

        for tree_node in nodes.values():
            tree_node.children_ids = sorted(tree_node.children_ids, key=_cmp)

        # Single DFS that both flags transcript-bearing subtrees and numbers transcripts.
        t_id_to_idx: dict[str, int] = {}

        def _dfs(u_id: str, next_idx: int) -> tuple[bool, int]:
            """Visit ``u_id``; return (subtree contains a transcript, next free index)."""
            current = nodes.get(u_id)
            if current is None:
                return False, next_idx

            if current.node_type == NodeType.TRANSCRIPT:
                # Transcripts are treated as leaves and numbered on first visit.
                t_id_to_idx[u_id] = next_idx
                current.has_transcript_in_subtree = True
                return True, next_idx + 1

            found = False
            for child_id in current.children_ids:
                child_found, next_idx = _dfs(child_id, next_idx)
                if child_found:
                    found = True
            current.has_transcript_in_subtree = found
            return found, next_idx

        _dfs(GLOBAL_ROOT_ID, 0)

        return cls(nodes=nodes, transcript_id_to_idx=t_id_to_idx, parent_map=parent_map)

    def _prune_transcriptless_nodes(self, nodes: dict[str, AgentRunTreeNode]):
        """Return the subset of ``nodes`` that lie on transcript-bearing branches.

        The global root is always kept, even when it has no transcript beneath it.
        """
        kept = {}
        for node_id, tree_node in nodes.items():
            if tree_node.has_transcript_in_subtree or node_id == GLOBAL_ROOT_ID:
                kept[node_id] = tree_node
        return kept
SelectionSpec
Bases: `BaseModel`
docent/data_models/agent_run.py
docent/data_models/agent_run.py
Copy
Ask AI
class SelectionSpec(BaseModel):
    """Per-node rendering settings for an agent run tree, keyed by node ID."""

    nodes: dict[str, SelectionSpecNode]

    @classmethod
    def from_agent_run_tree(cls, agent_run_tree: AgentRunTree) -> SelectionSpec:
        """Create a spec holding one newly constructed SelectionSpecNode per tree node."""
        spec_nodes = {}
        for node_id in agent_run_tree.nodes:
            spec_nodes[node_id] = SelectionSpecNode(node_id=node_id)
        return cls(nodes=spec_nodes)

    def is_default(self) -> bool:
        """Return True when every node shows everything.

        A node counts as default when it renders children by default, has no
        child overrides, and renders its own metadata.
        """
        for node in self.nodes.values():
            if node.render_children_default is not True:
                return False
            if len(node.render_children_overrides) != 0:
                return False
            if node.render_self_metadata is not True:
                return False
        return True
is_default
Copy
Ask AI
is_default() -> bool
docent/data_models/agent_run.py
docent/data_models/agent_run.py
Copy
Ask AI
def is_default(self) -> bool:
    """Return True when every node shows everything.

    A node counts as default when it renders children by default, has no
    child overrides, and renders its own metadata.
    """
    for node in self.nodes.values():
        if node.render_children_default is not True:
            return False
        if len(node.render_children_overrides) != 0:
            return False
        if node.render_self_metadata is not True:
            return False
    return True
AgentRunView
docent/data_models/agent_run.py
docent/data_models/agent_run.py
Copy
Ask AI
class AgentRunView:
    """View over an AgentRun that controls which tree nodes and metadata get rendered.

    Holds the run, a SelectionSpec describing what to show, and optional
    comments indexed by the location each of their citations points to.
    """

    def __init__(
        self,
        agent_run: AgentRun,
        selection_spec: SelectionSpec | None = None,
        comments: list[Comment] | None = None,
    ):
        """Create a view of ``agent_run``.

        Args:
            agent_run: The run to render.
            selection_spec: What to show; when None, a spec is created from the run's tree.
            comments: Optional comments, indexed under every location they cite.
        """
        self.agent_run = agent_run
        # Must exist before `self.tree` is read below.
        self._cached_tree: AgentRunTree | None = None

        if selection_spec is not None:
            self.selection_spec = selection_spec
        else:
            self.selection_spec = SelectionSpec.from_agent_run_tree(self.tree)

        self.comments = comments

        # Index of which comments belong to each location. There are 4 kinds of
        # location: AR metadata, transcript metadata, block metadata, and block content.
        # TODO(mengk): there's quite a bit of data duplication here
        # agent_run_id -> comments
        self._agent_run_metadata_comment_index: dict[str, list[Comment]] = {}
        # transcript_id -> comments
        self._transcript_metadata_comment_index: dict[str, list[Comment]] = {}
        # (transcript_id, block_idx) -> comments
        self._block_metadata_comment_index: dict[tuple[str, int], list[Comment]] = {}
        # (transcript_id, block_idx) -> comments
        self._block_content_comment_index: dict[tuple[str, int], list[Comment]] = {}

        for comment in self.comments or []:
            for citation in comment.citations:
                item = citation.target.item
                if isinstance(item, AgentRunMetadataItem):
                    bucket = self._agent_run_metadata_comment_index.setdefault(
                        item.agent_run_id, []
                    )
                elif isinstance(item, TranscriptMetadataItem):
                    bucket = self._transcript_metadata_comment_index.setdefault(
                        item.transcript_id, []
                    )
                elif isinstance(item, TranscriptBlockMetadataItem):
                    bucket = self._block_metadata_comment_index.setdefault(
                        (item.transcript_id, item.block_idx), []
                    )
                else:
                    # Must be TranscriptBlockContentItem
                    bucket = self._block_content_comment_index.setdefault(
                        (item.transcript_id, item.block_idx), []
                    )
                bucket.append(comment)

    @property
    def tree(self) -> AgentRunTree:
        """The run's AgentRunTree, built on first access and cached afterwards."""
        cached = self._cached_tree
        if cached is None:
            cached = self._cached_tree = AgentRunTree.from_agent_run(self.agent_run)
        return cached

    @classmethod
    def from_agent_run(
        cls, agent_run: AgentRun, comments: list[Comment] | None = None
    ) -> AgentRunView:
        """Create a view with the default selection spec for ``agent_run``."""
        return cls(agent_run=agent_run, comments=comments)

    def to_dict(self) -> dict[str, Any]:
        """Serialize the view for storage.

        The result holds the run's ID and the selection spec dumped in JSON
        mode; a default spec is stored as None instead.
        """
        payload: dict[str, Any] = {"agent_run_id": self.agent_run.id}
        spec = self.selection_spec
        payload["selection_spec"] = None if spec.is_default() else spec.model_dump(mode="json")
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any], agent_run: AgentRun) -> AgentRunView:
        """Rebuild a view from serialized ``data`` and its AgentRun.

        A missing or None "selection_spec" entry yields a view with the default spec.
        """
        spec_data = data.get("selection_spec")
        if spec_data is None:
            return cls(agent_run=agent_run)
        return cls(agent_run=agent_run, selection_spec=SelectionSpec.model_validate(spec_data))

    #######################
    # Core text rendering #
    #######################

    def to_text(
        self,
        agent_run_alias: int | str = 0,
        t_idx_map: dict[str, int] | None = None,
        indent: int = 0,
        full_tree: bool = False,
    ):
        """Render the visible part of the run as text.

        Args:
            agent_run_alias: Alias forwarded to the AgentRun's own ``to_text``.
            t_idx_map: Transcript ID -> alias index; defaults to the tree's own numbering.
            indent: Indent forwarded to every ``to_text`` call made here.
            full_tree: When True, walk all tree nodes instead of only the
                transcript-bearing ones.
        """
        visible_nodes = self.tree.nodes if full_tree else self.tree.nodes_pruned
        if t_idx_map is None:
            t_idx_map = self.tree.transcript_id_to_idx

        t_dict = self.agent_run.transcript_dict
        tg_dict = self.agent_run.transcript_group_dict

        def _for_transcript(index, transcript_id):
            # Narrow a (transcript_id, block_idx) index down to one transcript,
            # keyed by block_idx; None when that transcript has no entries.
            per_block = {
                block_idx: block_comments
                for (t_id, block_idx), block_comments in index.items()
                if t_id == transcript_id
            }
            return per_block or None

        # Traverse the tree and render the string
        def _recurse(u_id: str) -> str:
            u = visible_nodes.get(u_id)
            if u is None:
                return ""

            children_texts: list[str] = []
            for v_id in u.children_ids:
                # Skip children the selection spec hides.
                if not self.should_render_child(u_id, v_id):
                    continue
                # Skip children absent from the node set being walked.
                v = visible_nodes.get(v_id)
                if v is None:
                    continue

                if v.node_type == NodeType.TRANSCRIPT_GROUP:
                    children_texts.append(_recurse(v_id))
                elif v.node_type == NodeType.TRANSCRIPT:
                    # Gather comments for this transcript
                    transcript_metadata_comments = self._transcript_metadata_comment_index.get(v_id)
                    block_metadata_comments = _for_transcript(
                        self._block_metadata_comment_index, v_id
                    )
                    block_content_comments = _for_transcript(
                        self._block_content_comment_index, v_id
                    )
                    children_texts.append(
                        t_dict[v_id].to_text(
                            transcript_alias=t_idx_map[v_id],
                            indent=indent,
                            render_metadata=self.should_render_metadata(v_id),
                            transcript_metadata_comments=transcript_metadata_comments,
                            block_metadata_comments=block_metadata_comments,
                            block_content_comments=block_content_comments,
                        )
                    )
                else:
                    raise ValueError(f"Unknown node type: {v.node_type}")

            children_text = "\n".join(children_texts)

            # Any non-root node is a transcript group, which renders itself.
            if u_id != GLOBAL_ROOT_ID:
                return tg_dict[u_id].to_text(
                    children_text=children_text,
                    indent=indent,
                    render_metadata=self.should_render_metadata(u_id),
                )

            # The global root is rendered by the AgentRun, with its metadata comments.
            agent_run_metadata_comments = self._agent_run_metadata_comment_index.get(
                self.agent_run.id
            )
            return self.agent_run.to_text(
                children_text,
                agent_run_alias=agent_run_alias,
                indent=indent,
                render_metadata=self.should_render_metadata(GLOBAL_ROOT_ID),
                agent_run_metadata_comments=agent_run_metadata_comments,
            )

        return _recurse(GLOBAL_ROOT_ID)

    #################
    # Query methods #
    #################

    def should_render_child(self, parent_id: str, child_id: str) -> bool:
        """Decide whether ``child_id`` is rendered under ``parent_id``.

        A parent without a spec renders everything. Otherwise the parent's
        overrides list the exceptions to its default: hidden children when the
        default is to render, shown children when the default is to hide.
        """
        parent_spec = self.selection_spec.nodes.get(parent_id)
        if parent_spec is None:
            return True
        overridden = child_id in parent_spec.render_children_overrides
        return not overridden if parent_spec.render_children_default else overridden

    def should_render_metadata(self, node_id: str) -> bool:
        """Decide whether a node's metadata is rendered; nodes without a spec render it."""
        node_spec = self.selection_spec.nodes.get(node_id)
        return True if node_spec is None else node_spec.render_self_metadata

    #########################################
    # Show/hide parts of the canonical tree #
    #########################################

    def set_metadata_selection(self, node_id: str, selected: bool) -> None:
        """Set whether a node's metadata is rendered.

        When enabling (True), the path from the root to this node is also made
        visible by adjusting the render settings of its ancestors.

        Args:
            node_id: The ID of the node to modify.
            selected: Whether the node's metadata should be rendered.
        """
        spec = self.selection_spec.nodes.get(node_id)
        if spec is not None:
            spec.render_self_metadata = selected

        # NOTE(review): the original listing has no indentation; this check is
        # placed at function level to match the docstring and set_node_selection —
        # confirm against the source file.
        if selected:
            self._ensure_path_to_root_selected(node_id)

    def set_node_selection(self, node_id: str, selected: bool) -> None:
        """Set whether a node and its descendants are rendered.

        The children selection state is set recursively for all descendants.
        When enabling (True), the path from the root to this node is also made
        visible; when disabling (False), the parent is made to exclude this
        node. The metadata rendering state of each node is not touched.

        Args:
            node_id: The ID of the node to modify.
            selected: Whether the node and its descendants should be rendered.
        """
        self._set_children_selected_recursive(node_id, selected=selected)
        if not selected:
            self._ensure_node_excluded_from_parent(node_id)
            return
        self._ensure_path_to_root_selected(node_id)

    def _set_children_selected_recursive(self, node_id: str, selected: bool) -> None:
        """Reset a node and all its descendants to render (or hide) every child.

        Recursion stops at any node missing from the tree or from the spec.
        """
        node = self.tree.nodes.get(node_id)
        if node is None:
            return
        spec = self.selection_spec.nodes.get(node_id)
        if spec is None:
            return

        spec.render_children_default = selected
        spec.render_children_overrides.clear()
        for child_id in node.children_ids:
            self._set_children_selected_recursive(child_id, selected=selected)

    def _get_parent_id(self, node_id: str) -> str | None:
        """Return the parent ID of a node, or None for the root or an unknown node."""
        return None if node_id == GLOBAL_ROOT_ID else self.tree.parent_map.get(node_id)

    def _set_parent_renders_child(self, parent_id: str, child_id: str, renders: bool) -> None:
        """Adjust the parent's overrides so it renders (or stops rendering) the child.

        Does nothing when the parent has no spec.
        """
        parent_spec = self.selection_spec.nodes.get(parent_id)
        if parent_spec is None:
            return
        # An override is only needed when the wanted outcome differs from the default.
        if renders != parent_spec.render_children_default:
            parent_spec.render_children_overrides.add(child_id)
        else:
            parent_spec.render_children_overrides.discard(child_id)

    def _ensure_path_to_root_selected(self, node_id: str) -> None:
        """Walk from ``node_id`` up to the root, making each parent render its child."""
        child_id = node_id
        parent_id = self._get_parent_id(child_id)
        while parent_id is not None:
            self._set_parent_renders_child(parent_id, child_id, renders=True)
            child_id, parent_id = parent_id, self._get_parent_id(parent_id)

    def _ensure_node_excluded_from_parent(self, node_id: str) -> None:
        """Make the node's parent, if it has one, stop rendering it."""
        parent_id = self._get_parent_id(node_id)
        if parent_id is None:
            return
        self._set_parent_renders_child(parent_id, node_id, renders=False)
to_dict
Copy
Ask AI
to_dict() -> dict[str, Any]
docent/data_models/agent_run.py
docent/data_models/agent_run.py
Copy
Ask AI
def to_dict(self) -> dict[str, Any]:
    """Serialize the view for storage.

    The result holds the run's ID and the selection spec dumped in JSON
    mode; a default spec is stored as None instead.
    """
    payload: dict[str, Any] = {"agent_run_id": self.agent_run.id}
    spec = self.selection_spec
    payload["selection_spec"] = None if spec.is_default() else spec.model_dump(mode="json")
    return payload
from_dict classmethod
Copy
Ask AI
from_dict(data: dict[str, Any], agent_run: AgentRun) -> AgentRunView
docent/data_models/agent_run.py
docent/data_models/agent_run.py
Copy
Ask AI
@classmethod
def from_dict(cls, data: dict[str, Any], agent_run: AgentRun) -> AgentRunView:
    """Rebuild a view from serialized ``data`` and its AgentRun.

    A missing or None "selection_spec" entry yields a view with the default spec.
    """
    spec_data = data.get("selection_spec")
    if spec_data is None:
        return cls(agent_run=agent_run)
    return cls(agent_run=agent_run, selection_spec=SelectionSpec.model_validate(spec_data))
should_render_child
Copy
Ask AI
should_render_child(parent_id: str, child_id: str) -> bool
docent/data_models/agent_run.py
docent/data_models/agent_run.py
Copy
Ask AI
def should_render_child(self, parent_id: str, child_id: str) -> bool:
    """Decide whether ``child_id`` is rendered under ``parent_id``.

    A parent without a spec renders everything. Otherwise the parent's
    overrides list the exceptions to its default: hidden children when the
    default is to render, shown children when the default is to hide.
    """
    parent_spec = self.selection_spec.nodes.get(parent_id)
    if parent_spec is None:
        return True
    overridden = child_id in parent_spec.render_children_overrides
    return not overridden if parent_spec.render_children_default else overridden
should_render_metadata
Copy
Ask AI
should_render_metadata(node_id: str) -> bool
docent/data_models/agent_run.py
docent/data_models/agent_run.py
Copy
Ask AI
def should_render_metadata(self, node_id: str) -> bool:
    """Decide whether a node's metadata is rendered; nodes without a spec render it."""
    node_spec = self.selection_spec.nodes.get(node_id)
    return True if node_spec is None else node_spec.render_self_metadata
set_metadata_selection
Copy
Ask AI
set_metadata_selection(node_id: str, selected: bool) -> None
| Name | Type | Description | Default |
|---|---|---|---|
| node_id | `str` | The ID of the node to modify. | required |
| selected | `bool` | Whether the node’s metadata should be rendered. | required |
docent/data_models/agent_run.py
docent/data_models/agent_run.py
Copy
Ask AI
def set_metadata_selection(self, node_id: str, selected: bool) -> None:
    """Set whether a node's metadata is rendered.

    When enabling (True), the path from the root to this node is also made
    visible by adjusting the render settings of its ancestors.

    Args:
        node_id: The ID of the node to modify.
        selected: Whether the node's metadata should be rendered.
    """
    spec = self.selection_spec.nodes.get(node_id)
    if spec is not None:
        spec.render_self_metadata = selected

    # NOTE(review): the original listing has no indentation; this check is
    # placed at function level to match the docstring and set_node_selection —
    # confirm against the source file.
    if selected:
        self._ensure_path_to_root_selected(node_id)
set_node_selection
Copy
Ask AI
set_node_selection(node_id: str, selected: bool) -> None
| Name | Type | Description | Default |
|---|---|---|---|
| node_id | `str` | The ID of the node to modify. | required |
| selected | `bool` | Whether the node and its descendants should be rendered. | required |
docent/data_models/agent_run.py
docent/data_models/agent_run.py
Copy
Ask AI
def set_node_selection(self, node_id: str, selected: bool) -> None:
    """Set whether a node and its descendants are rendered.

    The children selection state is set recursively for all descendants.
    When enabling (True), the path from the root to this node is also made
    visible; when disabling (False), the parent is made to exclude this
    node. The metadata rendering state of each node is not touched.

    Args:
        node_id: The ID of the node to modify.
        selected: Whether the node and its descendants should be rendered.
    """
    self._set_children_selected_recursive(node_id, selected=selected)
    if not selected:
        self._ensure_node_excluded_from_parent(node_id)
        return
    self._ensure_path_to_root_selected(node_id)

