Skip to content

Commit 7cca6f5

Browse files
Graceful jinja template handling with user confirmation (#452)
* feat: Add user confirmation for non-Jinja prompts

  This commit introduces a confirmation step for prompts that do not contain Jinja2 syntax. It also modifies strict_render to automatically append document context when Jinja syntax is absent.

  Co-authored-by: ss.shankar505 <[email protected]>

* Refactor: Move DOCETL_CONSOLE import to function scope

  Co-authored-by: ss.shankar505 <[email protected]>

* Refactor: Move has_jinja_syntax to docetl.utils

  Co-authored-by: ss.shankar505 <[email protected]>

---------

Co-authored-by: Cursor Agent <[email protected]>
1 parent 9ebfc1b commit 7cca6f5

File tree

9 files changed, with 322 additions and 14 deletions.

docetl/operations/cluster.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from .base import BaseOperation
88
from .clustering_utils import get_embeddings_for_clustering
99
from .utils import RichLoopBar, strict_render
10+
from docetl.utils import has_jinja_syntax, prompt_user_for_non_jinja_confirmation
1011

1112

1213
class ClusterOperation(BaseOperation):
@@ -19,6 +20,19 @@ def __init__(
1920
self.max_batch_size: int = self.config.get(
2021
"max_batch_size", kwargs.get("max_batch_size", float("inf"))
2122
)
23+
# Check for non-Jinja prompts and prompt user for confirmation
24+
if "summary_prompt" in self.config and not has_jinja_syntax(
25+
self.config["summary_prompt"]
26+
):
27+
if not prompt_user_for_non_jinja_confirmation(
28+
self.config["summary_prompt"], self.config["name"], "summary_prompt"
29+
):
30+
raise ValueError(
31+
f"Operation '{self.config['name']}' cancelled by user. Please add Jinja2 template syntax to your summary_prompt."
32+
)
33+
# Mark that we need to append document statement (cluster uses inputs)
34+
self.config["_append_document_to_prompt"] = True
35+
self.config["_is_reduce_operation"] = True
2236

2337
def syntax_check(self) -> None:
2438
"""
@@ -48,11 +62,16 @@ def syntax_check(self) -> None:
4862
if not isinstance(self.config["summary_prompt"], str):
4963
raise TypeError("'prompt' must be a string")
5064

51-
# Check if the prompt is a valid Jinja2 template
52-
try:
53-
Template(self.config["summary_prompt"])
54-
except Exception as e:
55-
raise ValueError(f"Invalid Jinja2 template in 'prompt': {str(e)}")
65+
# Check if the prompt has Jinja syntax
66+
if not has_jinja_syntax(self.config["summary_prompt"]):
67+
# This will be handled during initialization with user confirmation
68+
pass
69+
else:
70+
# Check if the prompt is a valid Jinja2 template
71+
try:
72+
Template(self.config["summary_prompt"])
73+
except Exception as e:
74+
raise ValueError(f"Invalid Jinja2 template in 'prompt': {str(e)}")
5675

5776
# Check optional parameters
5877
if "max_batch_size" in self.config:

docetl/operations/equijoin.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,11 @@
1818
from docetl.operations.base import BaseOperation
1919
from docetl.operations.utils import strict_render
2020
from docetl.operations.utils.progress import RichLoopBar
21-
from docetl.utils import completion_cost
21+
from docetl.utils import (
22+
completion_cost,
23+
has_jinja_syntax,
24+
prompt_user_for_non_jinja_confirmation,
25+
)
2226

2327
# Global variables to store shared data
2428
_right_data = None
@@ -89,6 +93,41 @@ def validate_limits(cls, v):
8993
)
9094
return v
9195

96+
@field_validator("comparison_prompt")
def validate_comparison_prompt(cls, v):
    """Validate ``comparison_prompt`` as a Jinja2 template when it uses Jinja syntax.

    A prompt without Jinja syntax is returned unchanged; the operation's
    ``__init__`` asks the user to confirm such prompts.

    Args:
        v: The configured ``comparison_prompt`` value.

    Returns:
        The prompt, unmodified.

    Raises:
        ValueError: If the prompt contains Jinja syntax but
            ``jinja2.Template`` cannot compile it.
    """
    # Plain-text prompts are accepted here and confirmed at initialization.
    if not has_jinja_syntax(v):
        return v

    # Imported lazily: only needed when there is Jinja syntax to compile.
    from jinja2 import Template

    try:
        Template(v)
    except Exception as e:
        # Chain the cause so the underlying Jinja2 error stays in the traceback.
        raise ValueError(
            f"Invalid Jinja2 template in 'comparison_prompt': {str(e)}"
        ) from e
    return v
112+
113+
def __init__(self, *args, **kwargs):
    """Initialize the operation and confirm a non-Jinja ``comparison_prompt``.

    When ``comparison_prompt`` is configured but has no Jinja syntax, the
    user is asked to confirm it. If they confirm, the config is flagged with
    ``_append_document_to_comparison_prompt``.

    Raises:
        ValueError: If the user declines the confirmation.
    """
    super().__init__(*args, **kwargs)

    # Nothing to confirm when the prompt is absent or already templated.
    if "comparison_prompt" not in self.config or has_jinja_syntax(
        self.config["comparison_prompt"]
    ):
        return

    user_confirmed = prompt_user_for_non_jinja_confirmation(
        self.config["comparison_prompt"],
        self.config["name"],
        "comparison_prompt",
    )
    if not user_confirmed:
        raise ValueError(
            f"Operation '{self.config['name']}' cancelled by user. Please add Jinja2 template syntax to your comparison_prompt."
        )

    # Flag the config so the document statement gets appended.
    # Note: equijoin uses left and right, so strict_render handles it.
    self.config["_append_document_to_comparison_prompt"] = True
130+
92131
def compare_pair(
93132
self,
94133
comparison_prompt: str,

docetl/operations/extract.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
from docetl.operations.base import BaseOperation
1313
from docetl.operations.utils import RichLoopBar, strict_render
14+
from docetl.utils import has_jinja_syntax, prompt_user_for_non_jinja_confirmation
1415

1516

1617
class ExtractOperation(BaseOperation):
@@ -28,6 +29,10 @@ class schema(BaseOperation.schema):
2829

2930
@field_validator("prompt")
3031
def validate_prompt(cls, v):
32+
# Check if it has Jinja syntax
33+
if not has_jinja_syntax(v):
34+
# This will be handled during initialization with user confirmation
35+
return v
3136
try:
3237
Template(v)
3338
except Exception as e:
@@ -47,6 +52,16 @@ def __init__(
4752
self.extraction_key_suffix = f"_extracted_{self.config['name']}"
4853
else:
4954
self.extraction_key_suffix = self.config["extraction_key_suffix"]
55+
# Check for non-Jinja prompts and prompt user for confirmation
56+
if "prompt" in self.config and not has_jinja_syntax(self.config["prompt"]):
57+
if not prompt_user_for_non_jinja_confirmation(
58+
self.config["prompt"], self.config["name"], "prompt"
59+
):
60+
raise ValueError(
61+
f"Operation '{self.config['name']}' cancelled by user. Please add Jinja2 template syntax to your prompt."
62+
)
63+
# Mark that we need to append document statement
64+
self.config["_append_document_to_prompt"] = True
5065

5166
def _reformat_text_with_line_numbers(self, text: str, line_width: int = 80) -> str:
5267
"""

docetl/operations/link_resolve.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,29 @@
77

88
from docetl.operations.base import BaseOperation
99
from docetl.operations.utils import RichLoopBar, strict_render
10+
from docetl.utils import has_jinja_syntax, prompt_user_for_non_jinja_confirmation
1011

1112
from .clustering_utils import get_embeddings_for_clustering
1213

1314

1415
class LinkResolveOperation(BaseOperation):
16+
def __init__(self, *args, **kwargs):
    """Initialize the operation and confirm a non-Jinja ``comparison_prompt``.

    When ``comparison_prompt`` is configured but has no Jinja syntax, the
    user is asked to confirm it. If they confirm, the config is flagged with
    ``_append_document_to_comparison_prompt``.

    Raises:
        ValueError: If the user declines the confirmation.
    """
    super().__init__(*args, **kwargs)

    # Nothing to confirm when the prompt is absent or already templated.
    if "comparison_prompt" not in self.config or has_jinja_syntax(
        self.config["comparison_prompt"]
    ):
        return

    user_confirmed = prompt_user_for_non_jinja_confirmation(
        self.config["comparison_prompt"],
        self.config["name"],
        "comparison_prompt",
    )
    if not user_confirmed:
        raise ValueError(
            f"Operation '{self.config['name']}' cancelled by user. Please add Jinja2 template syntax to your comparison_prompt."
        )

    # Flag the config so the document statement gets appended.
    # Note: link_resolve uses link_value, id_value, and item, so
    # strict_render will handle it.
    self.config["_append_document_to_comparison_prompt"] = True
1533
def execute(self, input_data: list[dict]) -> tuple[list[dict], float]:
1634
"""
1735
Executes the resolve links operation on the provided dataset.

docetl/operations/map.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
import requests
1111
from jinja2 import Template
12+
13+
from docetl.utils import has_jinja_syntax, prompt_user_for_non_jinja_confirmation
1214
from litellm.utils import ModelResponse
1315
from pydantic import Field, field_validator, model_validator
1416
from tqdm import tqdm
@@ -49,6 +51,11 @@ class schema(BaseOperation.schema):
4951
@field_validator("batch_prompt")
5052
def validate_batch_prompt(cls, v):
5153
if v is not None:
54+
# Check if it has Jinja syntax
55+
if not has_jinja_syntax(v):
56+
# This will be handled during initialization with user confirmation
57+
# We'll mark it for later processing
58+
return v
5259
try:
5360
template = Template(v)
5461
# Test render with a minimal inputs list to validate template
@@ -62,6 +69,11 @@ def validate_batch_prompt(cls, v):
6269
@field_validator("prompt")
6370
def validate_prompt(cls, v):
6471
if v is not None:
72+
# Check if it has Jinja syntax
73+
if not has_jinja_syntax(v):
74+
# This will be handled during initialization with user confirmation
75+
# We'll mark it for later processing
76+
return v
6577
try:
6678
Template(v)
6779
except Exception as e:
@@ -118,6 +130,27 @@ def __init__(
118130
"max_batch_size", kwargs.get("max_batch_size", None)
119131
)
120132
self.clustering_method = "random"
133+
# Check for non-Jinja prompts and prompt user for confirmation
134+
if "prompt" in self.config and not has_jinja_syntax(self.config["prompt"]):
135+
if not prompt_user_for_non_jinja_confirmation(
136+
self.config["prompt"], self.config["name"], "prompt"
137+
):
138+
raise ValueError(
139+
f"Operation '{self.config['name']}' cancelled by user. Please add Jinja2 template syntax to your prompt."
140+
)
141+
# Mark that we need to append document statement
142+
self.config["_append_document_to_prompt"] = True
143+
if "batch_prompt" in self.config and not has_jinja_syntax(
144+
self.config["batch_prompt"]
145+
):
146+
if not prompt_user_for_non_jinja_confirmation(
147+
self.config["batch_prompt"], self.config["name"], "batch_prompt"
148+
):
149+
raise ValueError(
150+
f"Operation '{self.config['name']}' cancelled by user. Please add Jinja2 template syntax to your batch_prompt."
151+
)
152+
# Mark that we need to append document statement
153+
self.config["_append_document_to_batch_prompt"] = True
121154

122155
def _generate_calibration_context(self, input_data: list[dict]) -> str:
123156
"""

docetl/operations/reduce.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from pydantic import Field, field_validator, model_validator
2121

2222
from docetl.operations.base import BaseOperation
23+
from docetl.utils import has_jinja_syntax, prompt_user_for_non_jinja_confirmation
2324
from docetl.operations.clustering_utils import (
2425
cluster_documents,
2526
get_embeddings_for_clustering,
@@ -67,6 +68,10 @@ class schema(BaseOperation.schema):
6768
@field_validator("prompt")
6869
def validate_prompt(cls, v):
6970
if v is not None:
71+
# Check if it has Jinja syntax
72+
if not has_jinja_syntax(v):
73+
# This will be handled during initialization with user confirmation
74+
return v
7075
try:
7176
template = Template(v)
7277
template_vars = template.environment.parse(v).find_all(
@@ -84,6 +89,10 @@ def validate_prompt(cls, v):
8489
@field_validator("fold_prompt")
8590
def validate_fold_prompt(cls, v):
8691
if v is not None:
92+
# Check if it has Jinja syntax
93+
if not has_jinja_syntax(v):
94+
# This will be handled during initialization with user confirmation
95+
return v
8796
try:
8897
fold_template = Template(v)
8998
fold_template_vars = fold_template.environment.parse(v).find_all(
@@ -104,6 +113,10 @@ def validate_fold_prompt(cls, v):
104113
@field_validator("merge_prompt")
105114
def validate_merge_prompt(cls, v):
106115
if v is not None:
116+
# Check if it has Jinja syntax
117+
if not has_jinja_syntax(v):
118+
# This will be handled during initialization with user confirmation
119+
return v
107120
try:
108121
merge_template = Template(v)
109122
merge_template_vars = merge_template.environment.parse(v).find_all(
@@ -181,6 +194,39 @@ def __init__(self, *args, **kwargs):
181194
)
182195
self.intermediates = {}
183196
self.lineage_keys = self.config.get("output", {}).get("lineage", [])
197+
# Check for non-Jinja prompts and prompt user for confirmation
198+
if "prompt" in self.config and not has_jinja_syntax(self.config["prompt"]):
199+
if not prompt_user_for_non_jinja_confirmation(
200+
self.config["prompt"], self.config["name"], "prompt"
201+
):
202+
raise ValueError(
203+
f"Operation '{self.config['name']}' cancelled by user. Please add Jinja2 template syntax to your prompt."
204+
)
205+
# Mark that we need to append document statement (for reduce, use inputs)
206+
self.config["_append_document_to_prompt"] = True
207+
self.config["_is_reduce_operation"] = True
208+
if "fold_prompt" in self.config and not has_jinja_syntax(
209+
self.config["fold_prompt"]
210+
):
211+
if not prompt_user_for_non_jinja_confirmation(
212+
self.config["fold_prompt"], self.config["name"], "fold_prompt"
213+
):
214+
raise ValueError(
215+
f"Operation '{self.config['name']}' cancelled by user. Please add Jinja2 template syntax to your fold_prompt."
216+
)
217+
self.config["_append_document_to_fold_prompt"] = True
218+
self.config["_is_reduce_operation"] = True
219+
if "merge_prompt" in self.config and not has_jinja_syntax(
220+
self.config["merge_prompt"]
221+
):
222+
if not prompt_user_for_non_jinja_confirmation(
223+
self.config["merge_prompt"], self.config["name"], "merge_prompt"
224+
):
225+
raise ValueError(
226+
f"Operation '{self.config['name']}' cancelled by user. Please add Jinja2 template syntax to your merge_prompt."
227+
)
228+
self.config["_append_document_to_merge_prompt"] = True
229+
self.config["_is_reduce_operation"] = True
184230

185231
def execute(self, input_data: list[dict]) -> tuple[list[dict], float]:
186232
"""

docetl/operations/resolve.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,12 @@
1414

1515
from docetl.operations.base import BaseOperation
1616
from docetl.operations.utils import RichLoopBar, rich_as_completed, strict_render
17-
from docetl.utils import completion_cost, extract_jinja_variables
17+
from docetl.utils import (
18+
completion_cost,
19+
extract_jinja_variables,
20+
has_jinja_syntax,
21+
prompt_user_for_non_jinja_confirmation,
22+
)
1823

1924

2025
def find_cluster(item, cluster_map):
@@ -48,6 +53,10 @@ class schema(BaseOperation.schema):
4853
@field_validator("comparison_prompt")
4954
def validate_comparison_prompt(cls, v):
5055
if v is not None:
56+
# Check if it has Jinja syntax
57+
if not has_jinja_syntax(v):
58+
# This will be handled during initialization with user confirmation
59+
return v
5160
try:
5261
comparison_template = Template(v)
5362
comparison_vars = comparison_template.environment.parse(v).find_all(
@@ -70,6 +79,10 @@ def validate_comparison_prompt(cls, v):
7079
@field_validator("resolution_prompt")
7180
def validate_resolution_prompt(cls, v):
7281
if v is not None:
82+
# Check if it has Jinja syntax
83+
if not has_jinja_syntax(v):
84+
# This will be handled during initialization with user confirmation
85+
return v
7386
try:
7487
reduction_template = Template(v)
7588
reduction_vars = reduction_template.environment.parse(v).find_all(
@@ -123,6 +136,38 @@ def validate_output_schema(self, info: ValidationInfo):
123136

124137
return self
125138

139+
def __init__(self, *args, **kwargs):
    """Initialize the operation and confirm any prompt that lacks Jinja syntax.

    ``comparison_prompt`` is checked first, then ``resolution_prompt``. For
    each one that is configured without Jinja syntax, the user is asked to
    confirm it. If they confirm, the matching ``_append_document_to_*`` flag
    is set on the config; the resolution prompt also sets
    ``_is_reduce_operation``.

    Raises:
        ValueError: If the user declines either confirmation.
    """
    super().__init__(*args, **kwargs)

    # --- comparison_prompt -------------------------------------------
    comparison_is_plain_text = (
        "comparison_prompt" in self.config
        and not has_jinja_syntax(self.config["comparison_prompt"])
    )
    if comparison_is_plain_text:
        comparison_confirmed = prompt_user_for_non_jinja_confirmation(
            self.config["comparison_prompt"],
            self.config["name"],
            "comparison_prompt",
        )
        if not comparison_confirmed:
            raise ValueError(
                f"Operation '{self.config['name']}' cancelled by user. Please add Jinja2 template syntax to your comparison_prompt."
            )
        # Flag the config so the document statement gets appended.
        # Note: comparison_prompt uses input1 and input2, so it is handled
        # specially in strict_render.
        self.config["_append_document_to_comparison_prompt"] = True

    # --- resolution_prompt -------------------------------------------
    resolution_is_plain_text = (
        "resolution_prompt" in self.config
        and not has_jinja_syntax(self.config["resolution_prompt"])
    )
    if resolution_is_plain_text:
        resolution_confirmed = prompt_user_for_non_jinja_confirmation(
            self.config["resolution_prompt"],
            self.config["name"],
            "resolution_prompt",
        )
        if not resolution_confirmed:
            raise ValueError(
                f"Operation '{self.config['name']}' cancelled by user. Please add Jinja2 template syntax to your resolution_prompt."
            )
        # Flag the config so the document statement gets appended
        # (resolution uses inputs, hence the reduce marker).
        self.config["_append_document_to_resolution_prompt"] = True
        self.config["_is_reduce_operation"] = True
170+
126171
def compare_pair(
127172
self,
128173
comparison_prompt: str,

0 commit comments

Comments
 (0)