Skip to content
Open
118 changes: 106 additions & 12 deletions .github/tests/lm_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import lotus
from lotus.models import LM, SentenceTransformersRM
from lotus.types import CascadeArgs
from lotus.types import CascadeArgs, PromptStrategy
from lotus.vector_store import FaissVS

################################################################################
Expand Down Expand Up @@ -161,7 +161,7 @@ def test_map_fewshot(setup_models, model):
examples = {"School": ["Stanford", "MIT"], "Answer": ["CA", "MA"]}
examples_df = pd.DataFrame(examples)
user_instruction = "What state is {School} in? Respond only with the two-letter abbreviation."
df = df.sem_map(user_instruction, examples=examples_df, suffix="State")
df = df.sem_map(user_instruction, prompt_strategy=PromptStrategy(dems=examples_df), suffix="State")

# clean up the state names to be more robust to free-form text
df["State"] = df["State"].str[-2:].str.lower()
Expand Down Expand Up @@ -283,7 +283,7 @@ def test_filter_operation_cot(setup_models, model):
}
df = pd.DataFrame(data)
user_instruction = "{Text} I have at least one apple"
filtered_df = df.sem_filter(user_instruction, strategy="cot")
filtered_df = df.sem_filter(user_instruction, prompt_strategy=PromptStrategy(cot=True))
expected_df = pd.DataFrame({"Text": ["I had two apples, then I gave away one", "My friend gave me an apple"]})
assert filtered_df.equals(expected_df)

Expand All @@ -303,11 +303,12 @@ def test_filter_operation_cot_fewshot(setup_models, model):
}
df = pd.DataFrame(data)
examples = {
"Sequence": ["1, 2, 3", "penny, nickel, dime, quarter", "villiage, town, city"],
"Answer": [True, True, True],
"Sequence": ["1, 2, 3", "A, B, C", "penny, nickel, dime, quarter", "villiage, town, city"],
"Answer": [True, True, True, True],
"Reasoning": [
"1, 2, 3 is an increasing sequence of numbers",
"penny, nickel, dime, quarter is an increasing sequence of coins",
"A, B, C is an increasing sequence of letters in alphabetical order",
"penny, nickel, dime, quarter is an increasing sequence of coins by value",
"villiage, town, city is an increasing sequence of settlements",
],
}
Expand All @@ -316,9 +317,9 @@ def test_filter_operation_cot_fewshot(setup_models, model):
user_instruction = "{Sequence} is increasing"
filtered_df = df.sem_filter(
user_instruction,
strategy="cot",
examples=examples_df,
additional_cot_instructions="Assume the most typical or logical case.",
prompt_strategy=PromptStrategy(
cot=True, dems=examples_df, additional_cot_instructions="Assume the most typical or logical case."
),
)
expected_df = pd.DataFrame(
{
Expand Down Expand Up @@ -347,13 +348,13 @@ def test_filter_operation_cot_fewshot_no_reasoning(setup_models, model):
}
df = pd.DataFrame(data)
examples = {
"Sequence": ["1, 2, 3", "penny, nickel, dime, quarter", "villiage, town, city"],
"Answer": [True, True, True],
"Sequence": ["1, 2, 3", "penny, nickel, dime, quarter", "villiage, town, city", "A, B, C"],
"Answer": [True, True, True, True],
}
examples_df = pd.DataFrame(examples)

user_instruction = "{Sequence} is increasing"
filtered_df = df.sem_filter(user_instruction, strategy="cot", examples=examples_df)
filtered_df = df.sem_filter(user_instruction, prompt_strategy=PromptStrategy(cot=True, dems=examples_df))
expected_df = pd.DataFrame(
{
"Sequence": [
Expand Down Expand Up @@ -550,6 +551,99 @@ def test_custom_tokenizer():
assert tokens < 100


################################################################################
# Auto-bootstrapping tests
################################################################################
@pytest.mark.parametrize("model", get_enabled("gpt-4o-mini"))
def test_auto_bootstrapping_filter(setup_models, model):
    """sem_filter with auto-bootstrapped CoT demonstrations returns labels and explanations."""
    language_model = setup_models[model]
    lotus.settings.configure(lm=language_model)

    # A mix of math-heavy and humanities courses to filter over.
    courses = [
        "Linear Algebra",
        "Poetry Writing",
        "Calculus II",
        "Art History",
        "Statistics",
        "Creative Writing",
        "Machine Learning",
        "Philosophy",
    ]
    df = pd.DataFrame({"Course Name": courses})
    user_instruction = "{Course Name} requires a lot of math"

    # Bootstrap up to two demonstrations automatically and keep every row.
    result = df.sem_filter(
        user_instruction,
        prompt_strategy=PromptStrategy(cot=True, dems="auto", max_dems=2),
        return_explanations=True,
        return_all=True,
    )

    # The full output should carry both the predicted label and its explanation.
    assert "filter_label" in result.columns
    assert "explanation_filter" in result.columns

    # At least one genuinely math-heavy course should be labeled positive.
    predicted_math = result[result["filter_label"]]["Course Name"].tolist()
    known_math = ["Linear Algebra", "Calculus II", "Statistics", "Machine Learning"]
    assert any(course in known_math for course in predicted_math)


@pytest.mark.parametrize("model", get_enabled("gpt-4o-mini"))
def test_auto_bootstrapping_map(setup_models, model):
    """sem_map with auto-bootstrapped CoT demonstrations yields valid difficulty labels."""
    language_model = setup_models[model]
    lotus.settings.configure(lm=language_model)

    df = pd.DataFrame({"Course Name": ["Linear Algebra", "Poetry Writing", "Calculus II", "Art History"]})
    user_instruction = "What is the difficulty level of {Course Name}? Answer: Beginner, Intermediate, or Advanced"

    # Map with up to two automatically bootstrapped demonstrations.
    result = df.sem_map(
        user_instruction,
        prompt_strategy=PromptStrategy(cot=True, dems="auto", max_dems=2),
        return_explanations=True,
    )

    # Output columns for the mapped value and its explanation must be present.
    assert "_map" in result.columns
    assert "explanation_map" in result.columns

    # Every produced label must be one of the three allowed difficulty levels.
    allowed = ["beginner", "intermediate", "advanced"]
    for level in result["_map"]:
        assert level.lower() in allowed


@pytest.mark.parametrize("model", get_enabled("gpt-4o-mini"))
def test_auto_bootstrapping_with_teacher_model(setup_models, model):
    """Auto-bootstrapping accepts an explicit teacher model for demonstration generation."""
    student = setup_models[model]
    teacher = setup_models[model]  # the same model doubles as the teacher in tests
    lotus.settings.configure(lm=student)

    df = pd.DataFrame({"Text": ["I am happy", "I am sad", "I am excited", "I am tired"]})
    user_instruction = "{Text} expresses a positive emotion"

    # Bootstrap demonstrations using the explicitly supplied teacher model.
    result = df.sem_filter(
        user_instruction,
        prompt_strategy=PromptStrategy(cot=True, dems="auto", max_dems=2, teacher_lm=teacher),
        return_all=True,
    )

    # The full output should carry the predicted label column.
    assert "filter_label" in result.columns

    # At least one clearly positive sentence should be labeled True.
    positives = result[result["filter_label"]]["Text"].tolist()
    assert any(text in ["I am happy", "I am excited"] for text in positives)


################################################################################
# Eval tests
################################################################################
Expand Down
6 changes: 3 additions & 3 deletions .github/tests/multimodality_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,9 @@ def test_topk_operation(setup_models, model):
]
)

strategies = ["quick", "heap", "naive"]
for strategy in strategies:
sorted_df = df.sem_topk(user_instruction, K=3, strategy=strategy)
methods = ["quick", "heap", "naive"]
for method in methods:
sorted_df = df.sem_topk(user_instruction, K=3, method=method)

top_2_actual = set(sorted_df["image"].values)
assert top_2_expected.issubset(top_2_actual)
Expand Down
4 changes: 2 additions & 2 deletions examples/model_examples/deepseek.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import lotus
from lotus.models import LM
from lotus.types import ReasoningStrategy
from lotus.types import PromptStrategy

# Set up model
lm = LM(model="ollama/deepseek-r1:7b", temperature=0.6)
Expand Down Expand Up @@ -33,6 +33,6 @@
)

# Run semantic mapping with CoT strategy
df = df.sem_map(user_instruction, return_explanations=True, strategy=ReasoningStrategy.ZS_COT)
df = df.sem_map(user_instruction, return_explanations=True, prompt_strategy=PromptStrategy(cot=True))

print(df)
3 changes: 2 additions & 1 deletion examples/op_examples/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import lotus
from lotus.models import LM
from lotus.types import PromptStrategy

lm = LM(model="gpt-4o-mini")

Expand All @@ -17,5 +18,5 @@
}
df = pd.DataFrame(data)
user_instruction = "{Course Name} requires a lot of math"
df = df.sem_filter(user_instruction, strategy="cot")
df = df.sem_filter(user_instruction, prompt_strategy=PromptStrategy(cot=True))
print(df)
6 changes: 3 additions & 3 deletions examples/op_examples/filter_cot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import lotus
from lotus.models import LM
from lotus.types import ReasoningStrategy
from lotus.types import PromptStrategy

lm = LM(model="gpt-4o-mini")

Expand All @@ -20,9 +20,9 @@
}
df = pd.DataFrame(data)
user_instruction = "{Text} I have at least one apple"
# filtered_df = df.sem_filter(user_instruction, strategy="cot", return_all=True)
# Old way: filtered_df = df.sem_filter(user_instruction, strategy="cot", return_all=True)
filtered_df = df.sem_filter(
user_instruction, strategy=ReasoningStrategy.ZS_COT, return_all=True, return_explanations=True
user_instruction, prompt_strategy=PromptStrategy(cot=True), return_all=True, return_explanations=True
) # uncomment to see reasoning chains

print(filtered_df)
Expand Down
4 changes: 2 additions & 2 deletions examples/op_examples/map_deepseek_cot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import lotus
from lotus.models import LM
from lotus.types import ReasoningStrategy
from lotus.types import PromptStrategy

lm = LM(model="ollama/deepseek-r1:7b", temperature=0.5)

Expand All @@ -17,5 +17,5 @@
}
df = pd.DataFrame(data)
user_instruction = "What is a similar course to {Course Name}. Just give the course name."
df = df.sem_map(user_instruction, return_explanations=True, strategy=ReasoningStrategy.ZS_COT)
df = df.sem_map(user_instruction, return_explanations=True, prompt_strategy=PromptStrategy(cot=True))
print(df)
3 changes: 2 additions & 1 deletion examples/op_examples/multimodal_ops/join.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import lotus
from lotus.dtype_extensions import ImageArray
from lotus.models import LM
from lotus.types import PromptStrategy

lotus.settings.configure(lm=LM(model="gpt-4o-mini"))

Expand All @@ -17,6 +18,6 @@
image_df = pd.DataFrame({"image": ImageArray(image_paths), "image_path": image_paths})
labels_df = pd.DataFrame({"label": [0, 1]})

df = image_df.sem_join(labels_df, "{image} represents the number {label}", strategy="zs-cot")
df = image_df.sem_join(labels_df, "{image} represents the number {label}", prompt_strategy=PromptStrategy(cot=True))

print(df)
78 changes: 78 additions & 0 deletions examples/op_examples/simple_reasoning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import pandas as pd

import lotus
from lotus.models import LM
from lotus.types import PromptStrategy

# Set up the language model shared by every semantic operator below.
model = LM(model="gpt-4o-mini")
lotus.settings.configure(lm=model)

# A small course catalog mixing math-heavy and humanities classes.
courses = pd.DataFrame(
    {
        "Course Name": [
            "Linear Algebra",
            "Poetry Writing",
            "Calculus II",
            "Art History",
            "Statistics",
            "Creative Writing",
        ]
    }
)
instruction = "{Course Name} requires a lot of math"

# Example 1: plain filtering without any reasoning strategy.
print("=== 1. Basic Filtering ===")
plain = courses.sem_filter(instruction, return_all=True)
print(plain[["Course Name", "filter_label"]])
print()

# Example 2: zero-shot chain-of-thought reasoning.
print("=== 2. Chain-of-Thought Reasoning ===")
cot = courses.sem_filter(
    instruction, prompt_strategy=PromptStrategy(cot=True), return_explanations=True, return_all=True
)
print(cot[["Course Name", "filter_label", "explanation_filter"]])
print()

# Example 3: hand-written few-shot demonstrations.
print("=== 3. Few-shot Examples ===")
demos = pd.DataFrame({"Course Name": ["Machine Learning", "Literature", "Physics"], "Answer": [True, False, True]})

fewshot = courses.sem_filter(
    instruction,
    prompt_strategy=PromptStrategy(dems=demos),
    return_all=True,
)
print(fewshot[["Course Name", "filter_label"]])
print()

# Example 4: demonstrations that also carry reasoning chains, combined with CoT.
print("=== 4. CoT + Demonstrations ===")
demos_with_reasoning = pd.DataFrame(
    {
        "Course Name": ["Machine Learning", "Literature", "Physics"],
        "Answer": [True, False, True],
        "Reasoning": [
            "Machine Learning requires linear algebra, calculus, and statistics",
            "Literature focuses on reading, writing, and analysis - no math required",
            "Physics is fundamentally mathematical with equations and calculations",
        ],
    }
)

cot_fewshot = courses.sem_filter(
    instruction,
    prompt_strategy=PromptStrategy(cot=True, dems=demos_with_reasoning),
    return_explanations=True,
    return_all=True,
)
print(cot_fewshot[["Course Name", "filter_label", "explanation_filter"]])
print()

# Example 5: let the library bootstrap its own demonstrations automatically.
print("=== 5. Bootstrapped Demonstrations ===")

auto = courses.sem_filter(
    instruction,
    prompt_strategy=PromptStrategy(cot=True, dems="auto", max_dems=2),
    return_explanations=True,
    return_all=True,
)
print("Automatically generated demonstrations:")
print(auto[["Course Name", "filter_label", "explanation_filter"]])
print()
4 changes: 2 additions & 2 deletions examples/op_examples/top_k_deepseek_cot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import lotus
from lotus.models import LM
from lotus.types import ReasoningStrategy
from lotus.types import PromptStrategy

lm = LM(model="ollama/deepseek-r1:7b", temperature=0.6)
lotus.settings.configure(lm=lm)
Expand All @@ -24,7 +24,7 @@
"{Review} suggests that the user would recommend the product to others",
K=2,
method=method,
strategy=ReasoningStrategy.ZS_COT,
prompt_strategy=PromptStrategy(cot=True),
return_stats=True,
return_explanations=True,
)
Expand Down
2 changes: 2 additions & 0 deletions lotus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from lotus.evals import llm_as_judge, pairwise_judge
from lotus.web_search import web_search, WebSearchCorpus
from lotus.settings import settings # type: ignore[attr-defined]
from lotus.types import PromptStrategy


logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO)
Expand Down Expand Up @@ -52,6 +53,7 @@
"dtype_extensions",
"web_search",
"WebSearchCorpus",
"PromptStrategy",
"llm_as_judge",
"pairwise_judge",
]
Loading