Skip to content

Commit f0d67f1

Browse files
committed
Merge branch 'cohere-rerank'
2 parents 6476021 + 9009abe commit f0d67f1

File tree

6 files changed

+940
-20
lines changed

6 files changed

+940
-20
lines changed

env.example

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ RERANK_BINDING=null
102102
# RERANK_MODEL=rerank-v3.5
103103
# RERANK_BINDING_HOST=https://api.cohere.com/v2/rerank
104104
# RERANK_BINDING_API_KEY=your_rerank_api_key_here
105+
### Cohere rerank chunking configuration (useful for models with token limits like ColBERT)
106+
# RERANK_ENABLE_CHUNKING=true
107+
# RERANK_MAX_TOKENS_PER_DOC=480
105108

106109
### Default value for Jina AI
107110
# RERANK_MODEL=jina-reranker-v2-base-multilingual

examples/rerank_example.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,12 @@
1515
EMBEDDING_BINDING_HOST
1616
EMBEDDING_BINDING_API_KEY
1717
3. Set your Cohere rerank model settings with env vars:
18-
RERANK_MODEL
19-
RERANK_BINDING_HOST
18+
RERANK_BINDING=cohere
19+
RERANK_MODEL (e.g., answerai-colbert-small-v1 or rerank-v3.5)
20+
RERANK_BINDING_HOST (e.g., https://api.cohere.com/v2/rerank or LiteLLM proxy)
2021
RERANK_BINDING_API_KEY
22+
RERANK_ENABLE_CHUNKING=true (optional, for models with token limits)
23+
RERANK_MAX_TOKENS_PER_DOC=480 (optional, default 4096)
2124
2225
Note: Rerank is controlled per query via the 'enable_rerank' parameter (default: True)
2326
"""
@@ -66,9 +69,11 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
6669

6770
rerank_model_func = partial(
6871
cohere_rerank,
69-
model=os.getenv("RERANK_MODEL"),
72+
model=os.getenv("RERANK_MODEL", "rerank-v3.5"),
7073
api_key=os.getenv("RERANK_BINDING_API_KEY"),
71-
base_url=os.getenv("RERANK_BINDING_HOST"),
74+
base_url=os.getenv("RERANK_BINDING_HOST", "https://api.cohere.com/v2/rerank"),
75+
enable_chunking=os.getenv("RERANK_ENABLE_CHUNKING", "false").lower() == "true",
76+
max_tokens_per_doc=int(os.getenv("RERANK_MAX_TOKENS_PER_DOC", "4096")),
7277
)
7378

7479

lightrag/api/lightrag_server.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1005,15 +1005,27 @@ async def server_rerank_func(
10051005
query: str, documents: list, top_n: int = None, extra_body: dict = None
10061006
):
10071007
"""Server rerank function with configuration from environment variables"""
1008-
return await selected_rerank_func(
1009-
query=query,
1010-
documents=documents,
1011-
top_n=top_n,
1012-
api_key=args.rerank_binding_api_key,
1013-
model=args.rerank_model,
1014-
base_url=args.rerank_binding_host,
1015-
extra_body=extra_body,
1016-
)
1008+
# Prepare kwargs for rerank function
1009+
kwargs = {
1010+
"query": query,
1011+
"documents": documents,
1012+
"top_n": top_n,
1013+
"api_key": args.rerank_binding_api_key,
1014+
"model": args.rerank_model,
1015+
"base_url": args.rerank_binding_host,
1016+
}
1017+
1018+
# Add Cohere-specific parameters if using cohere binding
1019+
if args.rerank_binding == "cohere":
1020+
# Enable chunking if configured (useful for models with token limits like ColBERT)
1021+
kwargs["enable_chunking"] = (
1022+
os.getenv("RERANK_ENABLE_CHUNKING", "false").lower() == "true"
1023+
)
1024+
kwargs["max_tokens_per_doc"] = int(
1025+
os.getenv("RERANK_MAX_TOKENS_PER_DOC", "4096")
1026+
)
1027+
1028+
return await selected_rerank_func(**kwargs, extra_body=extra_body)
10171029

10181030
rerank_model_func = server_rerank_func
10191031
logger.info(

0 commit comments

Comments
 (0)