Skip to content

Commit 0c9b971

Browse files
authored
🐛 use 512 tokens instead of 256 (#509)
# Description

Recent updates are causing issues when running Granite models at a context length of 256. This commit updates the unit tests to default to a context length of 512 instead.

---------

Signed-off-by: Joe Runde <[email protected]>
1 parent a8a857f commit 0c9b971

File tree

3 files changed

+4
-4
lines changed

3 files changed

+4
-4
lines changed

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def pytest_generate_tests(metafunc):
3131
# default parameterizations
3232
default_warmup_shape = [[(64, 20, 4)]]
3333
default_max_num_seqs = [4]
34-
default_max_model_len = [256]
34+
default_max_model_len = [512]
3535

3636
existing_markers = [
3737
marker.name if marker.name != "parametrize" else marker.args[0]

tests/e2e/test_spyre_basic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def test_batch_handling(model: ModelInfo, backend: str, cb: int, warmup_shapes,
137137
vllm_results = generate_spyre_vllm_output(
138138
model=model,
139139
prompts=prompts,
140-
max_model_len=256,
140+
max_model_len=max_model_len,
141141
sampling_params=vllm_sampling_params,
142142
tensor_parallel_size=1,
143143
backend=backend,

tests/llm_cache.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ def get_engine(
180180

181181
# 🌶️🌶️🌶️
182182
# Messing with the blocks and context length by either:
183-
# - setting context < 256 tokens
183+
# - setting context < 512 tokens
184184
# - setting available blocks != (context * batch size // 64)
185185
# can cause compilation failures on spyre hardware.
186186

@@ -195,7 +195,7 @@ def get_engine(
195195
engine_args = EngineArgs(
196196
model=model_name,
197197
tokenizer=model_name,
198-
max_model_len=max(max_model_len, 256),
198+
max_model_len=max(max_model_len, 512),
199199
max_num_seqs=max_num_seqs_compiled,
200200
num_gpu_blocks_override=None,
201201
revision=revision,

0 commit comments

Comments
 (0)