From 1c77415e5786b1adf466942276c29c65f541b99c Mon Sep 17 00:00:00 2001
From: michaelj
Date: Wed, 15 Oct 2025 12:11:34 +0100
Subject: [PATCH 01/46] feat: support geospatial benchmark

- Add backend arg to execute benchmark
- Add execute_geospatial_benchmark function

This uses the geospatial_valencia.jsonl dataset
---
 .../execute_benchmark.py | 73 ++++++++++++++++---
 1 file changed, 62 insertions(+), 11 deletions(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
index af8e2c34..a65463f9 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
@@ -17,6 +17,7 @@ def execute_benchmark(
     base_url: str,
     model: str,
     data_set: str,
+    backend: str = "openai",
     interpreter: str = "python",
     num_prompts: int = 500,
     request_rate: int | None = None,
@@ -68,21 +69,21 @@
     request += (
         # changing from script invocation to cli invocation
         # f"{interpreter} {code} --backend openai --base-url {base_url} --dataset-name {data_set} "
-        f"vllm bench serve --backend openai --base-url {base_url} --dataset-name {data_set} "
+        f"vllm bench serve --backend {backend} --base-url {base_url} --dataset-name {data_set} "
         f"--model {model} --seed 12345 --num-prompts {num_prompts!s} --save-result --metric-percentiles "
         f'"25,75,99" --percentile-metrics "ttft,tpot,itl,e2el" --result-dir . --result-filename {f_name} '
         f"--burstiness {burstiness} "
     )
     if data_set_path is not None:
-        request += f"--dataset-path {data_set_path} "
+        request += f" --dataset-path {data_set_path} "
     if request_rate is not None:
-        request += f"--request-rate {request_rate!s} "
+        request += f" --request-rate {request_rate!s} "
     if max_concurrency is not None:
-        request += f"--max-concurrency {max_concurrency!s}"
+        request += f" --max-concurrency {max_concurrency!s} "
     if custom_args is not None:
         for key, value in custom_args.items():
-            request += f"{key} {value!s} "
+            request += f" {key} {value!s} "
 
     timeout = retries_timeout
     logger.debug(f"Command line: {request}")
@@ -149,14 +150,64 @@ def execute_random_benchmark(
     )
 
 
+def execute_geospatial_benchmark(
+    base_url: str,
+    model: str,
+    num_prompts: int = 500,
+    request_rate: int | None = None,
+    max_concurrency: int | None = None,
+    hf_token: str | None = None,
+    benchmark_retries: int = 3,
+    retries_timeout: int = 5,
+    burstiness: float = 1,
+    interpreter: str = "python",
+) -> dict[str, Any]:
+    """
+    Execute benchmark with the geospatial Valencia dataset
+    :param base_url: url for vllm endpoint
+    :param model: model
+    :param num_prompts: total number of prompts (requests) to send
+    :param hf_token: huggingface token
+    :param benchmark_retries: number of benchmark execution retries
+    :param retries_timeout: timeout before the initial retry
+    :param request_rate: requests to send per second (None sends them all at time 0)
+    :param max_concurrency: maximum number of concurrent requests (None for no limit)
+    :return: results dictionary
+    """
+    from importlib import resources
+
+    data_set_path = resources.path(
+        "ado_actuators.vllm_performance",
+        "geospatial_valencia.jsonl",
+    )
+    return execute_benchmark(
+        base_url=base_url,
+        backend="io-processor-plugin",
+        model=model,
+        data_set="custom",
+        interpreter=interpreter,
+        num_prompts=num_prompts,
+        request_rate=request_rate,
+        
max_concurrency=max_concurrency, + hf_token=hf_token, + benchmark_retries=benchmark_retries, + retries_timeout=retries_timeout, + burstiness=burstiness, + custom_args={ + "--dataset-path": data_set_path, + "--endpoint": "/pooling", + "--skip-tokenizer-init": True, + }, + ) + + if __name__ == "__main__": - results = execute_benchmark( + results = execute_geospatial_benchmark( interpreter="python3.10", - base_url="http://localhost:28015", - data_set="random", - model="openai/gpt-oss-20b", - request_rate=None, - max_concurrency=None, + base_url="http://localhost:8000", + model="ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11", + request_rate=2, + max_concurrency=10, hf_token=os.getenv("HF_TOKEN"), num_prompts=100, ) From 52ccff47f116f9486f0c65214b12e3c116dd4706 Mon Sep 17 00:00:00 2001 From: michaelj Date: Wed, 15 Oct 2025 12:13:55 +0100 Subject: [PATCH 02/46] feat: add geospatial experiments endpoint and full --- .../vllm_performance/experiment_executor.py | 85 ++++-- .../vllm_performance/experiments.yaml | 266 ++++++++++++++++++ 2 files changed, 323 insertions(+), 28 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 3da664a8..2e4f213c 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -22,6 +22,7 @@ VLLMDtype, ) from ado_actuators.vllm_performance.vllm_performance_test.execute_benchmark import ( + execute_geospatial_benchmark, execute_random_benchmark, ) from ray.actor import ActorHandle @@ -279,20 +280,34 @@ def run_resource_and_workload_experiment( start = time.time() result = None try: - result = execute_random_benchmark( - base_url=base_url, - model=values.get("model"), - interpreter=actuator_parameters.interpreter, - num_prompts=int(values.get("num_prompts")), - request_rate=request_rate, - max_concurrency=max_concurrency, - hf_token=actuator_parameters.hf_token, - benchmark_retries=actuator_parameters.benchmark_retries, - retries_timeout=actuator_parameters.retries_timeout, - number_input_tokens=int(values.get("number_input_tokens")), - max_output_tokens=int(values.get("max_output_tokens")), - burstiness=float(values.get("burstiness")), - ) + if experiment.identifier == "performance-testing-geospatial-full": + result = execute_geospatial_benchmark( + base_url=base_url, + model=values.get("model"), + interpreter=actuator_parameters.interpreter, + num_prompts=int(values.get("num_prompts")), + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=actuator_parameters.hf_token, + benchmark_retries=actuator_parameters.benchmark_retries, + retries_timeout=actuator_parameters.retries_timeout, + burstiness=float(values.get("burstiness")), + ) + else: + result = execute_random_benchmark( + base_url=base_url, + model=values.get("model"), + interpreter=actuator_parameters.interpreter, + num_prompts=int(values.get("num_prompts")), + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=actuator_parameters.hf_token, + benchmark_retries=actuator_parameters.benchmark_retries, + retries_timeout=actuator_parameters.retries_timeout, + number_input_tokens=int(values.get("number_input_tokens")), + max_output_tokens=int(values.get("max_output_tokens")), + burstiness=float(values.get("burstiness")), + ) logger.debug(f"benchmark executed in {time.time() - 
start} sec") except Exception as e: logger.error(f"Failed to execute VLLM performance test {e}") @@ -379,20 +394,34 @@ def run_workload_experiment( error = None measured_values = [] try: - result = execute_random_benchmark( - base_url=values.get("endpoint"), - model=values.get("model"), - interpreter=actuator_parameters.interpreter, - num_prompts=int(values.get("num_prompts")), - request_rate=request_rate, - max_concurrency=max_concurrency, - hf_token=actuator_parameters.hf_token, - benchmark_retries=actuator_parameters.benchmark_retries, - retries_timeout=actuator_parameters.retries_timeout, - number_input_tokens=int(values.get("number_input_tokens")), - max_output_tokens=int(values.get("max_output_tokens")), - burstiness=float(values.get("burstiness")), - ) + if experiment.identifier == "performance-testing-geospatial-endpoint": + result = execute_geospatial_benchmark( + base_url=values.get("endpoint"), + model=values.get("model"), + interpreter=actuator_parameters.interpreter, + num_prompts=int(values.get("num_prompts")), + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=actuator_parameters.hf_token, + benchmark_retries=actuator_parameters.benchmark_retries, + retries_timeout=actuator_parameters.retries_timeout, + burstiness=float(values.get("burstiness")), + ) + else: + result = execute_random_benchmark( + base_url=values.get("endpoint"), + model=values.get("model"), + interpreter=actuator_parameters.interpreter, + num_prompts=int(values.get("num_prompts")), + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=actuator_parameters.hf_token, + benchmark_retries=actuator_parameters.benchmark_retries, + retries_timeout=actuator_parameters.retries_timeout, + number_input_tokens=int(values.get("number_input_tokens")), + max_output_tokens=int(values.get("max_output_tokens")), + burstiness=float(values.get("burstiness")), + ) except Exception as e: logger.error(f"Failed to execute VLLM performance test {e}") error = f"Failed to execute VLLM performance test {e}" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml index 1d03b13a..40aa9777 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml @@ -319,3 +319,269 @@ performance_testing-endpoint: - identifier: "p99_e2el_ms" metadata: description: 'Test inference performance of a model served by vLLM endpoint across inference workload configurations' +performance_testing-geospatial-endpoint: + identifier: performance-testing-geospatial-endpoint + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"] + - identifier: 'endpoint' + metadata: + description: 'The endpoint(s) to test' + propertyDomain: + variableType: "UNKNOWN_VARIABLE_TYPE" + - identifier: 'request_rate' + metadata: + description: "The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'burstiness' + metadata: + description: "The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + defaultParameterization: + - value: 100 + property: + identifier: 'num_prompts' + - value: -1 + property: + identifier: 'max_concurrency' + - value: 1.0 + property: + identifier: 'burstiness' + # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "total_input_tokens" + - identifier: "total_output_tokens" + - identifier: "request_throughput" + - identifier: "output_throughput" + - identifier: "total_token_throughput" + - identifier: "mean_ttft_ms" + - identifier: "median_ttft_ms" + - identifier: "std_ttft_ms" + - identifier: "p25_ttft_ms" + - identifier: "p50_ttft_ms" + - identifier: "p75_ttft_ms" + - identifier: "p99_ttft_ms" + - identifier: "mean_tpot_ms" + - identifier: "median_tpot_ms" + - identifier: "std_tpot_ms" + - identifier: "p25_tpot_ms" + - identifier: "p50_tpot_ms" + - identifier: "p75_tpot_ms" + - identifier: "p99_tpot_ms" + - identifier: "mean_itl_ms" + - identifier: "median_itl_ms" + - identifier: "std_itl_ms" + - identifier: "p25_itl_ms" + - identifier: "p50_itl_ms" + - identifier: "p75_itl_ms" + - identifier: "p99_itl_ms" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'Test inference performance of a geospatial model served by vLLM endpoint across inference workload configurations' +performance_testing-geospatial-full: + identifier: performance-testing-geospatial-full + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] + - identifier: 'request_rate' + metadata: + description: "(benchmark) The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "(benchmark) The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "(benchmark) The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + - identifier: 'burstiness' + metadata: + description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: image + metadata: + description: "(deployment) Docker image to use to create vllm deployments" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] + - identifier: n_cpus + metadata: + description: "(deployment) the number of CPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 1,17 ] + interval: 1 + - identifier: memory + metadata: + description: "(deployment) the amount of memory to allocate to vLLM pod" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "64Gi", "128Gi", "256Gi" ] + - identifier: dtype + metadata: + description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] + - identifier: 'gpu_memory_utilization' + metadata: + description: "(deployment) The fraction of GPU memory to be used for the model executor," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ .5, .75, .9 ] + - identifier: 'cpu_offload' + metadata: + description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 
0 means all weights are on GPU," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ 0, 8, 16, 24, 32 ] + - identifier: 'max_num_seq' + metadata: + description: "(deployment) Maximum number of sequences per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [32,2049] + interval: 32 + - identifier: 'n_gpus' + metadata: + description: "(deployment) Number of GPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,9] + interval: 1 + - identifier: 'gpu_type' + metadata: + description: "(deployment) The GPU type to use" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + defaultParameterization: + - property: + identifier: 'image' + value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" + - property: + identifier: n_cpus + value: 8 + - property: + identifier: + memory + value: "128Gi" + - property: + identifier: dtype + value: "auto" + - property: + identifier: 'num_prompts' + value: 500 + - property: + identifier: 'max_concurrency' + value: -1 + - property: + identifier: 'burstiness' + value: 1.0 + - property: + identifier: 'gpu_memory_utilization' + value: .9 + - property: + identifier: 'cpu_offload' + value: 0 + - property: + identifier: 'max_num_seq' + value: 256 + - property: + identifier: 'n_gpus' + value: 1 + - property: + identifier: 'gpu_type' + value: 'NVIDIA-A100-80GB-PCIe' + # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "total_input_tokens" + - identifier: "total_output_tokens" + - identifier: "request_throughput" + - identifier: "output_throughput" + - identifier: "total_token_throughput" + - identifier: "mean_ttft_ms" + - identifier: "median_ttft_ms" + - identifier: "std_ttft_ms" + - identifier: "p25_ttft_ms" + - identifier: "p50_ttft_ms" + - identifier: "p75_ttft_ms" + - identifier: "p99_ttft_ms" + - identifier: "mean_tpot_ms" + - identifier: "median_tpot_ms" + - identifier: "std_tpot_ms" + - identifier: "p25_tpot_ms" + - identifier: "p50_tpot_ms" + - identifier: "p75_tpot_ms" + - identifier: "p99_tpot_ms" + - identifier: "mean_itl_ms" + - identifier: "median_itl_ms" + - identifier: "std_itl_ms" + - identifier: "p25_itl_ms" + - identifier: "p50_itl_ms" + - identifier: "p75_itl_ms" + - identifier: "p99_itl_ms" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'VLLM performance testing across compute resource and workload configuration' \ No newline at end of file From 98591b581dcdfb69b555ecfde48fec9fa4bf36d1 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Wed, 15 Oct 2025 20:49:41 +0100 Subject: [PATCH 03/46] various fixes to the vllm_performance actuator Signed-off-by: Christian Pinto --- .../ado_actuators/vllm_performance/actuator.py | 2 +- .../vllm_performance/experiment_executor.py | 4 +++- .../vllm_performance/experiments.yaml | 14 ++++++++++++-- .../k8/yaml_support/build_components.py | 6 +++++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py index a9e5dc30..a4fea988 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py +++ 
b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py @@ -176,7 +176,7 @@ async def submit( if experiment.deprecated is True: raise DeprecatedExperimentError(f"Experiment {experiment} is deprecated") - if experiment.identifier == "performance-testing-full": + if experiment.identifier in ["performance-testing-full", "performance-testing-geospatial-full"]: if not self.env_manager: raise MissingConfigurationForExperimentError( f"Actuator configuration did not contain sufficient information for a kubernetes environment manager to be created. " diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 2e4f213c..f3594150 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -7,6 +7,7 @@ import subprocess import sys import time +import traceback import ray from ado_actuators.vllm_performance.actuator_parameters import ( @@ -152,6 +153,7 @@ def _create_environment( logger.error( f"Attempt {attempt}. Failed to create test environment {e}" ) + logger.error(traceback.format_exception(e)) error = f"Failed to create test environment {e}" time.sleep(tmout) tmout *= 2 @@ -310,7 +312,7 @@ def run_resource_and_workload_experiment( ) logger.debug(f"benchmark executed in {time.time() - start} sec") except Exception as e: - logger.error(f"Failed to execute VLLM performance test {e}") + logger.error(traceback.format_exception(e)) error = f"Failed to execute VLLM performance test {e}" finally: if pf is not None: diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml index 40aa9777..0c1859ca 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml @@ -420,7 +420,7 @@ performance_testing-geospatial-full: metadata: description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] - identifier: 'request_rate' metadata: @@ -455,7 +455,7 @@ performance_testing-geospatial-full: metadata: description: "(deployment) Docker image to use to create vllm deployments" propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] - identifier: n_cpus metadata: @@ -495,6 +495,13 @@ performance_testing-geospatial-full: variableType: 'DISCRETE_VARIABLE_TYPE' domainRange: [32,2049] interval: 32 + - identifier: 'max_batch_tokens' + metadata: + description: "(deployment) maximum number of batched tokens per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 8192, 32769] + interval: 1024 - identifier: 'n_gpus' metadata: description: "(deployment) Number of GPUs to use" @@ -540,6 +547,9 @@ performance_testing-geospatial-full: - property: identifier: 'max_num_seq' value: 256 + - property: + identifier: 'max_batch_tokens' + value: 16384 - property: identifier: 'n_gpus' value: 1 diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index 1e2355a1..511a2ea6 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -41,7 +41,11 @@ def get_k8_name(model: str) -> str: :return: k8 unique name for a given LLM model """ m_parts = model.split("/") - return f"vllm-{m_parts[-1].lower()}-{uuid.uuid4().hex}".replace(".", "-") + + # Making sure the resulting name is not longer than 63 characters as it is + # the maximum allowed for a name in kubernetes. 
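+    # (5 chars for "vllm-" + at most 21 for the prefix + 1 for the hyphen + 36 for the uuid4 string = 63.)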
+ name_prefix = m_parts[-1][:min(len(m_parts[-1]), 21)].rstrip("-") + return f"vllm-{name_prefix.lower()}-{uuid.uuid4()}".replace(".", "-") @staticmethod def _adjust_file_name(f: str) -> str: From bd62781809e02f3b540c6c778f82e167334e0cd3 Mon Sep 17 00:00:00 2001 From: michaelj Date: Wed, 15 Oct 2025 22:48:32 +0100 Subject: [PATCH 04/46] fix: add max_batch_tokens --- .../ado_actuators/vllm_performance/experiments.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml index 40aa9777..cd67986a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml @@ -488,6 +488,13 @@ performance_testing-geospatial-full: propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' values: [ 0, 8, 16, 24, 32 ] + - identifier: 'max_batch_tokens' + metadata: + description: "(deployment) maximum size of the sum of the 1st image dimensions per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 1024, 32769 ] + interval: 1024 - identifier: 'max_num_seq' metadata: description: "(deployment) Maximum number of sequences per iteration" @@ -546,6 +553,9 @@ performance_testing-geospatial-full: - property: identifier: 'gpu_type' value: 'NVIDIA-A100-80GB-PCIe' + - property: + identifier: 'max_batch_tokens' + value: 16384 # measurements targetProperties: - identifier: "duration" From c1dec4aab0e51534e35cf2da7db4285f38d0b5fe Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Tue, 28 Oct 2025 15:49:18 +0000 Subject: [PATCH 05/46] Updated vllm performance actuator to support geospatial Signed-off-by: Christian Pinto --- .../vllm_performance/experiment_executor.py | 3 + .../vllm_performance/experiments.yaml | 55 +++++++++++++++++- .../vllm_performance/k8/create_environment.py | 11 ++-- .../vllm_performance/k8/manage_components.py | 6 ++ .../k8/yaml_support/build_components.py | 56 +++++++++++++++---- .../execute_benchmark.py | 42 +++++++------- 6 files changed, 135 insertions(+), 38 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index f3594150..9be6921a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -144,6 +144,9 @@ def _create_environment( reuse_deployment=False, pvc_name=actuator.pvc_template, namespace=actuator.namespace, + skip_tokenizer_init=values.get("skip_tokenizer_init"), + enforce_eager=values.get("enforce_eager"), + io_processor_plugin=values.get("io_processor_plugin") ) # Update manager env_manager.done_creating.remote(definition=definition) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml index 072a936d..69d3460d 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml @@ -120,6 +120,24 @@ performance_testing-full: propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" values: [ 'NVIDIA-A100-80GB-PCIe', 
'NVIDIA-A100-SXM4-80GB' ] + - identifier: 'skip_tokenizer_init' + metadata: + description: "(deployment) skip tokenizer intialization" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'enforce_eager' + metadata: + description: "(deployment) enforce pytorch eager mode" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'io_processor_plugin' + metadata: + description: 'IO Pocessor plugin to load for the model' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ None, "terratorch_segmentation" ] defaultParameterization: - property: identifier: 'image' @@ -167,6 +185,15 @@ performance_testing-full: - property: identifier: 'gpu_type' value: 'NVIDIA-A100-80GB-PCIe' + - property: + identifier: 'skip_tokenizer_init' + value: False + - property: + identifier: 'enforce_eager' + value: False + - property: + identifier: 'io_processor_plugin' + value: None # measurements targetProperties: - identifier: "duration" @@ -522,6 +549,24 @@ performance_testing-geospatial-full: propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + - identifier: 'skip_tokenizer_init' + metadata: + description: "(deployment) skip tokenizer intialization" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'enforce_eager' + metadata: + description: "(deployment) enforce pytorch eager mode" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'io_processor_plugin' + metadata: + description: 'IO Pocessor plugin to load for the model' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ None, "terratorch_segmentation" ] defaultParameterization: - property: identifier: 'image' @@ -564,8 +609,14 @@ performance_testing-geospatial-full: identifier: 'gpu_type' value: 'NVIDIA-A100-80GB-PCIe' - property: - identifier: 'max_batch_tokens' - value: 16384 + identifier: 'skip_tokenizer_init' + value: True + - property: + identifier: 'enforce_eager' + value: True + - property: + identifier: 'io_processor_plugin' + value: "terratorch_segmentation" # measurements targetProperties: - identifier: "duration" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py index 87ee719d..3f0a0809 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py @@ -40,6 +40,9 @@ def create_test_environment( reuse_pvc: bool = True, pvc_name: str = "vllm-support", namespace: str = "vllm-testing", + enforce_eager: bool = False, + skip_tokenizer_init: bool = False, + io_processor_plugin: str | None = None ) -> None: """ Create test deployment @@ -113,15 +116,13 @@ def create_test_environment( n_gpus=n_gpus, n_cpus=n_cpus, memory=memory, - max_batch_tokens=max_batch_tokens, - gpu_memory_utilization=gpu_memory_utilization, - dtype=dtype, - cpu_offload=cpu_offload, - max_num_seq=max_num_seq, template=deployment_template, claim_name=pvc_name, hf_token=hf_token, reuse=reuse_deployment, + enforce_eager=enforce_eager, + skip_tokenizer_init=skip_tokenizer_init, + io_processor_plugin=io_processor_plugin ) logger.debug("deployment created") c_manager.wait_deployment_ready(k8_name=k8_name) diff --git 
a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py index dfef4725..cd77a444 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py @@ -231,6 +231,9 @@ def create_deployment( claim_name: str | None = None, hf_token: str | None = None, reuse: bool = False, + enforce_eager: bool = False, + skip_tokenizer_init: bool = False, + io_processor_plugin: str | None = None ) -> None: """ create deployment for model @@ -293,6 +296,9 @@ def create_deployment( template=template, claim_name=claim_name, hf_token=hf_token, + enforce_eager=enforce_eager, + skip_tokenizer_init=skip_tokenizer_init, + io_processor_plugin=io_processor_plugin, ), ) except ApiException as e: diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index 511a2ea6..b4069ace 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -77,6 +77,9 @@ def deployment_yaml( template: str = "deployment.yaml", claim_name: str | None = None, hf_token: str | None = None, + enforce_eager: bool = False, + skip_tokenizer_init: bool = False, + io_processor_plugin: str | None = None, ) -> dict[str, Any]: """ Generate deployment yaml @@ -138,6 +141,30 @@ def deployment_yaml( [{"name": PVC_NAME, "persistentVolumeClaim": {"claimName": claim_name}}] ) + vllm_serve_args = [ + model, + "--max-num-batched-tokens", + f"{max_batch_tokens}", + "--gpu-memory-utilization", + f"{gpu_memory_utilization}", + "--cpu-offload-gb", + f"{cpu_offload}", + "--max-num-seq", + f"{max_num_seq}", + "--tensor-parallel-size", + f"{n_gpus}", + "--dtype", + dtype.value, + ] + + if enforce_eager: + vllm_serve_args.append("--skip-tokenizer-init") + if skip_tokenizer_init: + vllm_serve_args.append("--enforce-eager") + if io_processor_plugin: + vllm_serve_args.append("--io-processor-plugin") + vllm_serve_args.append(io_processor_plugin) + # container container = spec["containers"][0] # image @@ -151,19 +178,25 @@ def deployment_yaml( limits["cpu"] = str(n_cpus) limits["memory"] = memory limits["nvidia.com/gpu"] = str(n_gpus) + + #command + container["command"] = ["vllm", "serve"] + container["args"] = vllm_serve_args # env variables to to set parameters for docker execution - container["env"] = [ - {"name": "MODEL", "value": model}, - {"name": "GPU_MEMORY_UTILIZATION", "value": str(gpu_memory_utilization)}, - {"name": "DTYPE", "value": dtype.value}, - {"name": "CPU_OFFLOAD_GB", "value": str(cpu_offload)}, - {"name": "MAX_NUM_BATCHED_TOKENS", "value": str(max_batch_tokens)}, - {"name": "MAX_NUM_SEQ", "value": str(max_num_seq)}, - {"name": "TENSOR_PARALLEL_SIZE", "value": str(n_gpus)}, - ] + # container["env"] = [ + # {"name": "MODEL", "value": model}, + # {"name": "GPU_MEMORY_UTILIZATION", "value": str(gpu_memory_utilization)}, + # {"name": "DTYPE", "value": dtype.value}, + # {"name": "CPU_OFFLOAD_GB", "value": str(cpu_offload)}, + # {"name": "MAX_NUM_BATCHED_TOKENS", "value": str(max_batch_tokens)}, + # {"name": "MAX_NUM_SEQ", "value": str(max_num_seq)}, + # {"name": 
"TENSOR_PARALLEL_SIZE", "value": str(n_gpus)}, + # ] if hf_token is not None: - container["env"].extend([{"name": "HF_TOKEN", "value": hf_token}]) + container["env"]=[{"name": "HF_TOKEN", "value": hf_token}] if claim_name is not None: + if "env" not in container: + container["env"] = [] container["env"].extend( [ { @@ -181,6 +214,9 @@ def deployment_yaml( ) # return + + import json + print(json.dumps(deployment_yaml, indent=2)) return deployment_yaml @staticmethod diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index a65463f9..952cb26a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -176,29 +176,29 @@ def execute_geospatial_benchmark( """ from importlib import resources - data_set_path = resources.path( + with resources.path( "ado_actuators.vllm_performance", "geospatial_valencia.jsonl", - ) - return execute_benchmark( - base_url=base_url, - backend="io-processor-plugin", - model=model, - data_set="custom", - interpreter=interpreter, - num_prompts=num_prompts, - request_rate=request_rate, - max_concurrency=max_concurrency, - hf_token=hf_token, - benchmark_retries=benchmark_retries, - retries_timeout=retries_timeout, - burstiness=burstiness, - custom_args={ - "--dataset-path": data_set_path, - "--endpoint": "/pooling", - "--skip-tokenizer-init": True, - }, - ) + ) as data_set_path: + return execute_benchmark( + base_url=base_url, + backend="io-processor-plugin", + model=model, + data_set="custom", + interpreter=interpreter, + num_prompts=num_prompts, + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=hf_token, + benchmark_retries=benchmark_retries, + retries_timeout=retries_timeout, + burstiness=burstiness, + custom_args={ + "--dataset-path": data_set_path, + "--endpoint": "/pooling", + "--skip-tokenizer-init": True, + }, + ) if __name__ == "__main__": From 2e030282585b6baacc43c06ee0ac98803f9dbc45 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 30 Oct 2025 09:32:29 +0000 Subject: [PATCH 06/46] Termorarily avoiding cpu14 Signed-off-by: Christian Pinto --- .../vllm_performance/k8/yaml_support/deployment.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml index 2b90302a..25851982 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml @@ -27,6 +27,15 @@ spec: - name: http containerPort: 8000 protocol: TCP + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - operator: NotIn + key: kubernetes.io/hostname + values: + - adcpu014 # funny node startupProbe: exec: command: From 80c68c4c0c0113ffbe764f72bff7a60d527c2d9d Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 30 Oct 2025 09:42:58 +0000 Subject: [PATCH 07/46] Termorarily avoiding cpu14 Signed-off-by: Christian Pinto --- .../k8/yaml_support/deployment.yaml | 18 +++++++++--------- 1 file changed, 9 
insertions(+), 9 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml index 25851982..bc9c10a6 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml @@ -19,6 +19,15 @@ spec: app.kubernetes.io/name: vllm app.kubernetes.io/instance: vllm-testing spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - operator: NotIn + key: kubernetes.io/hostname + values: + - adcpu014 # funny node containers: - name: vllm image: "vllm/vllm-openai:v0.6.3" @@ -27,15 +36,6 @@ spec: - name: http containerPort: 8000 protocol: TCP - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - operator: NotIn - key: kubernetes.io/hostname - values: - - adcpu014 # funny node startupProbe: exec: command: From 592e17940ffae7971936b268678944bdcf8c9107 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 30 Oct 2025 11:55:19 +0000 Subject: [PATCH 08/46] Added india dataset Signed-off-by: Christian Pinto --- .../ado_actuators/vllm_performance/geospatial_india.jsonl | 1 + .../vllm_performance/vllm_performance_test/execute_benchmark.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl new file mode 100644 index 00000000..693bbc09 --- /dev/null +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl @@ -0,0 +1 @@ +{"prompt":{"data": {"data": "https://huggingface.co/christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM/resolve/main/India_900498_S2Hand.tif","data_format": "url","out_data_format": "b64_json","indices": [1, 2, 3, 8, 11, 12]},"priority": 0,"softmax": false}} diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index 952cb26a..cf3ed96a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -178,7 +178,7 @@ def execute_geospatial_benchmark( with resources.path( "ado_actuators.vllm_performance", - "geospatial_valencia.jsonl", + "geospatial_india.jsonl", ) as data_set_path: return execute_benchmark( base_url=base_url, From 94c7490b34edf6d59918087a431f0683ee2b3b87 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 30 Oct 2025 14:45:22 +0000 Subject: [PATCH 09/46] Fixed BaseSamplerConfig Signed-off-by: Christian Pinto --- orchestrator/modules/operators/randomwalk.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/orchestrator/modules/operators/randomwalk.py b/orchestrator/modules/operators/randomwalk.py index 1018c17a..aa050494 100644 --- a/orchestrator/modules/operators/randomwalk.py +++ b/orchestrator/modules/operators/randomwalk.py 
@@ -207,8 +207,11 @@ def sampler(self) -> BaseSampler | GroupSampler: sampler = SequentialSampleSelector() case CombinedWalkModeEnum.RANDOMGROUPED: sampler = RandomGroupSampleSelector(group=self.grouping) + sampler = RandomGroupSampleSelector(group=self.grouping) case CombinedWalkModeEnum.SEQUENTIALGROUPED: - sampler = SequentialGroupSampleSelector(group=self.grouping) + sampler = SequentialGroupSampleSelector( + group=self.grouping + ) case _: # this can never happen, as we are validating this above pass @@ -218,10 +221,12 @@ def sampler(self) -> BaseSampler | GroupSampler: case CombinedWalkModeEnum.RANDOMGROUPED: sampler = ExplicitEntitySpaceGroupedGridSampleGenerator( mode=WalkModeEnum.RANDOM, group=self.grouping + mode=WalkModeEnum.RANDOM, group=self.grouping ) case CombinedWalkModeEnum.SEQUENTIALGROUPED: sampler = ExplicitEntitySpaceGroupedGridSampleGenerator( mode=WalkModeEnum.SEQUENTIAL, group=self.grouping + mode=WalkModeEnum.SEQUENTIAL, group=self.grouping ) case CombinedWalkModeEnum.RANDOM: sampler = ExplicitEntitySpaceGridSampleGenerator( From 3fd83b83cb6988031918a22d835bcd3301255e9d Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 30 Oct 2025 15:48:05 +0000 Subject: [PATCH 10/46] Some changes to the vllmperformance experiments Signed-off-by: Christian Pinto --- .../ado_actuators/vllm_performance/experiments.yaml | 9 --------- .../k8/yaml_support/build_components.py | 10 ++++------ 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml index 69d3460d..53ee0c33 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml @@ -132,12 +132,6 @@ performance_testing-full: propertyDomain: variableType: BINARY_VARIABLE_TYPE values: [True, False] - - identifier: 'io_processor_plugin' - metadata: - description: 'IO Pocessor plugin to load for the model' - propertyDomain: - variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ None, "terratorch_segmentation" ] defaultParameterization: - property: identifier: 'image' @@ -191,9 +185,6 @@ performance_testing-full: - property: identifier: 'enforce_eager' value: False - - property: - identifier: 'io_processor_plugin' - value: None # measurements targetProperties: - identifier: "duration" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index b4069ace..a3277a08 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -6,6 +6,7 @@ import sys import uuid from enum import Enum +import json from typing import Any import yaml @@ -158,9 +159,9 @@ def deployment_yaml( ] if enforce_eager: - vllm_serve_args.append("--skip-tokenizer-init") - if skip_tokenizer_init: vllm_serve_args.append("--enforce-eager") + if skip_tokenizer_init: + vllm_serve_args.append("--skip-tokenizer-init") if io_processor_plugin: vllm_serve_args.append("--io-processor-plugin") vllm_serve_args.append(io_processor_plugin) @@ -213,10 +214,7 @@ def deployment_yaml( ] ) - # return - - import json - print(json.dumps(deployment_yaml, indent=2)) + 
logger.debug(json.dumps(deployment_yaml, indent=2))
         return deployment_yaml
 
     @staticmethod

From 90ae6bbbbcc96df29ee4b89c45d48cde4aa4efde Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Fri, 31 Oct 2025 09:23:01 +0000
Subject: [PATCH 11/46] Some changes to the experiment and reverted the deployment template

Signed-off-by: Christian Pinto
---
 .../vllm_performance/actuator.py                     | 2 +-
 .../vllm_performance/k8/yaml_support/build_components.py | 2 +-
 .../vllm_performance/k8/yaml_support/deployment.yaml | 9 ---------
 3 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py
index a4fea988..2e522bf6 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py
@@ -197,7 +197,7 @@ async def submit(
         )
 
         # Execute experiment
-        # Note: Here the experiment instance is just past for convenience since we retrieved it above
+        # Note: Here the experiment instance is just passed for convenience since we retrieved it above
         run_resource_and_workload_experiment.remote(
             request=request,
             experiment=experiment,
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py
index a3277a08..0abcc8c9 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py
@@ -162,7 +162,7 @@ def deployment_yaml(
             vllm_serve_args.append("--enforce-eager")
         if skip_tokenizer_init:
             vllm_serve_args.append("--skip-tokenizer-init")
-        if io_processor_plugin:
+        if io_processor_plugin is not None:
             vllm_serve_args.append("--io-processor-plugin")
             vllm_serve_args.append(io_processor_plugin)
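With the `vllm_serve_args` construction above, a deployment built from the default parameterization in performance_testing_geospatial.yaml would launch the server roughly as follows. This is a sketch assembled from the patch content only; actual values come from the experiment's parameterization, and the last three flags appear because the geospatial defaults set enforce_eager, skip_tokenizer_init, and io_processor_plugin:

    vllm serve ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11 \
        --max-num-batched-tokens 16384 \
        --gpu-memory-utilization 0.9 \
        --cpu-offload-gb 0 \
        --max-num-seq 256 \
        --tensor-parallel-size 1 \
        --dtype auto \
        --enforce-eager \
        --skip-tokenizer-init \
        --io-processor-plugin terratorch_segmentation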
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml
index bc9c10a6..2b90302a 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml
@@ -19,15 +19,6 @@ spec:
         app.kubernetes.io/name: vllm
         app.kubernetes.io/instance: vllm-testing
     spec:
-      affinity:
-        nodeAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            nodeSelectorTerms:
-            - matchExpressions:
-              - operator: NotIn
-                key: kubernetes.io/hostname
-                values:
-                - adcpu014 # funny node
       containers:
         - name: vllm
           image: "vllm/vllm-openai:v0.6.3"

From a7509754d612e0e0063ddd3c67c66a2a01e212cc Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Fri, 31 Oct 2025 09:28:36 +0000
Subject: [PATCH 12/46] Removed some clutter from deployment template

Signed-off-by: Christian Pinto
---
 .../vllm_performance/k8/yaml_support/deployment.yaml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml
index 2b90302a..2659550d 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml
@@ -68,6 +68,4 @@ spec:
           emptyDir:
             medium: Memory
       nodeSelector:
-        nvidia.com/gpu.product: NVIDIA-A100-80GB-PCIe
-        #nvidia.com/gpu.product: Tesla-V100-PCIE-16GB
-        #kubernetes.io/hostname: cpu15
\ No newline at end of file
+        nvidia.com/gpu.product: NVIDIA-A100-80GB-PCIe
\ No newline at end of file

From c432cff57b38f6dc2e77b7a7210ffe928fe7b2e2 Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Fri, 31 Oct 2025 11:05:03 +0000
Subject: [PATCH 13/46] Few more fixes

Signed-off-by: Christian Pinto
---
 orchestrator/modules/operators/randomwalk.py |   3 -
 .../vllm_performance/actuator.py             |  26 +-
 .../performance_testing.yaml}                | 312 +-----------
 .../performance_testing_geospatial.yaml      | 308 +++++++++++++++++
 4 files changed, 328 insertions(+), 321 deletions(-)
 rename plugins/actuators/vllm_performance/ado_actuators/vllm_performance/{experiments.yaml => experiments/performance_testing.yaml} (51%)
 create mode 100644 plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml

diff --git a/orchestrator/modules/operators/randomwalk.py b/orchestrator/modules/operators/randomwalk.py
index aa050494..eff9a6f2 100644
--- a/orchestrator/modules/operators/randomwalk.py
+++ b/orchestrator/modules/operators/randomwalk.py
@@ -207,7 +207,6 @@ def sampler(self) -> BaseSampler | GroupSampler:
                 sampler = SequentialSampleSelector()
             case CombinedWalkModeEnum.RANDOMGROUPED:
                 sampler = RandomGroupSampleSelector(group=self.grouping)
-                sampler = RandomGroupSampleSelector(group=self.grouping)
             case CombinedWalkModeEnum.SEQUENTIALGROUPED:
                 sampler = SequentialGroupSampleSelector(
                     group=self.grouping
@@ -221,12 +220,10 @@ def sampler(self) -> BaseSampler | GroupSampler:
             case CombinedWalkModeEnum.RANDOMGROUPED:
                 sampler = ExplicitEntitySpaceGroupedGridSampleGenerator(
                     mode=WalkModeEnum.RANDOM, group=self.grouping
-                    mode=WalkModeEnum.RANDOM, group=self.grouping
                 )
             case CombinedWalkModeEnum.SEQUENTIALGROUPED:
                 sampler = ExplicitEntitySpaceGroupedGridSampleGenerator(
                     mode=WalkModeEnum.SEQUENTIAL, group=self.grouping
-                    mode=WalkModeEnum.SEQUENTIAL, group=self.grouping
                 )
             case CombinedWalkModeEnum.RANDOM:
                 sampler = ExplicitEntitySpaceGridSampleGenerator(
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py
index 2e522bf6..88d4e67c 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py
@@ -54,14 +54,26 @@ def catalog(
     ) -> ExperimentCatalog:
         """Returns the Experiments your actuator provides"""
 
-        # The catalog be formed in code here or read from a file containing the Experiments models
-        # This shows reading from a file
-
+        # Loading experiment definitions from the YAML files in the `experiments` directory.
+ # NOTE: Only files can be placed in the experiments directory, + # but each file can contain multiple experiment definitions path = os.path.abspath(__file__) - path = os.path.split(path)[0] - with open(os.path.join(path, "experiments.yaml")) as f: - data = yaml.safe_load(f) - experiments = [Experiment(**data[e]) for e in data] + exp_dir = os.path.join(os.path.split(path)[0], "experiments") + experiments = [] + for exp_file in os.listdir(exp_dir): + logger.debug(f"Loading experiments from {exp_file}") + exp_file_path = os.path.join(exp_dir, exp_file) + if os.path.isdir(exp_file_path): + logger.error(f"{exp_file_path} is a directory. Only files are supported in the experiments directory") + raise Exception(f"{exp_file_path} is a directory. Only files are supported in the experiments directory") + with open(exp_file_path) as f: + try: + data = yaml.safe_load(f) + except yaml.YAMLError as e: + logger.error(f"File {exp_file} is a malformed YAML - {e}") + raise Exception (f"File {exp_file} is a malformed YAML - {e}") + + experiments.extend([Experiment(**data[e]) for e in data]) return ExperimentCatalog( catalogIdentifier=cls.identifier, diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml similarity index 51% rename from plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml rename to plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml index 53ee0c33..216d6ae4 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml @@ -336,314 +336,4 @@ performance_testing-endpoint: - identifier: "p75_e2el_ms" - identifier: "p99_e2el_ms" metadata: - description: 'Test inference performance of a model served by vLLM endpoint across inference workload configurations' -performance_testing-geospatial-endpoint: - identifier: performance-testing-geospatial-endpoint - actuatorIdentifier: "vllm_performance" - requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values - - identifier: 'model' - metadata: - description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation' - propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" - values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"] - - identifier: 'endpoint' - metadata: - description: 'The endpoint(s) to test' - propertyDomain: - variableType: "UNKNOWN_VARIABLE_TYPE" - - identifier: 'request_rate' - metadata: - description: "The number of requests to send per second" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [-1,1000] - interval: 1 # -1 means send all requests at time 0 - optionalProperties: - - identifier: 'num_prompts' - metadata: - description: "The number of prompts to send (total number of requests)" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [1,10001] - interval: 1 - - identifier: 'burstiness' - metadata: - description: "The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." 
- propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 0, 10 ] - interval: 1 - - identifier: 'max_concurrency' - metadata: - description: "The maximum number of concurrent requests to send" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ -1, 500 ] # -1 means no concurrency control - interval: 1 - defaultParameterization: - - value: 100 - property: - identifier: 'num_prompts' - - value: -1 - property: - identifier: 'max_concurrency' - - value: 1.0 - property: - identifier: 'burstiness' - # measurements - targetProperties: - - identifier: "duration" - - identifier: "completed" - - identifier: "total_input_tokens" - - identifier: "total_output_tokens" - - identifier: "request_throughput" - - identifier: "output_throughput" - - identifier: "total_token_throughput" - - identifier: "mean_ttft_ms" - - identifier: "median_ttft_ms" - - identifier: "std_ttft_ms" - - identifier: "p25_ttft_ms" - - identifier: "p50_ttft_ms" - - identifier: "p75_ttft_ms" - - identifier: "p99_ttft_ms" - - identifier: "mean_tpot_ms" - - identifier: "median_tpot_ms" - - identifier: "std_tpot_ms" - - identifier: "p25_tpot_ms" - - identifier: "p50_tpot_ms" - - identifier: "p75_tpot_ms" - - identifier: "p99_tpot_ms" - - identifier: "mean_itl_ms" - - identifier: "median_itl_ms" - - identifier: "std_itl_ms" - - identifier: "p25_itl_ms" - - identifier: "p50_itl_ms" - - identifier: "p75_itl_ms" - - identifier: "p99_itl_ms" - - identifier: "mean_e2el_ms" - - identifier: "median_e2el_ms" - - identifier: "std_e2el_ms" - - identifier: "p25_e2el_ms" - - identifier: "p50_e2el_ms" - - identifier: "p75_e2el_ms" - - identifier: "p99_e2el_ms" - metadata: - description: 'Test inference performance of a geospatial model served by vLLM endpoint across inference workload configurations' -performance_testing-geospatial-full: - identifier: performance-testing-geospatial-full - actuatorIdentifier: "vllm_performance" - requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values - - identifier: 'model' - metadata: - description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation' - propertyDomain: - variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] - - identifier: 'request_rate' - metadata: - description: "(benchmark) The number of requests to send per second" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [-1,1000] - interval: 1 # -1 means send all requests at time 0 - optionalProperties: - - identifier: 'num_prompts' - metadata: - description: "(benchmark) The number of prompts to send (total number of requests)" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [1,10001] - interval: 1 - - identifier: 'max_concurrency' - metadata: - description: "(benchmark) The maximum number of concurrent requests to send" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ -1, 500 ] # -1 means no concurrency control - interval: 1 - - identifier: 'burstiness' - metadata: - description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." 
- propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 0, 10 ] - interval: 1 - - identifier: image - metadata: - description: "(deployment) Docker image to use to create vllm deployments" - propertyDomain: - variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] - - identifier: n_cpus - metadata: - description: "(deployment) the number of CPUs to use" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 1,17 ] - interval: 1 - - identifier: memory - metadata: - description: "(deployment) the amount of memory to allocate to vLLM pod" - propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ "64Gi", "128Gi", "256Gi" ] - - identifier: dtype - metadata: - description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." - propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] - - identifier: 'gpu_memory_utilization' - metadata: - description: "(deployment) The fraction of GPU memory to be used for the model executor," - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - values: [ .5, .75, .9 ] - - identifier: 'cpu_offload' - metadata: - description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 0 means all weights are on GPU," - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - values: [ 0, 8, 16, 24, 32 ] - - identifier: 'max_batch_tokens' - metadata: - description: "(deployment) maximum size of the sum of the 1st image dimensions per iteration" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 1024, 32769 ] - interval: 1024 - - identifier: 'max_num_seq' - metadata: - description: "(deployment) Maximum number of sequences per iteration" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [32,2049] - interval: 32 - - identifier: 'max_batch_tokens' - metadata: - description: "(deployment) maximum number of batched tokens per iteration" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 8192, 32769] - interval: 1024 - - identifier: 'n_gpus' - metadata: - description: "(deployment) Number of GPUs to use" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [1,9] - interval: 1 - - identifier: 'gpu_type' - metadata: - description: "(deployment) The GPU type to use" - propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] - - identifier: 'skip_tokenizer_init' - metadata: - description: "(deployment) skip tokenizer intialization" - propertyDomain: - variableType: BINARY_VARIABLE_TYPE - values: [True, False] - - identifier: 'enforce_eager' - metadata: - description: "(deployment) enforce pytorch eager mode" - propertyDomain: - variableType: BINARY_VARIABLE_TYPE - values: [True, False] - - identifier: 'io_processor_plugin' - metadata: - description: 'IO Pocessor plugin to load for the model' - propertyDomain: - variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ None, "terratorch_segmentation" ] - defaultParameterization: - - property: - identifier: 'image' - value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" - - property: - identifier: n_cpus - value: 8 - - property: - identifier: - memory - value: "128Gi" - - property: - identifier: dtype - value: "auto" - - property: - identifier: 'num_prompts' 
-      value: 500
-    - property:
-      identifier: 'max_concurrency'
-      value: -1
-    - property:
-      identifier: 'burstiness'
-      value: 1.0
-    - property:
-      identifier: 'gpu_memory_utilization'
-      value: .9
-    - property:
-      identifier: 'cpu_offload'
-      value: 0
-    - property:
-      identifier: 'max_num_seq'
-      value: 256
-    - property:
-      identifier: 'max_batch_tokens'
-      value: 16384
-    - property:
-      identifier: 'n_gpus'
-      value: 1
-    - property:
-      identifier: 'gpu_type'
-      value: 'NVIDIA-A100-80GB-PCIe'
-    - property:
-      identifier: 'skip_tokenizer_init'
-      value: True
-    - property:
-      identifier: 'enforce_eager'
-      value: True
-    - property:
-      identifier: 'io_processor_plugin'
-      value: "terratorch_segmentation"
-  # measurements
-  targetProperties:
-    - identifier: "duration"
-    - identifier: "completed"
-    - identifier: "total_input_tokens"
-    - identifier: "total_output_tokens"
-    - identifier: "request_throughput"
-    - identifier: "output_throughput"
-    - identifier: "total_token_throughput"
-    - identifier: "mean_ttft_ms"
-    - identifier: "median_ttft_ms"
-    - identifier: "std_ttft_ms"
-    - identifier: "p25_ttft_ms"
-    - identifier: "p50_ttft_ms"
-    - identifier: "p75_ttft_ms"
-    - identifier: "p99_ttft_ms"
-    - identifier: "mean_tpot_ms"
-    - identifier: "median_tpot_ms"
-    - identifier: "std_tpot_ms"
-    - identifier: "p25_tpot_ms"
-    - identifier: "p50_tpot_ms"
-    - identifier: "p75_tpot_ms"
-    - identifier: "p99_tpot_ms"
-    - identifier: "mean_itl_ms"
-    - identifier: "median_itl_ms"
-    - identifier: "std_itl_ms"
-    - identifier: "p25_itl_ms"
-    - identifier: "p50_itl_ms"
-    - identifier: "p75_itl_ms"
-    - identifier: "p99_itl_ms"
-    - identifier: "mean_e2el_ms"
-    - identifier: "median_e2el_ms"
-    - identifier: "std_e2el_ms"
-    - identifier: "p25_e2el_ms"
-    - identifier: "p50_e2el_ms"
-    - identifier: "p75_e2el_ms"
-    - identifier: "p99_e2el_ms"
-  metadata:
-    description: 'VLLM performance testing across compute resource and workload configuration'
\ No newline at end of file
+    description: 'Test inference performance of a model served by vLLM endpoint across inference workload configurations'
\ No newline at end of file
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
new file mode 100644
index 00000000..ad62052d
--- /dev/null
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
@@ -0,0 +1,308 @@
+# Copyright (c) IBM Corporation
+# SPDX-License-Identifier: MIT

+# The input to an experiment is an Entity. For the Entity to be a valid input,
+# its properties must match what is defined here
+performance_testing-geospatial-endpoint:
+  identifier: performance-testing-geospatial-endpoint
+  actuatorIdentifier: "vllm_performance"
+  requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values
+    - identifier: 'model'
+      metadata:
+        description: 'model to use for testing. Assumed to be served by all endpoints tested.
Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"] + - identifier: 'endpoint' + metadata: + description: 'The endpoint(s) to test' + propertyDomain: + variableType: "UNKNOWN_VARIABLE_TYPE" + - identifier: 'request_rate' + metadata: + description: "The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'burstiness' + metadata: + description: "The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + defaultParameterization: + - value: 100 + property: + identifier: 'num_prompts' + - value: -1 + property: + identifier: 'max_concurrency' + - value: 1.0 + property: + identifier: 'burstiness' + # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "total_input_tokens" + - identifier: "total_output_tokens" + - identifier: "request_throughput" + - identifier: "output_throughput" + - identifier: "total_token_throughput" + - identifier: "mean_ttft_ms" + - identifier: "median_ttft_ms" + - identifier: "std_ttft_ms" + - identifier: "p25_ttft_ms" + - identifier: "p50_ttft_ms" + - identifier: "p75_ttft_ms" + - identifier: "p99_ttft_ms" + - identifier: "mean_tpot_ms" + - identifier: "median_tpot_ms" + - identifier: "std_tpot_ms" + - identifier: "p25_tpot_ms" + - identifier: "p50_tpot_ms" + - identifier: "p75_tpot_ms" + - identifier: "p99_tpot_ms" + - identifier: "mean_itl_ms" + - identifier: "median_itl_ms" + - identifier: "std_itl_ms" + - identifier: "p25_itl_ms" + - identifier: "p50_itl_ms" + - identifier: "p75_itl_ms" + - identifier: "p99_itl_ms" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'Test inference performance of a geospatial model served by vLLM endpoint across inference workload configurations' +performance_testing-geospatial-full: + identifier: performance-testing-geospatial-full + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] + - identifier: 'request_rate' + metadata: + description: "(benchmark) The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "(benchmark) The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "(benchmark) The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + - identifier: 'burstiness' + metadata: + description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: image + metadata: + description: "(deployment) Docker image to use to create vllm deployments" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] + - identifier: n_cpus + metadata: + description: "(deployment) the number of CPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 1,17 ] + interval: 1 + - identifier: memory + metadata: + description: "(deployment) the amount of memory to allocate to vLLM pod" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "64Gi", "128Gi", "256Gi" ] + - identifier: dtype + metadata: + description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] + - identifier: 'gpu_memory_utilization' + metadata: + description: "(deployment) The fraction of GPU memory to be used for the model executor," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ .5, .75, .9 ] + - identifier: 'cpu_offload' + metadata: + description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 
0 means all weights are on GPU,"
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        values: [ 0, 8, 16, 24, 32 ]
+    - identifier: 'max_num_seq'
+      metadata:
+        description: "(deployment) Maximum number of sequences per iteration"
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [32,2049]
+        interval: 32
+    - identifier: 'max_batch_tokens'
+      metadata:
+        description: "(deployment) maximum number of batched tokens per iteration"
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [ 8192, 32769]
+        interval: 1024
+    - identifier: 'n_gpus'
+      metadata:
+        description: "(deployment) Number of GPUs to use"
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [1,9]
+        interval: 1
+    - identifier: 'gpu_type'
+      metadata:
+        description: "(deployment) The GPU type to use"
+      propertyDomain:
+        variableType: "CATEGORICAL_VARIABLE_TYPE"
+        values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ]
+    - identifier: 'skip_tokenizer_init'
+      metadata:
+        description: "(deployment) skip tokenizer intialization"
+      propertyDomain:
+        variableType: BINARY_VARIABLE_TYPE
+        values: [True, False]
+    - identifier: 'enforce_eager'
+      metadata:
+        description: "(deployment) enforce pytorch eager mode"
+      propertyDomain:
+        variableType: BINARY_VARIABLE_TYPE
+        values: [True, False]
+    - identifier: 'io_processor_plugin'
+      metadata:
+        description: 'IO Processor plugin to load for the model'
+      propertyDomain:
+        variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
+        values: [ None, "terratorch_segmentation" ]
+  defaultParameterization:
+    - property:
+      identifier: 'image'
+      value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1"
+    - property:
+      identifier: n_cpus
+      value: 8
+    - property:
+      identifier:
+        memory
+      value: "128Gi"
+    - property:
+      identifier: dtype
+      value: "auto"
+    - property:
+      identifier: 'num_prompts'
+      value: 500
+    - property:
+      identifier: 'max_concurrency'
+      value: -1
+    - property:
+      identifier: 'burstiness'
+      value: 1.0
+    - property:
+      identifier: 'gpu_memory_utilization'
+      value: .9
+    - property:
+      identifier: 'cpu_offload'
+      value: 0
+    - property:
+      identifier: 'max_num_seq'
+      value: 256
+    - property:
+      identifier: 'max_batch_tokens'
+      value: 16384
+    - property:
+      identifier: 'n_gpus'
+      value: 1
+    - property:
+      identifier: 'gpu_type'
+      value: 'NVIDIA-A100-80GB-PCIe'
+    - property:
+      identifier: 'skip_tokenizer_init'
+      value: True
+    - property:
+      identifier: 'enforce_eager'
+      value: True
+    - property:
+      identifier: 'io_processor_plugin'
+      value: "terratorch_segmentation"
+  # measurements
+  targetProperties:
+    - identifier: "duration"
+    - identifier: "completed"
+    - identifier: "total_input_tokens"
+    - identifier: "total_output_tokens"
+    - identifier: "request_throughput"
+    - identifier: "output_throughput"
+    - identifier: "total_token_throughput"
+    - identifier: "mean_ttft_ms"
+    - identifier: "median_ttft_ms"
+    - identifier: "std_ttft_ms"
+    - identifier: "p25_ttft_ms"
+    - identifier: "p50_ttft_ms"
+    - identifier: "p75_ttft_ms"
+    - identifier: "p99_ttft_ms"
+    - identifier: "mean_tpot_ms"
+    - identifier: "median_tpot_ms"
+    - identifier: "std_tpot_ms"
+    - identifier: "p25_tpot_ms"
+    - identifier: "p50_tpot_ms"
+    - identifier: "p75_tpot_ms"
+    - identifier: "p99_tpot_ms"
+    - identifier: "mean_itl_ms"
+    - identifier: "median_itl_ms"
+    - identifier: "std_itl_ms"
+    - identifier: "p25_itl_ms"
+    - identifier: "p50_itl_ms"
+    - identifier: "p75_itl_ms"
+    - identifier: "p99_itl_ms"
+    - identifier: "mean_e2el_ms"
+    - identifier: "median_e2el_ms"
+    - identifier: "std_e2el_ms"
+    - identifier: "p25_e2el_ms"
+    - identifier: "p50_e2el_ms"
+    - identifier: "p75_e2el_ms"
+    - identifier: "p99_e2el_ms"
+  metadata:
+    description: 'VLLM performance testing across compute resource and workload configuration'
\ No newline at end of file

From b851d033e45436bc79dd2b56bc4c4233c2363d55 Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 3 Nov 2025 09:24:43 +0000
Subject: [PATCH 14/46] Fixed bug in validate_entity

Signed-off-by: Christian Pinto
---
 orchestrator/schema/experiment.py     |  6 +++---
 orchestrator/schema/property_value.py | 13 +++++++++++++
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/orchestrator/schema/experiment.py b/orchestrator/schema/experiment.py
index 9e17ee6e..deb87a30 100644
--- a/orchestrator/schema/experiment.py
+++ b/orchestrator/schema/experiment.py
@@ -636,14 +636,14 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool:
         }
         if validate_point_against_properties(
             point,
-            constitutive_properties=self.requiredConstitutiveProperties,
+            constitutive_properties=self.requiredConstitutiveProperties + list(self.optionalProperties),
         ):
             return True

         # It's not an exact match - check if partial match
         if not validate_point_against_properties(
             point,
-            constitutive_properties=self.requiredConstitutiveProperties,
+            constitutive_properties=self.requiredConstitutiveProperties + list(self.optionalProperties),
             allow_partial_matches=True,
         ):
             # no partial match - missing required properties or has incorrect values for them
@@ -654,7 +654,7 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool:
             return False

         # It has the required properties with valid values but there are additional properties
-        # See if these properties are optional propertiesof the experiment
+        # See if these properties are optional properties of the experiment
         potential_optional_properties: set[str] = point.keys() - {
             cp.identifier for cp in self.requiredProperties
         }
diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py
index a429412a..d7b56107 100644
--- a/orchestrator/schema/property_value.py
+++ b/orchestrator/schema/property_value.py
@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: MIT

 import enum
+import logging
 import typing

 import pydantic
@@ -13,6 +14,7 @@
     PropertyDescriptor,
 )

+logger = logging.getLogger("property_value")

 class ValueTypeEnum(str, enum.Enum):
     NUMERIC_VALUE_TYPE = "NUMERIC_VALUE_TYPE"  # the value is a bool,int, float etc.
@@ -189,6 +191,12 @@ def validate_point_against_properties(
         cp.identifier for cp in constitutive_properties
     }

+    logger.debug(
+        f"Validating point's constitutive properties "
+        f"(allow_partial_matches = {allow_partial_matches}) {constitutive_property_identifiers_for_point}, "
+        f"against the space constitutive properties {constitutive_property_identifiers_for_entity_space}"
+    )
+
     matching_constitutive_property_identifiers = (
         constitutive_property_identifiers_for_point.intersection(
             constitutive_property_identifiers_for_entity_space
@@ -221,6 +229,11 @@
         if not constitutive_property.propertyDomain.valueInDomain(
             point[constitutive_property.identifier]
         ):
+            logger.warning(
+                f"Property {constitutive_property.identifier}({point[constitutive_property.identifier]}) "
+                "is not in the target consitutive property "
+                f"domain ({constitutive_property.propertyDomain.domainRange})"
+            )
             return False

     return True

From 7055c38850d67e24104c39260d671c4659a99285 Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 3 Nov 2025 11:32:16 +0000
Subject: [PATCH 15/46] One more fix to a log message

Signed-off-by: Christian Pinto
---
 orchestrator/schema/property_value.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py
index d7b56107..0ed81e4f 100644
--- a/orchestrator/schema/property_value.py
+++ b/orchestrator/schema/property_value.py
@@ -232,7 +232,7 @@ def validate_point_against_properties(
             logger.warning(
                 f"Property {constitutive_property.identifier}({point[constitutive_property.identifier]}) "
                 "is not in the target consitutive property "
-                f"domain ({constitutive_property.propertyDomain.domainRange})"
+                f"domain ({constitutive_property.propertyDomain.domain_values()})"
             )
             return False

From 5bdf90263b21e8061ac86fb6b0a298c477e47ee1 Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 3 Nov 2025 11:34:43 +0000
Subject: [PATCH 16/46] One more fix to a log message

Signed-off-by: Christian Pinto
---
 orchestrator/schema/property_value.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py
index 0ed81e4f..46f595ef 100644
--- a/orchestrator/schema/property_value.py
+++ b/orchestrator/schema/property_value.py
@@ -232,7 +232,7 @@ def validate_point_against_properties(
             logger.warning(
                 f"Property {constitutive_property.identifier}({point[constitutive_property.identifier]}) "
                 "is not in the target consitutive property "
-                f"domain ({constitutive_property.propertyDomain.domain_values()})"
+                f"domain ({constitutive_property.propertyDomain.domain_values})"
             )
             return False

From dbab4c7cb55fb664e4fe74b0eb6c41782dbbc1de Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 3 Nov 2025 11:35:41 +0000
Subject: [PATCH 17/46] One more fix to a log message

Signed-off-by: Christian Pinto
---
 orchestrator/schema/property_value.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py
index 0ed81e4f..46f595ef 100644
--- a/orchestrator/schema/property_value.py
+++ b/orchestrator/schema/property_value.py
@@ -232,7 +232,7 @@ def validate_point_against_properties(
             logger.warning(
                 f"Property {constitutive_property.identifier}({point[constitutive_property.identifier]}) "
                 "is not in the target consitutive property "
-                f"domain ({constitutive_property.propertyDomain.domain_values()})"
+                f"domain
({constitutive_property.propertyDomain.domain_values})" ) return False From fd100b6a1faf9bfce4e985c43c8e6b1e31eaee7d Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Mon, 3 Nov 2025 12:59:43 +0000 Subject: [PATCH 18/46] Fixes to vllm_performance actuator Signed-off-by: Christian Pinto --- .../datasets/india_url_in_b64_out.jsonl | 1 + .../valencia_url_in_b64_out.jsonl} | 0 .../vllm_performance/experiment_executor.py | 4 ++++ .../experiments/performance_testing.yaml | 18 ++++++++++++++++++ .../performance_testing_geospatial.yaml | 18 ++++++++++++++++++ .../vllm_performance_test/execute_benchmark.py | 14 +++++++++++--- 6 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/india_url_in_b64_out.jsonl rename plugins/actuators/vllm_performance/ado_actuators/vllm_performance/{geospatial_valencia.jsonl => datasets/valencia_url_in_b64_out.jsonl} (100%) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/india_url_in_b64_out.jsonl b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/india_url_in_b64_out.jsonl new file mode 100644 index 00000000..693bbc09 --- /dev/null +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/india_url_in_b64_out.jsonl @@ -0,0 +1 @@ +{"prompt":{"data": {"data": "https://huggingface.co/christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM/resolve/main/India_900498_S2Hand.tif","data_format": "url","out_data_format": "b64_json","indices": [1, 2, 3, 8, 11, 12]},"priority": 0,"softmax": false}} diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_valencia.jsonl b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/valencia_url_in_b64_out.jsonl similarity index 100% rename from plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_valencia.jsonl rename to plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/valencia_url_in_b64_out.jsonl diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 9be6921a..459e8473 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -297,6 +297,7 @@ def run_resource_and_workload_experiment( benchmark_retries=actuator_parameters.benchmark_retries, retries_timeout=actuator_parameters.retries_timeout, burstiness=float(values.get("burstiness")), + dataset = values.get("dataset"), ) else: result = execute_random_benchmark( @@ -312,6 +313,7 @@ def run_resource_and_workload_experiment( number_input_tokens=int(values.get("number_input_tokens")), max_output_tokens=int(values.get("max_output_tokens")), burstiness=float(values.get("burstiness")), + dataset = values.get("dataset"), ) logger.debug(f"benchmark executed in {time.time() - start} sec") except Exception as e: @@ -411,6 +413,7 @@ def run_workload_experiment( benchmark_retries=actuator_parameters.benchmark_retries, retries_timeout=actuator_parameters.retries_timeout, burstiness=float(values.get("burstiness")), + dataset = values.get("dataset"), ) else: result = execute_random_benchmark( @@ -426,6 +429,7 @@ def run_workload_experiment( number_input_tokens=int(values.get("number_input_tokens")), 
                 max_output_tokens=int(values.get("max_output_tokens")),
                 burstiness=float(values.get("burstiness")),
+                dataset = values.get("dataset"),
             )
     except Exception as e:
         logger.error(f"Failed to execute VLLM performance test {e}")
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
index 216d6ae4..c9537fd7 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
@@ -56,6 +56,12 @@ performance_testing-full:
         variableType: 'DISCRETE_VARIABLE_TYPE'
         domainRange: [ 1, 10000 ]
         interval: 1
+    - identifier: 'dataset'
+      metadata:
+        description: "(benchmark) The dataset to be used for the experiment"
+      propertyDomain:
+        variableType: "CATEGORICAL_VARIABLE_TYPE"
+        values: [ 'random' ]
     - identifier: image
       metadata:
         description: "(deployment) Docker image to use to create vllm deployments"
@@ -161,6 +167,9 @@ performance_testing-full:
     - property:
       identifier: 'max_output_tokens'
       value: 128
+    - property:
+      identifier: 'dataset'
+      value: 'random'
     - property:
       identifier: 'gpu_memory_utilization'
       value: .9
@@ -282,6 +291,12 @@ performance_testing-endpoint:
         variableType: 'DISCRETE_VARIABLE_TYPE'
         domainRange: [ -1, 500 ] # -1 means no concurrency control
         interval: 1
+    - identifier: 'dataset'
+      metadata:
+        description: "(benchmark) The dataset to be used for the experiment"
+      propertyDomain:
+        variableType: "CATEGORICAL_VARIABLE_TYPE"
+        values: [ 'random' ]
   defaultParameterization:
     - value: 1000
       property:
@@ -298,6 +313,9 @@ performance_testing-endpoint:
     - value: 128
       property:
         identifier: 'max_output_tokens'
+    - property:
+      identifier: 'dataset'
+      value: 'random'
   # measurements
   targetProperties:
     - identifier: "duration"
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
index ad62052d..5d976439 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
@@ -47,6 +47,12 @@ performance_testing-geospatial-endpoint:
         variableType: 'DISCRETE_VARIABLE_TYPE'
         domainRange: [ -1, 500 ] # -1 means no concurrency control
         interval: 1
+    - identifier: 'dataset'
+      metadata:
+        description: "The dataset to be used for the experiment"
+      propertyDomain:
+        variableType: "CATEGORICAL_VARIABLE_TYPE"
+        values: [ 'india_url_in_b64_out', 'valencia_url_in_b64_out' ]
   defaultParameterization:
     - value: 100
       property:
@@ -57,6 +63,9 @@ performance_testing-geospatial-endpoint:
     - value: 1.0
       property:
        identifier: 'burstiness'
+    - property:
+      identifier: 'dataset'
+      value: 'india_url_in_b64_out'
   # measurements
   targetProperties:
     - identifier: "duration"
@@ -135,6 +144,12 @@ performance_testing-geospatial-full:
         variableType: 'DISCRETE_VARIABLE_TYPE'
         domainRange: [ 0, 10 ]
         interval: 1
+    - identifier: 'dataset'
+      metadata:
+        description: "(benchmark) The dataset to be used for the experiment"
+      propertyDomain:
+        variableType: "CATEGORICAL_VARIABLE_TYPE"
+        values: [ 'india_url_in_b64_out', 'valencia_url_in_b64_out' ]
     - identifier: image
       metadata:
         description: "(deployment) Docker image to use to create vllm deployments"
@@ -267,6 +282,9 @@ performance_testing-geospatial-full:
     - property:
       identifier: 'io_processor_plugin'
      value: "terratorch_segmentation"
+    - property:
+      identifier: 'dataset'
+      value: 'india_url_in_b64_out'
   # measurements
   targetProperties:
     - identifier: "duration"
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
index cf3ed96a..1d81ee36 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
@@ -12,6 +12,10 @@
     get_results,
 )

+default_geospatial_datasets_filenames = {
+    "india_url_in_b64_out": "india_url_in_b64_out.jsonl",
+    "valencia_url_in_b64_out": "valencia_url_in_b64_out.jsonl",
+}

 def execute_benchmark(
     base_url: str,
@@ -107,6 +111,7 @@ def execute_benchmark(
 def execute_random_benchmark(
     base_url: str,
     model: str,
+    dataset: str,
     num_prompts: int = 500,
     request_rate: int | None = None,
     max_concurrency: int | None = None,
@@ -134,7 +139,7 @@ def execute_random_benchmark(
     return execute_benchmark(
         base_url=base_url,
         model=model,
-        data_set="random",
+        data_set=dataset,
         interpreter=interpreter,
         num_prompts=num_prompts,
         request_rate=request_rate,
@@ -153,6 +158,7 @@ def execute_random_benchmark(
 def execute_geospatial_benchmark(
     base_url: str,
     model: str,
+    dataset: str,
     num_prompts: int = 500,
     request_rate: int | None = None,
     max_concurrency: int | None = None,
@@ -176,9 +182,11 @@ def execute_geospatial_benchmark(
     """
     from importlib import resources

+    dataset_filename = default_geospatial_datasets_filenames[dataset]
+
     with resources.path(
-        "ado_actuators.vllm_performance",
-        "geospatial_india.jsonl",
+        "ado_actuators.vllm_performance.datasets",
+        dataset_filename,
     ) as data_set_path:
         return execute_benchmark(
             base_url=base_url,

From f7ceb520be5fde683d6e8b5c8f5a568fd82c160c Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 3 Nov 2025 09:24:43 +0000
Subject: [PATCH 19/46] fix(experiment): Fixed bug in validate_entity

Signed-off-by: Christian Pinto
---
 orchestrator/schema/experiment.py     |  8 +++++---
 orchestrator/schema/property_value.py | 14 ++++++++++++++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/orchestrator/schema/experiment.py b/orchestrator/schema/experiment.py
index 9e17ee6e..61da6ec4 100644
--- a/orchestrator/schema/experiment.py
+++ b/orchestrator/schema/experiment.py
@@ -636,14 +636,16 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool:
         }
         if validate_point_against_properties(
             point,
-            constitutive_properties=self.requiredConstitutiveProperties,
+            constitutive_properties=self.requiredConstitutiveProperties
+            + list(self.optionalProperties),
         ):
             return True

         # It's not an exact match - check if partial match
         if not validate_point_against_properties(
             point,
-            constitutive_properties=self.requiredConstitutiveProperties,
+            constitutive_properties=self.requiredConstitutiveProperties
+            + list(self.optionalProperties),
             allow_partial_matches=True,
         ):
             # no partial match - missing required properties or has incorrect values for them
@@ -654,7 +656,7 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool:
             return False

         # It has the required properties with valid values but there are additional
properties - # See if these properties are optional propertiesof the experiment + # See if these properties are optional properties of the experiment potential_optional_properties: set[str] = point.keys() - { cp.identifier for cp in self.requiredProperties } diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py index a429412a..08da1bb0 100644 --- a/orchestrator/schema/property_value.py +++ b/orchestrator/schema/property_value.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: MIT import enum +import logging import typing import pydantic @@ -13,6 +14,8 @@ PropertyDescriptor, ) +logger = logging.getLogger("property_value") + class ValueTypeEnum(str, enum.Enum): NUMERIC_VALUE_TYPE = "NUMERIC_VALUE_TYPE" # the value is a bool,int, float etc. @@ -189,6 +192,12 @@ def validate_point_against_properties( cp.identifier for cp in constitutive_properties } + logger.debug( + f"Validating point's constitutive properties " + f"(allow_partial_matches = {allow_partial_matches}) {constitutive_property_identifiers_for_point}, " + f"against the space constitutive properties {constitutive_property_identifiers_for_entity_space}" + ) + matching_constitutive_property_identifiers = ( constitutive_property_identifiers_for_point.intersection( constitutive_property_identifiers_for_entity_space @@ -221,6 +230,11 @@ def validate_point_against_properties( if not constitutive_property.propertyDomain.valueInDomain( point[constitutive_property.identifier] ): + logger.warning( + f"Property {constitutive_property.identifier}({point[constitutive_property.identifier]}) " + "is not in the target consitutive property " + f"domain ({constitutive_property.propertyDomain.domain_values})" + ) return False return True From df4f9bc1369c5744f01a25de30c43c82b3f94184 Mon Sep 17 00:00:00 2001 From: michaelj Date: Mon, 3 Nov 2025 15:21:16 +0000 Subject: [PATCH 20/46] fix: Not using reference which may be parameterized --- orchestrator/utilities/run_experiment.py | 44 +++++++++++++++--------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/orchestrator/utilities/run_experiment.py b/orchestrator/utilities/run_experiment.py index 0ed6959d..85064c4a 100644 --- a/orchestrator/utilities/run_experiment.py +++ b/orchestrator/utilities/run_experiment.py @@ -24,7 +24,7 @@ def local_execution_closure( registry: ActuatorRegistry, actuator_configuration_identifiers: list[str] | None = None, -) -> Callable[[ExperimentReference, Entity], MeasurementRequest]: +) -> Callable[[ExperimentReference, Entity], MeasurementRequest] | None: """Create a callable that submits a local measurement request. The function keeps a dictionary of Actuator actors so that each actuator @@ -66,31 +66,41 @@ def local_execution_closure( def execute_local( reference: ExperimentReference, entity: Entity - ) -> MeasurementRequest: + ) -> MeasurementRequest | None: # instantiate the actuator for this experiment identifier. 
-        experiment = registry.experimentForReference(reference)
-        if experiment.actuatorIdentifier not in actuators:
+        if reference.actuatorIdentifier not in actuators:
             actuator_class = registry.actuatorForIdentifier(
-                experiment.actuatorIdentifier
+                reference.actuatorIdentifier
             )
-            if experiment.actuatorIdentifier in actuator_configurations:
+            if reference.actuatorIdentifier in actuator_configurations:
                 config = actuator_configurations[
-                    experiment.actuatorIdentifier
+                    reference.actuatorIdentifier
                 ].parameters
             else:
                 config = actuator_class.default_parameters()
-            actuators[experiment.actuatorIdentifier] = actuator_class.remote(
+            actuators[reference.actuatorIdentifier] = actuator_class.remote(
                 queue=queue, params=config
             )
-        actuator = actuators[experiment.actuatorIdentifier]
-        # Submit the measurement request asynchronously.
-        actuator.submit.remote(
-            entities=[entity],
-            experimentReference=experiment.reference,
-            requesterid="run_experiment",
-            requestIndex=0,
-        )
+        actuator = actuators[reference.actuatorIdentifier]
+        # Submit the measurement request asynchronously, handle errors gracefully.
+        try:
+            actuator.submit.remote(
+                entities=[entity],
+                experimentReference=reference,
+                requesterid="run_experiment",
+                requestIndex=0,
+            )
+        except Exception as e:
+            print(
+                f"[ERROR] Failed to submit measurement request for {reference} to actuator '{reference.actuatorIdentifier}': {e}"
+            )
+            import traceback
+
+            traceback.print_exc()
+            # Either skip, or return None, or propagate. Let's return None.
+            return None
+
         return queue.get()

     return execute_local
@@ -239,7 +249,7 @@ def run(
             print("Skipping validation")

         if valid:
-            print(f"Executing: {reference.experimentIdentifier}")
+            print(f"Executing: {reference}")
             request = execute(reference, entity)
             print("Result:")
             print(f"{request.series_representation(output_format='target')}\n")

From d744287b1ec3ab53b3d1c93c7aae161c4d219e28 Mon Sep 17 00:00:00 2001
From: michaelj
Date: Mon, 3 Nov 2025 16:34:43 +0000
Subject: [PATCH 21/46] fix: validate_entity

validate_entity was incorrectly identifying Entities with optional
properties as invalid.

This was because
validate_point_against_properties(allow_partial_matches=True) does not
work as the code expected.

The code expected that given {point props} and {required props}, if
{required props}.issubset({point props}) it would return True.
However, it was checking the opposite, i.e.
{point props}.issubset({required props})
---
 orchestrator/schema/experiment.py | 84 +++++++++++++++----------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/orchestrator/schema/experiment.py b/orchestrator/schema/experiment.py
index 61da6ec4..46b7eb62 100644
--- a/orchestrator/schema/experiment.py
+++ b/orchestrator/schema/experiment.py
@@ -624,71 +624,71 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool:
         """Returns True if Experiment can be applied to entity, false otherwise

         This method only checks constitutive properties.
-        - All properties of the Entity that match the experiments required or optional properties must have
-        values in the domain of that property
-        - All required properties of the experiment must have a matching constitutive property
-        - If strict_optional is True all properties of the Entity that are not required properties of the Experiment
-        must match optional properties of the experiment.
+ - The entity has valid values for all required properties of the experiment + - The entity has valid values for any optional properties of the experiment it contains + - If strict_optional is True all properties of the Entity are properties (required+optional) of the experiment """ point = { v.property.identifier: v.value for v in entity.constitutive_property_values } - if validate_point_against_properties( - point, - constitutive_properties=self.requiredConstitutiveProperties - + list(self.optionalProperties), - ): - return True - - # It's not an exact match - check if partial match - if not validate_point_against_properties( - point, - constitutive_properties=self.requiredConstitutiveProperties - + list(self.optionalProperties), - allow_partial_matches=True, - ): - # no partial match - missing required properties or has incorrect values for them - logging.getLogger("experiment").warning( - f"The entity is missing or has invalid values for required properties of " - f" {self.identifier}" - ) - return False - # It has the required properties with valid values but there are additional properties - # See if these properties are optional properties of the experiment - potential_optional_properties: set[str] = point.keys() - { - cp.identifier for cp in self.requiredProperties + # + # Get required and optional property sets of the experiment + # + required_property_identifiers = { + cp.identifier for cp in self.requiredConstitutiveProperties } - optional_properties = potential_optional_properties & { + optional_property_identifiers = { cp.identifier for cp in self.optionalProperties } - # If strict_optional is on all the additional properties must be optional properties - if ( - len(optional_properties) != len(potential_optional_properties) - and strict_optional - ): + + # + # Get the equivalent sets from the entity + # + required_properties_present = point.keys() & required_property_identifiers + optional_properties_present = point.keys() & optional_property_identifiers + additional_properties_present = ( + point.keys() - required_properties_present - optional_properties_present + ) + + # First check against strict optional as it is a quick fail condition + if additional_properties_present and strict_optional: logging.getLogger("experiment").warning( f"Strict property checking is on and the following entity " f"properties are not required or optional properties of {self.identifier}:" - f"{potential_optional_properties-optional_properties} " + f"{additional_properties_present} " + ) + + # Check if all the required properties are present with values in domain + if not validate_point_against_properties( + point={k: v for k, v in point.items() if k in required_properties_present}, + constitutive_properties=self.requiredConstitutiveProperties, + ): + logging.getLogger("experiment").warning( + f"The entity is missing values for required properties of {self.identifier}: {required_property_identifiers - required_properties_present}" ) return False - is_valid = validate_point_against_properties( - point={key: point[key] for key in optional_properties}, + # All required properties are there + # Now check optional properties, if given + # We can set partial_match=True because: + # - If we wanted full match of optional properties (strict_optional), but it wasn't present, + # we would have already exited + if optional_properties_present and not validate_point_against_properties( + point={k: v for k, v in point.items() if k in optional_properties_present}, 
constitutive_properties=list(self.optionalProperties), allow_partial_matches=True, - ) - if not is_valid: + ): logging.getLogger("experiment").warning( f"The entity has properties that match optional properties" f"of {self.identifier} - " - f"{potential_optional_properties - optional_properties} - " + f"{optional_properties_present} - " f"but its values for those properties are not in the domain of the optional properties" ) + return False - return is_valid + return True class ParameterizedExperiment(Experiment): From 1869a49af194cbc3307e841a530d47d6f3ea9733 Mon Sep 17 00:00:00 2001 From: michaelj Date: Mon, 3 Nov 2025 16:53:45 +0000 Subject: [PATCH 22/46] fix: missing return --- orchestrator/schema/experiment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/orchestrator/schema/experiment.py b/orchestrator/schema/experiment.py index 46b7eb62..07bbfadf 100644 --- a/orchestrator/schema/experiment.py +++ b/orchestrator/schema/experiment.py @@ -659,6 +659,7 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool: f"properties are not required or optional properties of {self.identifier}:" f"{additional_properties_present} " ) + return False # Check if all the required properties are present with values in domain if not validate_point_against_properties( From 883e8771d801558f05565d73e83f7c89e9c7cfee Mon Sep 17 00:00:00 2001 From: michaelj Date: Mon, 3 Nov 2025 16:54:05 +0000 Subject: [PATCH 23/46] test: for validate_entity --- tests/schema/test_experiment.py | 194 +++++++++++++++++++++++++++++++- 1 file changed, 193 insertions(+), 1 deletion(-) diff --git a/tests/schema/test_experiment.py b/tests/schema/test_experiment.py index 14f3230d..72401bf1 100644 --- a/tests/schema/test_experiment.py +++ b/tests/schema/test_experiment.py @@ -8,8 +8,12 @@ from orchestrator.modules.actuators.registry import ( ActuatorRegistry, ) +from orchestrator.schema.domain import PropertyDomain, VariableTypeEnum from orchestrator.schema.entity import Entity -from orchestrator.schema.experiment import Experiment, ParameterizedExperiment +from orchestrator.schema.experiment import ( + Experiment, + ParameterizedExperiment, +) from orchestrator.schema.property import ( AbstractProperty, ConstitutiveProperty, @@ -815,3 +819,191 @@ def test_experiment_provides_requirements( mock_parameterizable_experiment ) ) + + +@pytest.fixture(scope="module") +def nevergrad_opt_3d_test_func_experiment(): + # Define required constitutive properties (x0, x1, x2, all continuous) + required_props = [ + ConstitutiveProperty( + identifier="x0", + propertyDomain=PropertyDomain( + variableType=VariableTypeEnum.CONTINUOUS_VARIABLE_TYPE + ), + ), + ConstitutiveProperty( + identifier="x1", + propertyDomain=PropertyDomain( + variableType=VariableTypeEnum.CONTINUOUS_VARIABLE_TYPE + ), + ), + ConstitutiveProperty( + identifier="x2", + propertyDomain=PropertyDomain( + variableType=VariableTypeEnum.CONTINUOUS_VARIABLE_TYPE + ), + ), + ] + # Optional property: name (categorical) + optional_props = ( + ConstitutiveProperty( + identifier="name", + propertyDomain=PropertyDomain( + variableType=VariableTypeEnum.CATEGORICAL_VARIABLE_TYPE, + values=["rosenbrock", "griewank", "sphere"], + ), + ), + ) + default_param = ( + ConstitutivePropertyValue( + value="rosenbrock", + property=ConstitutivePropertyDescriptor(identifier="name"), + ), + ) + return Experiment( + actuatorIdentifier="custom_experiments", + identifier="nevergrad_opt_3d_test_func", + targetProperties=[], + requiredProperties=tuple(required_props), + 
optionalProperties=optional_props, + defaultParameterization=default_param, + ) + + +def entity_with_props(props): + return Entity(constitutive_property_values=tuple(props)) + + +def test_validate_entity_required_only(nevergrad_opt_3d_test_func_experiment): + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value=2.5, property=ConstitutivePropertyDescriptor(identifier="x2") + ), + ] + entity = entity_with_props(props) + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is True + + +def test_validate_entity_with_optional_valid(nevergrad_opt_3d_test_func_experiment): + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value=2.5, property=ConstitutivePropertyDescriptor(identifier="x2") + ), + ConstitutivePropertyValue( + value="sphere", property=ConstitutivePropertyDescriptor(identifier="name") + ), + ] + entity = entity_with_props(props) + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is True + + +def test_validate_entity_with_optional_invalid(nevergrad_opt_3d_test_func_experiment): + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value=2.5, property=ConstitutivePropertyDescriptor(identifier="x2") + ), + ConstitutivePropertyValue( + value="foobar", property=ConstitutivePropertyDescriptor(identifier="name") + ), + ] + entity = entity_with_props(props) + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is False + + +def test_validate_entity_missing_required(nevergrad_opt_3d_test_func_experiment): + # missing x2 + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ] + entity = entity_with_props(props) + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is False + + +def test_validate_entity_missing_required_with_optional_valid( + nevergrad_opt_3d_test_func_experiment, +): + # missing x2 but valid name + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value="griewank", property=ConstitutivePropertyDescriptor(identifier="name") + ), + ] + entity = entity_with_props(props) + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is False + + +def test_validate_entity_additional_property_strict_optional_false( + nevergrad_opt_3d_test_func_experiment, +): + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value=2.5, property=ConstitutivePropertyDescriptor(identifier="x2") + ), + ConstitutivePropertyValue( + value=10, 
property=ConstitutivePropertyDescriptor(identifier="test") + ), + ] + entity = entity_with_props(props) + # Default: strict_optional=False, extra property is fine + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is True + + +def test_validate_entity_additional_property_strict_optional_true( + nevergrad_opt_3d_test_func_experiment, +): + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value=2.5, property=ConstitutivePropertyDescriptor(identifier="x2") + ), + ConstitutivePropertyValue( + value=10, property=ConstitutivePropertyDescriptor(identifier="test") + ), + ] + entity = entity_with_props(props) + assert ( + nevergrad_opt_3d_test_func_experiment.validate_entity( + entity, strict_optional=True + ) + is False + ) From 064750f8403e140ba38a253b9debdd69e1dea9d5 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Tue, 4 Nov 2025 09:30:49 +0000 Subject: [PATCH 24/46] chore: fixed formatting with black Signed-off-by: Christian Pinto --- orchestrator/modules/operators/randomwalk.py | 4 +--- .../ado_actuators/vllm_performance/actuator.py | 15 +++++++++++---- .../vllm_performance/experiment_executor.py | 10 +++++----- .../vllm_performance/k8/create_environment.py | 4 ++-- .../vllm_performance/k8/manage_components.py | 2 +- .../k8/yaml_support/build_components.py | 8 ++++---- .../vllm_performance_test/execute_benchmark.py | 1 + 7 files changed, 25 insertions(+), 19 deletions(-) diff --git a/orchestrator/modules/operators/randomwalk.py b/orchestrator/modules/operators/randomwalk.py index eff9a6f2..1018c17a 100644 --- a/orchestrator/modules/operators/randomwalk.py +++ b/orchestrator/modules/operators/randomwalk.py @@ -208,9 +208,7 @@ def sampler(self) -> BaseSampler | GroupSampler: case CombinedWalkModeEnum.RANDOMGROUPED: sampler = RandomGroupSampleSelector(group=self.grouping) case CombinedWalkModeEnum.SEQUENTIALGROUPED: - sampler = SequentialGroupSampleSelector( - group=self.grouping - ) + sampler = SequentialGroupSampleSelector(group=self.grouping) case _: # this can never happen, as we are validating this above pass diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py index 88d4e67c..dd45091f 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py @@ -64,14 +64,18 @@ def catalog( logger.debug(f"Loading experiments from {exp_file}") exp_file_path = os.path.join(exp_dir, exp_file) if os.path.isdir(exp_file_path): - logger.error(f"{exp_file_path} is a directory. Only files are supported in the experiments directory") - raise Exception(f"{exp_file_path} is a directory. Only files are supported in the experiments directory") + logger.error( + f"{exp_file_path} is a directory. Only files are supported in the experiments directory" + ) + raise Exception( + f"{exp_file_path} is a directory. 
Only files are supported in the experiments directory" + ) with open(exp_file_path) as f: try: data = yaml.safe_load(f) except yaml.YAMLError as e: logger.error(f"File {exp_file} is a malformed YAML - {e}") - raise Exception (f"File {exp_file} is a malformed YAML - {e}") + raise Exception(f"File {exp_file} is a malformed YAML - {e}") experiments.extend([Experiment(**data[e]) for e in data]) @@ -188,7 +192,10 @@ async def submit( if experiment.deprecated is True: raise DeprecatedExperimentError(f"Experiment {experiment} is deprecated") - if experiment.identifier in ["performance-testing-full", "performance-testing-geospatial-full"]: + if experiment.identifier in [ + "performance-testing-full", + "performance-testing-geospatial-full", + ]: if not self.env_manager: raise MissingConfigurationForExperimentError( f"Actuator configuration did not contain sufficient information for a kubernetes environment manager to be created. " diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 459e8473..693bdb58 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -146,7 +146,7 @@ def _create_environment( namespace=actuator.namespace, skip_tokenizer_init=values.get("skip_tokenizer_init"), enforce_eager=values.get("enforce_eager"), - io_processor_plugin=values.get("io_processor_plugin") + io_processor_plugin=values.get("io_processor_plugin"), ) # Update manager env_manager.done_creating.remote(definition=definition) @@ -297,7 +297,7 @@ def run_resource_and_workload_experiment( benchmark_retries=actuator_parameters.benchmark_retries, retries_timeout=actuator_parameters.retries_timeout, burstiness=float(values.get("burstiness")), - dataset = values.get("dataset"), + dataset=values.get("dataset"), ) else: result = execute_random_benchmark( @@ -313,7 +313,7 @@ def run_resource_and_workload_experiment( number_input_tokens=int(values.get("number_input_tokens")), max_output_tokens=int(values.get("max_output_tokens")), burstiness=float(values.get("burstiness")), - dataset = values.get("dataset"), + dataset=values.get("dataset"), ) logger.debug(f"benchmark executed in {time.time() - start} sec") except Exception as e: @@ -413,7 +413,7 @@ def run_workload_experiment( benchmark_retries=actuator_parameters.benchmark_retries, retries_timeout=actuator_parameters.retries_timeout, burstiness=float(values.get("burstiness")), - dataset = values.get("dataset"), + dataset=values.get("dataset"), ) else: result = execute_random_benchmark( @@ -429,7 +429,7 @@ def run_workload_experiment( number_input_tokens=int(values.get("number_input_tokens")), max_output_tokens=int(values.get("max_output_tokens")), burstiness=float(values.get("burstiness")), - dataset = values.get("dataset"), + dataset=values.get("dataset"), ) except Exception as e: logger.error(f"Failed to execute VLLM performance test {e}") diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py index 3f0a0809..c3a2a2b1 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py @@ -42,7 +42,7 @@ def 
create_test_environment( namespace: str = "vllm-testing", enforce_eager: bool = False, skip_tokenizer_init: bool = False, - io_processor_plugin: str | None = None + io_processor_plugin: str | None = None, ) -> None: """ Create test deployment @@ -122,7 +122,7 @@ def create_test_environment( reuse=reuse_deployment, enforce_eager=enforce_eager, skip_tokenizer_init=skip_tokenizer_init, - io_processor_plugin=io_processor_plugin + io_processor_plugin=io_processor_plugin, ) logger.debug("deployment created") c_manager.wait_deployment_ready(k8_name=k8_name) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py index cd77a444..9fddc978 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py @@ -233,7 +233,7 @@ def create_deployment( reuse: bool = False, enforce_eager: bool = False, skip_tokenizer_init: bool = False, - io_processor_plugin: str | None = None + io_processor_plugin: str | None = None, ) -> None: """ create deployment for model diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index 0abcc8c9..41d8cdb4 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -1,12 +1,12 @@ # Copyright (c) IBM Corporation # SPDX-License-Identifier: MIT +import json import logging import os import sys import uuid from enum import Enum -import json from typing import Any import yaml @@ -45,7 +45,7 @@ def get_k8_name(model: str) -> str: # Making sure the resulting name is not longer than 63 characters as it is # the maximum allowed for a name in kubernetes. 
- name_prefix = m_parts[-1][:min(len(m_parts[-1]), 21)].rstrip("-") + name_prefix = m_parts[-1][: min(len(m_parts[-1]), 21)].rstrip("-") return f"vllm-{name_prefix.lower()}-{uuid.uuid4()}".replace(".", "-") @staticmethod @@ -180,7 +180,7 @@ def deployment_yaml( limits["memory"] = memory limits["nvidia.com/gpu"] = str(n_gpus) - #command + # command container["command"] = ["vllm", "serve"] container["args"] = vllm_serve_args # env variables to to set parameters for docker execution @@ -194,7 +194,7 @@ def deployment_yaml( # {"name": "TENSOR_PARALLEL_SIZE", "value": str(n_gpus)}, # ] if hf_token is not None: - container["env"]=[{"name": "HF_TOKEN", "value": hf_token}] + container["env"] = [{"name": "HF_TOKEN", "value": hf_token}] if claim_name is not None: if "env" not in container: container["env"] = [] diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index 1d81ee36..e3d39cc8 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -17,6 +17,7 @@ "valencia_url_in_b64_out": "valencia_url_in_b64_out.jsonl", } + def execute_benchmark( base_url: str, model: str, From 35bb2e2f396bc056e7e108eac756f28b5609e4ac Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Tue, 4 Nov 2025 09:35:00 +0000 Subject: [PATCH 25/46] chore: Removed dataset file as it was relocated to a different folder Signed-off-by: Christian Pinto --- .../ado_actuators/vllm_performance/geospatial_india.jsonl | 1 - 1 file changed, 1 deletion(-) delete mode 100644 plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl deleted file mode 100644 index 693bbc09..00000000 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"prompt":{"data": {"data": "https://huggingface.co/christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM/resolve/main/India_900498_S2Hand.tif","data_format": "url","out_data_format": "b64_json","indices": [1, 2, 3, 8, 11, 12]},"priority": 0,"softmax": false}} From c68ab3540c06788f5249cdb5bd21c87be05ec7f3 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Tue, 4 Nov 2025 16:03:38 +0000 Subject: [PATCH 26/46] feat: Added custom dataset geospatial experiment Signed-off-by: Christian Pinto --- .../vllm_performance/actuator.py | 1 + .../vllm_performance/experiment_executor.py | 5 +- .../performance_testing_geospatial.yaml | 220 +++++++++++++++++- .../execute_benchmark.py | 71 +++--- 4 files changed, 268 insertions(+), 29 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py index dd45091f..38e9f47a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py @@ -195,6 +195,7 @@ async def submit( if experiment.identifier in [ "performance-testing-full", "performance-testing-geospatial-full", + 
"performance-testing-geospatial-full-custom-dataset", ]: if not self.env_manager: raise MissingConfigurationForExperimentError( diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 693bdb58..d588a884 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -285,7 +285,10 @@ def run_resource_and_workload_experiment( start = time.time() result = None try: - if experiment.identifier == "performance-testing-geospatial-full": + if experiment.identifier in [ + "performance-testing-geospatial-full", + "performance-testing-geospatial-full-custom-dataset", + ]: result = execute_geospatial_benchmark( base_url=base_url, model=values.get("model"), diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 5d976439..9edd8668 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -216,7 +216,7 @@ performance_testing-geospatial-full: values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] - identifier: 'skip_tokenizer_init' metadata: - description: "(deployment) skip tokenizer intialization" + description: "(deployment) skip tokenizer initialization" propertyDomain: variableType: BINARY_VARIABLE_TYPE values: [True, False] @@ -286,6 +286,224 @@ performance_testing-geospatial-full: identifier: 'dataset' value: 'india_url_in_b64_out' # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "total_input_tokens" + - identifier: "total_output_tokens" + - identifier: "request_throughput" + - identifier: "output_throughput" + - identifier: "total_token_throughput" + - identifier: "mean_ttft_ms" + - identifier: "median_ttft_ms" + - identifier: "std_ttft_ms" + - identifier: "p25_ttft_ms" + - identifier: "p50_ttft_ms" + - identifier: "p75_ttft_ms" + - identifier: "p99_ttft_ms" + - identifier: "mean_tpot_ms" + - identifier: "median_tpot_ms" + - identifier: "std_tpot_ms" + - identifier: "p25_tpot_ms" + - identifier: "p50_tpot_ms" + - identifier: "p75_tpot_ms" + - identifier: "p99_tpot_ms" + - identifier: "mean_itl_ms" + - identifier: "median_itl_ms" + - identifier: "std_itl_ms" + - identifier: "p25_itl_ms" + - identifier: "p50_itl_ms" + - identifier: "p75_itl_ms" + - identifier: "p99_itl_ms" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'VLLM performance testing across compute resource and workload configuration' +performance_testing-geospatial-full-custom-dataset: + identifier: performance-testing-geospatial-full-custom-dataset + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] + - identifier: 'request_rate' + metadata: + description: "(benchmark) The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "(benchmark) The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "(benchmark) The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + - identifier: 'burstiness' + metadata: + description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: image + metadata: + description: "(deployment) Docker image to use to create vllm deployments" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] + - identifier: n_cpus + metadata: + description: "(deployment) the number of CPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 1,17 ] + interval: 1 + - identifier: memory + metadata: + description: "(deployment) the amount of memory to allocate to vLLM pod" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "64Gi", "128Gi", "256Gi" ] + - identifier: dtype + metadata: + description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] + - identifier: 'gpu_memory_utilization' + metadata: + description: "(deployment) The fraction of GPU memory to be used for the model executor," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ .5, .75, .9 ] + - identifier: 'cpu_offload' + metadata: + description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 
0 means all weights are on GPU," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ 0, 8, 16, 24, 32 ] + - identifier: 'max_num_seq' + metadata: + description: "(deployment) Maximum number of sequences per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [32,2049] + interval: 32 + - identifier: 'max_batch_tokens' + metadata: + description: "(deployment) maximum number of batched tokens per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 8192, 32769] + interval: 1024 + - identifier: 'n_gpus' + metadata: + description: "(deployment) Number of GPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,9] + interval: 1 + - identifier: 'gpu_type' + metadata: + description: "(deployment) The GPU type to use" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + - identifier: 'skip_tokenizer_init' + metadata: + description: "(deployment) skip tokenizer initialization" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'enforce_eager' + metadata: + description: "(deployment) enforce pytorch eager mode" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'io_processor_plugin' + metadata: + description: 'IO Pocessor plugin to load for the model' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ None, "terratorch_segmentation" ] + - identifier: 'dataset' + metadata: + description: "(benchmark) The dataset to be used for the experiment" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + defaultParameterization: + - property: + identifier: 'image' + value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" + - property: + identifier: n_cpus + value: 8 + - property: + identifier: + memory + value: "128Gi" + - property: + identifier: dtype + value: "auto" + - property: + identifier: 'num_prompts' + value: 500 + - property: + identifier: 'max_concurrency' + value: -1 + - property: + identifier: 'burstiness' + value: 1.0 + - property: + identifier: 'gpu_memory_utilization' + value: .9 + - property: + identifier: 'cpu_offload' + value: 0 + - property: + identifier: 'max_num_seq' + value: 256 + - property: + identifier: 'max_batch_tokens' + value: 16384 + - property: + identifier: 'n_gpus' + value: 1 + - property: + identifier: 'gpu_type' + value: 'NVIDIA-A100-80GB-PCIe' + - property: + identifier: 'skip_tokenizer_init' + value: True + - property: + identifier: 'enforce_eager' + value: True + - property: + identifier: 'io_processor_plugin' + value: "terratorch_segmentation" + - property: + identifier: 'dataset' + value: None + # measurements targetProperties: - identifier: "duration" - identifier: "completed" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index e3d39cc8..8b651cfc 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -12,6 +12,8 @@ get_results, ) +logger = logging.getLogger("vllm-bench") + default_geospatial_datasets_filenames = { "india_url_in_b64_out": "india_url_in_b64_out.jsonl", 
"valencia_url_in_b64_out": "valencia_url_in_b64_out.jsonl", @@ -51,7 +53,6 @@ def execute_benchmark( keys are vllm benchmark arguments. values are the values to pass to the arguments :return: results dictionary """ - logger = logging.getLogger("vllm-bench") logger.debug( f"executing benchmark, invoking service at {base_url} with the parameters: " @@ -181,34 +182,50 @@ def execute_geospatial_benchmark( :param output_token_length: length of output tokens :return: results dictionary """ - from importlib import resources - - dataset_filename = default_geospatial_datasets_filenames[dataset] - - with resources.path( - "ado_actuators.vllm_performance.datasets", - dataset_filename, - ) as data_set_path: - return execute_benchmark( - base_url=base_url, - backend="io-processor-plugin", - model=model, - data_set="custom", - interpreter=interpreter, - num_prompts=num_prompts, - request_rate=request_rate, - max_concurrency=max_concurrency, - hf_token=hf_token, - benchmark_retries=benchmark_retries, - retries_timeout=retries_timeout, - burstiness=burstiness, - custom_args={ - "--dataset-path": data_set_path, - "--endpoint": "/pooling", - "--skip-tokenizer-init": True, - }, + + if dataset in default_geospatial_datasets_filenames: + from pathlib import Path + + dataset_filename = default_geospatial_datasets_filenames[dataset] + parent_path = Path(__file__).parents[1].absolute() + data_set_path = os.path.join(parent_path, "datasets", dataset_filename) + else: + # This can only happen with the performance-testing-geospatial-full-custom-dataset + # experiment, otherwise the dataset name is always one of the allowed ones. + # Here the assumption is that the dataset file is placed in the process working directory. + ray_working_dir = os.getcwd() + data_set_path = os.path.join(ray_working_dir, dataset) + + if not os.path.exists(data_set_path) or not os.path.isfile(data_set_path): + logger.warning( + f"The dataset filename provided does not exist or does not point to a valid file: {data_set_path}" + ) + raise Exception( + f"The dataset filename provided does not exist or does not point to a valid file: {data_set_path}" ) + logger.debug(f"Dataset path {data_set_path}") + + return execute_benchmark( + base_url=base_url, + backend="io-processor-plugin", + model=model, + data_set="custom", + interpreter=interpreter, + num_prompts=num_prompts, + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=hf_token, + benchmark_retries=benchmark_retries, + retries_timeout=retries_timeout, + burstiness=burstiness, + custom_args={ + "--dataset-path": data_set_path, + "--endpoint": "/pooling", + "--skip-tokenizer-init": True, + }, + ) + if __name__ == "__main__": results = execute_geospatial_benchmark( From 41815b6073d83570962f11468be04c7ca9ed782a Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 6 Nov 2025 13:28:51 +0000 Subject: [PATCH 27/46] fix: Reworked vllm_catalog actuator experiments catalog loading Signed-off-by: Christian Pinto --- .../vllm_performance/actuator.py | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py index 38e9f47a..ec38f1fe 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py @@ -3,8 +3,8 @@ import json import logging -import os import uuid +from pathlib import Path 
import ray import yaml @@ -57,27 +57,24 @@ def catalog( # Loading experiment definitions for yaml files contained in the `experiments` directory. # NOTE: Only files can be placed in the experiments directory, # but each file can contain multiple experiment definitions - path = os.path.abspath(__file__) - exp_dir = os.path.join(os.path.split(path)[0], "experiments") + curr_path = Path(__file__) + exp_dir = curr_path.parent / Path("experiments") + logger.debug(f"Experiments dir {exp_dir.absolute()}") experiments = [] - for exp_file in os.listdir(exp_dir): - logger.debug(f"Loading experiments from {exp_file}") - exp_file_path = os.path.join(exp_dir, exp_file) - if os.path.isdir(exp_file_path): - logger.error( - f"{exp_file_path} is a directory. Only files are supported in the experiments directory" - ) - raise Exception( - f"{exp_file_path} is a directory. Only files are supported in the experiments directory" - ) - with open(exp_file_path) as f: - try: - data = yaml.safe_load(f) - except yaml.YAMLError as e: - logger.error(f"File {exp_file} is a malformed YAML - {e}") - raise Exception(f"File {exp_file} is a malformed YAML - {e}") + for exp_file in exp_dir.iterdir(): + if exp_file.is_dir(): + continue - experiments.extend([Experiment(**data[e]) for e in data]) + logger.debug(f"Loading experiments from {exp_file.name}") + try: + file_data = exp_file.read_text() + data = yaml.safe_load(file_data) + except yaml.YAMLError: + error_message = f"File {exp_file.name} is a malformed YAML" + logger.error(error_message) + raise ValueError(error_message) + + experiments.extend([Experiment.model_validate(data[e]) for e in data]) return ExperimentCatalog( catalogIdentifier=cls.identifier, From 53bf77ae3423e3b9ab8290a5f5d9c5350e8830c3 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 6 Nov 2025 13:49:03 +0000 Subject: [PATCH 28/46] chore: Improved experiment definition language and properties typing Signed-off-by: Christian Pinto --- .../experiments/performance_testing.yaml | 7 ++++--- .../performance_testing_geospatial.yaml | 19 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml index c9537fd7..032bfec1 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml @@ -58,7 +58,7 @@ performance_testing-full: interval: 1 - identifier: 'dataset' metadata: - description: "(benchmark) The dataset to be used fof the experiment" + description: "(benchmark) The dataset to be used for the experiment" propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" values: [ 'random' ] @@ -128,7 +128,7 @@ performance_testing-full: values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] - identifier: 'skip_tokenizer_init' metadata: - description: "(deployment) skip tokenizer intialization" + description: "(deployment) skip tokenizer initialization" propertyDomain: variableType: BINARY_VARIABLE_TYPE values: [True, False] @@ -248,6 +248,7 @@ performance_testing-endpoint: description: 'The endpoint(s) to test' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: ["http://localhost:8000"] - identifier: 'request_rate' metadata: description: "The number of requests to send per second" @@ -293,7 
+294,7 @@ performance_testing-endpoint: interval: 1 - identifier: 'dataset' metadata: - description: "(benchmark) The dataset to be used fof the experiment" + description: "(benchmark) The dataset to be used for the experiment" propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" values: [ 'random' ] diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 9edd8668..29d3a81d 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -17,7 +17,8 @@ performance_testing-geospatial-endpoint: metadata: description: 'The endpoint(s) to test' propertyDomain: - variableType: "UNKNOWN_VARIABLE_TYPE" + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: ["http://localhost:8000"] - identifier: 'request_rate' metadata: description: "The number of requests to send per second" @@ -341,6 +342,12 @@ performance_testing-geospatial-full-custom-dataset: variableType: 'DISCRETE_VARIABLE_TYPE' domainRange: [-1,1000] interval: 1 # -1 means send all requests at time 0 + - identifier: 'dataset' + metadata: + description: "(benchmark) The dataset to be used for the experiment" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [" custom_dataset.jsonl "] optionalProperties: - identifier: 'num_prompts' metadata: @@ -444,12 +451,7 @@ performance_testing-geospatial-full-custom-dataset: description: 'IO Pocessor plugin to load for the model' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ None, "terratorch_segmentation" ] - - identifier: 'dataset' - metadata: - description: "(benchmark) The dataset to be used for the experiment" - propertyDomain: - variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "terratorch_segmentation" ] defaultParameterization: - property: identifier: 'image' @@ -500,9 +502,6 @@ performance_testing-geospatial-full-custom-dataset: - property: identifier: 'io_processor_plugin' value: "terratorch_segmentation" - - property: - identifier: 'dataset' - value: None # measurements targetProperties: - identifier: "duration" From 18f217c331e55781d07453a4548115e89bf7abc2 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 6 Nov 2025 13:56:19 +0000 Subject: [PATCH 29/46] fix: Improved logic for fetching a dataset in the geospatial benchmark Signed-off-by: Christian Pinto --- .../k8/yaml_support/build_components.py | 11 +------ .../execute_benchmark.py | 33 ++++++++----------- 2 files changed, 14 insertions(+), 30 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index ffca8985..36ab4fe0 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -183,16 +183,7 @@ def deployment_yaml( # command container["command"] = ["vllm", "serve"] container["args"] = vllm_serve_args - # env variables to to set parameters for docker execution - # container["env"] = [ - # {"name": "MODEL", "value": model}, - # {"name": 
"GPU_MEMORY_UTILIZATION", "value": str(gpu_memory_utilization)}, - # {"name": "DTYPE", "value": dtype.value}, - # {"name": "CPU_OFFLOAD_GB", "value": str(cpu_offload)}, - # {"name": "MAX_NUM_BATCHED_TOKENS", "value": str(max_batch_tokens)}, - # {"name": "MAX_NUM_SEQ", "value": str(max_num_seq)}, - # {"name": "TENSOR_PARALLEL_SIZE", "value": str(n_gpus)}, - # ] + if hf_token is not None: container["env"] = [{"name": "HF_TOKEN", "value": hf_token}] if claim_name is not None: diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index a23285fe..0120694c 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -63,13 +63,7 @@ def execute_benchmark( logger.debug( f"request_rate {request_rate}, max_concurrency {max_concurrency}, benchmark retries {benchmark_retries}" ) - # The code below is commented as we are switching from a script invocation to command line - # invocation. If we want to bring back script execution for any reason, this code must be - # uncommented - # parameters - # code = os.path.abspath( - # os.path.join(os.path.dirname(__file__), "benchmark_serving.py") - # ) + request = f"export HF_TOKEN={hf_token} && " if hf_token is not None else "" f_name = f"{uuid.uuid4().hex}.json" request += ( @@ -182,27 +176,26 @@ def execute_geospatial_benchmark( :param output_token_length: length of output tokens :return: results dictionary """ + from pathlib import Path if dataset in default_geospatial_datasets_filenames: - from pathlib import Path - dataset_filename = default_geospatial_datasets_filenames[dataset] - parent_path = Path(__file__).parents[1].absolute() - data_set_path = os.path.join(parent_path, "datasets", dataset_filename) + parent_path = Path(__file__).parents[1] + data_set_path = parent_path / "datasets" / dataset_filename else: # This can only happen with the performance-testing-geospatial-full-custom-dataset # experiment, otherwise the dataset name is always one of the allowed ones. # Here the assumption is that the dataset file is placed in the process working directory. 
- ray_working_dir = os.getcwd() - data_set_path = os.path.join(ray_working_dir, dataset) + ray_working_dir = Path.cwd() + data_set_path = ray_working_dir / dataset - if not os.path.exists(data_set_path) or not os.path.isfile(data_set_path): - logger.warning( - f"The dataset filename provided does not exist or does not point to a valid file: {data_set_path}" - ) - raise Exception( - f"The dataset filename provided does not exist or does not point to a valid file: {data_set_path}" + if not data_set_path.is_file(): + error_string = ( + "The dataset filename provided does not exist or " + f"does not point to a valid file: {data_set_path}" ) + logger.warning(error_string) + raise ValueError(error_string) logger.debug(f"Dataset path {data_set_path}") @@ -220,7 +213,7 @@ def execute_geospatial_benchmark( retries_timeout=retries_timeout, burstiness=burstiness, custom_args={ - "--dataset-path": data_set_path, + "--dataset-path": f"{data_set_path.resolve()}", "--endpoint": "/pooling", "--skip-tokenizer-init": True, }, From e93779454af7494a7de4d152bb7ff106fc6d1bb1 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 6 Nov 2025 16:24:28 +0000 Subject: [PATCH 30/46] chore: various fixes after review Signed-off-by: Christian Pinto --- .../experiments/performance_testing.yaml | 2 +- .../performance_testing_geospatial.yaml | 8 ++-- .../execute_benchmark.py | 42 +++++++++---------- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml index 032bfec1..a60a17d4 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml @@ -134,7 +134,7 @@ performance_testing-full: values: [True, False] - identifier: 'enforce_eager' metadata: - description: "(deployment) enforce pytorch eager mode" + description: "(deployment) enforce PyTorch eager mode" propertyDomain: variableType: BINARY_VARIABLE_TYPE values: [True, False] diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 29d3a81d..43f8e3e2 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -229,7 +229,7 @@ performance_testing-geospatial-full: values: [True, False] - identifier: 'io_processor_plugin' metadata: - description: 'IO Pocessor plugin to load for the model' + description: 'IO Processor plugin to load for the model' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [ None, "terratorch_segmentation" ] @@ -347,7 +347,7 @@ performance_testing-geospatial-full-custom-dataset: description: "(benchmark) The dataset to be used for the experiment" propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [" custom_dataset.jsonl "] + values: ["custom_dataset.jsonl"] optionalProperties: - identifier: 'num_prompts' metadata: @@ -442,13 +442,13 @@ performance_testing-geospatial-full-custom-dataset: values: [True, False] - identifier: 'enforce_eager' metadata: -
description: "(deployment) enforce pytorch eager mode" + description: "(deployment) enforce PyTorch eager mode" propertyDomain: variableType: BINARY_VARIABLE_TYPE values: [True, False] - identifier: 'io_processor_plugin' metadata: - description: 'IO Pocessor plugin to load for the model' + description: 'IO Processor plugin to load for the model' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [ "terratorch_segmentation" ] diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index 0120694c..028abdd3 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -23,7 +23,7 @@ def execute_benchmark( base_url: str, model: str, - data_set: str, + dataset: str, backend: str = "openai", interpreter: str = "python", num_prompts: int = 500, @@ -32,7 +32,7 @@ def execute_benchmark( hf_token: str | None = None, benchmark_retries: int = 3, retries_timeout: int = 5, - data_set_path: str | None = None, + dataset_path: str | None = None, custom_args: dict[str, Any] | None = None, burstiness: float = 1, ) -> dict[str, Any]: @@ -40,7 +40,7 @@ def execute_benchmark( Execute benchmark :param base_url: url for vllm endpoint :param model: model - :param data_set: data set name ["sharegpt", "sonnet", "random", "hf"] + :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] :param interpreter - name of Python interpreter :param num_prompts: number of prompts :param request_rate: request rate @@ -48,7 +48,7 @@ def execute_benchmark( :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry - :param data_set_path: path to the dataset + :param dataset_path: path to the dataset :param custom_args: custom arguments to pass to the benchmark. keys are vllm benchmark arguments. values are the values to pass to the arguments :return: results dictionary @@ -58,7 +58,7 @@ def execute_benchmark( f"executing benchmark, invoking service at {base_url} with the parameters: " ) logger.debug( - f"model {model}, data set {data_set}, python {interpreter}, num prompts {num_prompts}" + f"model {model}, data set {dataset}, python {interpreter}, num prompts {num_prompts}" ) logger.debug( f"request_rate {request_rate}, max_concurrency {max_concurrency}, benchmark retries {benchmark_retries}" @@ -67,16 +67,14 @@ def execute_benchmark( request = f"export HF_TOKEN={hf_token} && " if hf_token is not None else "" f_name = f"{uuid.uuid4().hex}.json" request += ( - # changing from script invocation to cli invocation - # f"{interpreter} {code} --backend openai --base-url {base_url} --dataset-name {data_set} " - f"vllm bench serve --backend {backend} --base-url {base_url} --dataset-name {data_set} " + f"vllm bench serve --backend {backend} --base-url {base_url} --dataset-name {dataset} " f"--model {model} --seed 12345 --num-prompts {num_prompts!s} --save-result --metric-percentiles " f'"25,75,99" --percentile-metrics "ttft,tpot,itl,e2el" --result-dir . 
--result-filename {f_name} ' f"--burstiness {burstiness} " ) - if data_set_path is not None: - request += f" --dataset-path {data_set_path} " + if dataset_path is not None: + request += f" --dataset-path {dataset_path} " if request_rate is not None: request += f" --request-rate {request_rate!s} " if max_concurrency is not None: @@ -123,7 +121,7 @@ def execute_random_benchmark( Execute benchmark with random dataset :param base_url: url for vllm endpoint :param model: model - :param data_set: data set name ["sharegpt", "sonnet", "random", "hf"] + :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry @@ -135,7 +133,7 @@ def execute_random_benchmark( return execute_benchmark( base_url=base_url, model=model, - data_set=dataset, + dataset=dataset, interpreter=interpreter, num_prompts=num_prompts, request_rate=request_rate, @@ -168,12 +166,12 @@ def execute_geospatial_benchmark( Execute benchmark with random dataset :param base_url: url for vllm endpoint :param model: model - :param data_set: data set name ["sharegpt", "sonnet", "random", "hf"] + :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry - :param input_token_length: length of input tokens - :param output_token_length: length of output tokens + :param burstiness: burstiness factor of the request generation, 0 < burstiness < 1 + :param interpreter: python interpreter to use :return: results dictionary """ from pathlib import Path @@ -181,29 +179,29 @@ def execute_geospatial_benchmark( if dataset in default_geospatial_datasets_filenames: dataset_filename = default_geospatial_datasets_filenames[dataset] parent_path = Path(__file__).parents[1] - data_set_path = parent_path / "datasets" / dataset_filename + dataset_path = parent_path / "datasets" / dataset_filename else: # This can only happen with the performance-testing-geospatial-full-custom-dataset # experiment, otherwise the dataset name is always one of the allowed ones. # Here the assumption is that the dataset file is placed in the process working directory. 
ray_working_dir = Path.cwd() - data_set_path = ray_working_dir / dataset + dataset_path = ray_working_dir / dataset - if not data_set_path.is_file(): + if not dataset_path.is_file(): error_string = ( "The dataset filename provided does not exist or " - f"does not point to a valid file: {data_set_path}" + f"does not point to a valid file: {dataset_path}" ) logger.warning(error_string) raise ValueError(error_string) - logger.debug(f"Dataset path {data_set_path}") + logger.debug(f"Dataset path {dataset_path}") return execute_benchmark( base_url=base_url, backend="io-processor-plugin", model=model, - data_set="custom", + dataset="custom", interpreter=interpreter, num_prompts=num_prompts, request_rate=request_rate, @@ -213,7 +211,7 @@ def execute_geospatial_benchmark( retries_timeout=retries_timeout, burstiness=burstiness, custom_args={ - "--dataset-path": f"{data_set_path.resolve()}", + "--dataset-path": f"{dataset_path.resolve()}", "--endpoint": "/pooling", "--skip-tokenizer-init": True, }, From bad013389fbd453c271ca752778abc6fa4742d62 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 7 Nov 2025 11:37:47 +0000 Subject: [PATCH 31/46] chore(performance_testing_geospatial): removing target properties that are irrelevant to geospatial tests Signed-off-by: Christian Pinto --- .../performance_testing_geospatial.yaml | 69 ------------------- 1 file changed, 69 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 43f8e3e2..65ee2733 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -74,29 +74,6 @@ performance_testing-geospatial-endpoint: - identifier: "total_input_tokens" - identifier: "total_output_tokens" - identifier: "request_throughput" - - identifier: "output_throughput" - - identifier: "total_token_throughput" - - identifier: "mean_ttft_ms" - - identifier: "median_ttft_ms" - - identifier: "std_ttft_ms" - - identifier: "p25_ttft_ms" - - identifier: "p50_ttft_ms" - - identifier: "p75_ttft_ms" - - identifier: "p99_ttft_ms" - - identifier: "mean_tpot_ms" - - identifier: "median_tpot_ms" - - identifier: "std_tpot_ms" - - identifier: "p25_tpot_ms" - - identifier: "p50_tpot_ms" - - identifier: "p75_tpot_ms" - - identifier: "p99_tpot_ms" - - identifier: "mean_itl_ms" - - identifier: "median_itl_ms" - - identifier: "std_itl_ms" - - identifier: "p25_itl_ms" - - identifier: "p50_itl_ms" - - identifier: "p75_itl_ms" - - identifier: "p99_itl_ms" - identifier: "mean_e2el_ms" - identifier: "median_e2el_ms" - identifier: "std_e2el_ms" @@ -293,29 +270,6 @@ performance_testing-geospatial-full: - identifier: "total_input_tokens" - identifier: "total_output_tokens" - identifier: "request_throughput" - - identifier: "output_throughput" - - identifier: "total_token_throughput" - - identifier: "mean_ttft_ms" - - identifier: "median_ttft_ms" - - identifier: "std_ttft_ms" - - identifier: "p25_ttft_ms" - - identifier: "p50_ttft_ms" - - identifier: "p75_ttft_ms" - - identifier: "p99_ttft_ms" - - identifier: "mean_tpot_ms" - - identifier: "median_tpot_ms" - - identifier: "std_tpot_ms" - - identifier: "p25_tpot_ms" - - identifier: "p50_tpot_ms" - - identifier: "p75_tpot_ms" - - identifier: "p99_tpot_ms" - - 
identifier: "mean_itl_ms" - - identifier: "median_itl_ms" - - identifier: "std_itl_ms" - - identifier: "p25_itl_ms" - - identifier: "p50_itl_ms" - - identifier: "p75_itl_ms" - - identifier: "p99_itl_ms" - identifier: "mean_e2el_ms" - identifier: "median_e2el_ms" - identifier: "std_e2el_ms" @@ -509,29 +463,6 @@ performance_testing-geospatial-full-custom-dataset: - identifier: "total_input_tokens" - identifier: "total_output_tokens" - identifier: "request_throughput" - - identifier: "output_throughput" - - identifier: "total_token_throughput" - - identifier: "mean_ttft_ms" - - identifier: "median_ttft_ms" - - identifier: "std_ttft_ms" - - identifier: "p25_ttft_ms" - - identifier: "p50_ttft_ms" - - identifier: "p75_ttft_ms" - - identifier: "p99_ttft_ms" - - identifier: "mean_tpot_ms" - - identifier: "median_tpot_ms" - - identifier: "std_tpot_ms" - - identifier: "p25_tpot_ms" - - identifier: "p50_tpot_ms" - - identifier: "p75_tpot_ms" - - identifier: "p99_tpot_ms" - - identifier: "mean_itl_ms" - - identifier: "median_itl_ms" - - identifier: "std_itl_ms" - - identifier: "p25_itl_ms" - - identifier: "p50_itl_ms" - - identifier: "p75_itl_ms" - - identifier: "p99_itl_ms" - identifier: "mean_e2el_ms" - identifier: "median_e2el_ms" - identifier: "std_e2el_ms" From 164e13e35e7750777616f7210e08e34b991644b6 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 7 Nov 2025 11:39:20 +0000 Subject: [PATCH 32/46] chore(execute_benchmark): docstrings cleanup Signed-off-by: Christian Pinto --- .../execute_benchmark.py | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index 028abdd3..839aa528 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -41,16 +41,19 @@ def execute_benchmark( :param base_url: url for vllm endpoint :param model: model :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] - :param interpreter - name of Python interpreter + :param backend: name of the vLLM benchmark backend to be used ["vllm", "openai", "openai-chat", "openai-audio", "openai-embeddings"] + :param interpreter: name of Python interpreter :param num_prompts: number of prompts :param request_rate: request rate - :param max_concurrency: max concurrency + :param max_concurrency: maximum number of concurrent requests :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry :param dataset_path: path to the dataset :param custom_args: custom arguments to pass to the benchmark. + :param burstiness: burstiness factor of the request generation, 0 < burstiness < 1 keys are vllm benchmark arguments. 
values are the values to pass to the arguments + + :return: results dictionary """ @@ -68,7 +71,7 @@ def execute_benchmark( f_name = f"{uuid.uuid4().hex}.json" request += ( f"vllm bench serve --backend {backend} --base-url {base_url} --dataset-name {dataset} " f"--model {model} --seed 12345 --num-prompts {num_prompts!s} --save-result --metric-percentiles " f'"25,75,99" --percentile-metrics "ttft,tpot,itl,e2el" --result-dir . --result-filename {f_name} ' f"--burstiness {burstiness} " ) @@ -122,7 +125,7 @@ def execute_random_benchmark( Execute benchmark with random dataset :param base_url: url for vllm endpoint :param model: model :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] + :param num_prompts: number of prompts + :param request_rate: request rate + :param max_concurrency: maximum number of concurrent requests :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry - :param input_token_length: length of input tokens - :param output_token_length: length of output tokens + :param burstiness: burstiness factor of the request generation, 0 < burstiness < 1 + :param number_input_tokens: maximum number of input tokens for each request + :param max_output_tokens: maximum number of output tokens for each request + :param interpreter: name of Python interpreter + :return: results dictionary """ # Call execute_benchmark with the appropriate arguments @@ -167,12 +176,12 @@ def execute_geospatial_benchmark( Execute benchmark with random dataset :param base_url: url for vllm endpoint :param model: model :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] + :param num_prompts: number of prompts + :param request_rate: request rate + :param max_concurrency: maximum number of concurrent requests :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry :param burstiness: burstiness factor of the request generation, 0 < burstiness < 1 :param interpreter: python interpreter to use + :return: results dictionary """ from pathlib import Path From 7da44ddd98b31075d200163fe3f22ba7c676a570 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Wed, 19 Nov 2025 11:33:07 +0000 Subject: [PATCH 33/46] feat(group_samplers): Improved GroupSampler groups building process Moved to reading the entities from storage only when they are returned by the generator rather than fetching the whole space when initializing the sampler.
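A minimal sketch of the deferred-fetch pattern this commit moves to (illustrative only: `grouped_lazy_iterator` and its arguments are hypothetical names, `fetch_entity` stands in for the `entity_for_point` lookup used in the diff below, and the grouping mirrors `_build_groups_dict`):

    def grouped_lazy_iterator(points, group, fetch_entity):
        # points: list[dict] of constitutive property values, cheap to enumerate
        # fetch_entity: callable resolving a point dict to a full stored Entity
        groups = {}
        for point in points:
            # group key is the subset of properties named in `group`
            key = frozenset((k, v) for k, v in point.items() if k in group)
            groups.setdefault(key, []).append(point)
        for members in groups.values():
            # entities are read from storage only when their group is yielded
            yield [fetch_entity(p) for p in members]

With this shape, only the groups a sampler actually consumes trigger reads from the store, instead of every Entity being materialized when the sampler is initialized.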
Signed-off-by: Christian Pinto --- .../core/discoveryspace/group_samplers.py | 96 +++++++++++-------- 1 file changed, 54 insertions(+), 42 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index 9a20b3c3..4149eccf 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -21,24 +21,18 @@ def _build_entity_group_values( - entity: Entity, group: list[str] + entity: dict, group: list[str] ) -> frozenset[tuple[str, Any]]: """ - :return: A frozen set of (key,value) paris + :return: A frozen set of (key,value) pairs """ # build a dictionary of entity values given the group - return frozenset( - { - (v.property.identifier, v.value) - for v in entity.constitutive_property_values - if v.property.identifier in group - } - ) + return frozenset({(k, v) for k, v in entity.items() if k in group}) def _build_groups_dict( - entities: list[Entity], group: list[str] -) -> dict[frozenset[tuple[str, Any]], list[Entity]]: + entities: list[dict], group: list[str] +) -> dict[frozenset[tuple[str, Any]], list[dict]]: """ builds a dict of lists of entities, combining entities based on group definitions :param entities: list of entities @@ -55,7 +49,7 @@ def _build_groups_dict( return groups -def _build_groups_list(entities: list[Entity], group: list[str]) -> list[list[Entity]]: +def _build_groups_list(entities: list[dict], group: list[str]) -> list[list[dict]]: """ builds a list of lists of entities, combining entities based on group definitions :param entities: list of entities @@ -68,7 +62,7 @@ def _build_groups_list(entities: list[Entity], group: list[str]) -> list[list[En async def _get_grouped_sample_async( generator: AsyncGenerator[list[Entity], None], -) -> list[Entity] | None: +) -> list[dict] | None: try: return await anext(generator) except (StopAsyncIteration, StopIteration): @@ -77,7 +71,7 @@ async def _get_grouped_sample_async( def _get_grouped_sample( generator: Generator[list[Entity], None, None], -) -> list[Entity] | None: +) -> list[dict] | None: try: return next(generator) except (StopAsyncIteration, StopIteration): @@ -85,8 +79,8 @@ def _get_grouped_sample( async def _sequential_iterator_async( - entities: list[Entity], group: list[str] -) -> AsyncGenerator[list[Entity], None]: + entities: list[dict], group: list[str] +) -> AsyncGenerator[list[dict], None]: """ Sequential iterator through discovery space with grouping :param entities: list of entities @@ -100,8 +94,8 @@ async def _sequential_iterator_async( def _sequential_iterator( - entities: list[Entity], group: list[str] -) -> Generator[list[Entity], None, None]: + entities: list[dict], group: list[str] +) -> Generator[list[dict], None, None]: """ Sequential iterator through discovery space with grouping :param entities: list of entities @@ -115,8 +109,8 @@ def _sequential_iterator( async def _random_iterator_async( - entities: list[Entity], group: list[str] -) -> AsyncGenerator[list[Entity], None]: + entities: list[dict], group: list[str] +) -> AsyncGenerator[list[dict], None]: """ Random iterator through discovery space with grouping :param entities: list of entities @@ -133,8 +127,8 @@ async def _random_iterator_async( def _random_iterator( - entities: list[Entity], group: list[str] -) -> Generator[list[Entity], None, None]: + entities: list[dict], group: list[str] +) -> Generator[list[dict], None, None]: """ Random iterator through discovery space with grouping :param entities: list of entities 
@@ -151,7 +145,9 @@ def _random_iterator( def _sequential_group_iterator( - generator: Generator[list[Entity], None, None], batchsize: int + generator: Generator[list[dict], None, None], + discoverySpace: DiscoverySpace, + batchsize: int, ) -> Generator[list[Entity], None, None]: """ Sequential group iterator @@ -174,8 +170,10 @@ def _sequential_group_iterator( # mark that we are done and break done = True break + # Retrieve entity from the store + entity = discoverySpace.entity_for_point(sample[0]) # append a new entity to batch - batch.append(sample[0]) + batch.append(entity) # remove entity from samples sample = sample[1:] # submit a batch and clean it up @@ -186,7 +184,9 @@ def _sequential_group_iterator( async def _sequential_group_iterator_async( - generator: AsyncGenerator[list[Entity], None], batchsize: int + generator: AsyncGenerator[list[Entity], None], + discoverySpaceManager: DiscoverySpaceManager, + batchsize: int, ) -> AsyncGenerator[list[Entity], None]: """ Async sequential group iterator @@ -210,8 +210,10 @@ async def _sequential_group_iterator_async( # mark that we are done done = True break + # Retrieve entity from the store + entity = ray.get(discoverySpaceManager.entity_for_point(sample[0])) # append a new entity to batch - batch.append(sample[0]) + batch.append(entity) # remove entity from samples sample = sample[1:] # submit a batch and clean it up @@ -283,7 +285,7 @@ async def remoteEntityIterator( remoteDiscoverySpace=remoteDiscoverySpace ) return _sequential_group_iterator_async( - generator=grooped_iterator, batchsize=batchsize + generator=grooped_iterator, discbatchsize=batchsize ) @@ -388,29 +390,24 @@ def _get_remote_space_entities( # build list of entities return self._get_space_entities(discoverySpace=dspace) - def _get_space_entities(self, discoverySpace: DiscoverySpace) -> list[Entity]: + def _get_space_entities(self, discoverySpace: DiscoverySpace) -> list[dict]: """ Building list of entities for a discovery space :param discoverySpace: discovery space :return: list of entities """ - # get entity space entity_space = discoverySpace.entitySpace - # create sampler generator - self.samplerCompatibleWithEntitySpace(entity_space) - # create iterator - iterator = super().entityIterator(discoverySpace, batchsize=1) - # get entities - entity_list = [] - for e in iterator: - entity_list.append(e[0]) - return entity_list + property_names = [c.identifier for c in entity_space.constitutiveProperties] + return [ + dict(zip(property_names, point)) + for point in entity_space.sequential_point_iterator() + ] def entityGroupIterator( self, discoverySpace: DiscoverySpace, - ) -> Generator[list[Entity], None, None]: + ) -> Generator[list[dict], None, None]: """Returns an iterator that samples groups of entities from a discoveryspace Note: The number of entities returned on each call to the iterator can vary as it depends on @@ -420,13 +417,26 @@ def entityGroupIterator( discoverySpace: An orchestrator.model.space.DiscoverySpace instance """ + entitySpace = discoverySpace.entitySpace + + if not ExplicitEntitySpaceGroupedGridSampleGenerator.samplerCompatibleWithEntitySpace( + entitySpace=entitySpace + ): + raise ValueError( + f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entitySpace}" + ) + + entities = self._get_space_entities(discoverySpace=discoverySpace) + def iterator_closure() -> Generator[list[Entity], None, None]: def sequential_iterator() -> Generator[list[Entity], None, None]: - entities = self._get_space_entities(discoverySpace=discoverySpace) 
return _sequential_iterator(entities=entities, group=self.group) def random_iterator() -> Generator[list[Entity], None, None]: - entities = self._get_space_entities(discoverySpace=discoverySpace) return _random_iterator(entities=entities, group=self.group) if self.mode == WalkModeEnum.SEQUENTIAL: @@ -490,7 +500,9 @@ def entityIterator( """Returns an iterator over a sequence of entities ordered by group""" grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( - generator=grouped_iterator, batchsize=batchsize + generator=grouped_iterator, + batchsize=batchsize, + discoverySpace=discoverySpace, ) async def remoteEntityIterator( From 18a65c309b62bcd10516cd6ab95d437d22800e09 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 20 Nov 2025 11:06:56 +0000 Subject: [PATCH 34/46] fix(group_samplers): Last fixes to make the group samplers tests pass Signed-off-by: Christian Pinto --- .../core/discoveryspace/group_samplers.py | 154 ++++++++++-------- tests/core/test_group_samplers.py | 35 +++- 2 files changed, 118 insertions(+), 71 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index 4149eccf..dad9617d 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -20,19 +20,60 @@ moduleLog = logging.getLogger("groupsamplers") +def _get_space_entities(discoverySpace: DiscoverySpace) -> list[dict]: + """ + Building list of entities for a discovery space + + :param discoverySpace: discovery space + :return: list of entities + """ + entity_space = discoverySpace.entitySpace + property_names = [c.identifier for c in entity_space.constitutiveProperties] + return [ + dict(zip(property_names, point)) + for point in entity_space.sequential_point_iterator() + ] + + +def _get_remote_space_entities( + remoteDiscoverySpace: DiscoverySpaceManager, +) -> list[dict]: + """ + Building list of entities for a discovery space + + :param remoteDiscoverySpace: discovery space actor + :return: list of entities dict + """ + # get discovery space + # noinspection PyUnresolvedReferences + dspace = ray.get(remoteDiscoverySpace.discoverySpace.remote()) + # build list of entities + return _get_space_entities(discoverySpace=dspace) + + def _build_entity_group_values( - entity: dict, group: list[str] + entity: dict | Entity, group: list[str] ) -> frozenset[tuple[str, Any]]: """ :return: A frozen set of (key,value) pairs """ # build a dictionary of entity values given the group + if isinstance(entity, Entity): + return frozenset( + { + (v.property.identifier, v.value) + for v in entity.constitutive_property_values + if v.property.identifier in group + } + ) + # We get a dict in the case of generator-style samplers, where we defer fetching the + # full entity from the store until it is returned by the iterator.
return frozenset({(k, v) for k, v in entity.items() if k in group}) def _build_groups_dict( - entities: list[dict], group: list[str] -) -> dict[frozenset[tuple[str, Any]], list[dict]]: + entities: list[dict | Entity], group: list[str] +) -> dict[frozenset[tuple[str, Any]], list[dict | Entity]]: """ builds a dict of lists of entities, combining entities based on group definitions :param entities: list of entities @@ -49,19 +90,20 @@ def _build_groups_dict( return groups -def _build_groups_list(entities: list[dict], group: list[str]) -> list[list[dict]]: +def _build_groups_list( + entities: list[dict | Entity], group: list[str] +) -> list[list[dict | Entity]]: """ builds a list of lists of entities, combining entities based on group definitions :param entities: list of entities :param group: group definition :return: """ - return list(_build_groups_dict(entities=entities, group=group).values()) async def _get_grouped_sample_async( - generator: AsyncGenerator[list[Entity], None], + generator: AsyncGenerator[list[dict], None], ) -> list[dict] | None: try: return await anext(generator) @@ -79,8 +121,8 @@ def _get_grouped_sample( async def _sequential_iterator_async( - entities: list[dict], group: list[str] -) -> AsyncGenerator[list[dict], None]: + entities: list[dict | Entity], group: list[str] +) -> AsyncGenerator[list[dict | Entity], None]: """ Sequential iterator through discovery space with grouping :param entities: list of entities @@ -94,8 +136,8 @@ async def _sequential_iterator_async( def _sequential_iterator( - entities: list[dict], group: list[str] -) -> Generator[list[dict], None, None]: + entities: list[dict | Entity], group: list[str] +) -> Generator[list[dict | Entity], None, None]: """ Sequential iterator through discovery space with grouping :param entities: list of entities @@ -171,7 +213,11 @@ def _sequential_group_iterator( done = True break # Retrieve entity from the store - entity = discoverySpace.entity_for_point(sample[0]) + if type(sample[0]) is dict: + entity = discoverySpace.entity_for_point(sample[0]) + else: + # The sample is already an Entity + entity = sample[0] # append a new entity to batch batch.append(entity) # remove entity from samples @@ -184,8 +230,8 @@ def _sequential_group_iterator( async def _sequential_group_iterator_async( - generator: AsyncGenerator[list[Entity], None], - discoverySpaceManager: DiscoverySpaceManager, + generator: AsyncGenerator[list[dict], None], + remoteDiscoverySpace: DiscoverySpaceManager, batchsize: int, ) -> AsyncGenerator[list[Entity], None]: """ @@ -210,8 +256,13 @@ async def _sequential_group_iterator_async( # mark that we are done done = True break - # Retrieve entity from the store - entity = ray.get(discoverySpaceManager.entity_for_point(sample[0])) + if type(sample[0]) is dict: + # Retrieve entity from the store + entity = ray.get( + remoteDiscoverySpace.entity_for_point.remote(sample[0]) + ) + else: + entity = sample[0] # append a new entity to batch batch.append(entity) # remove entity from samples @@ -275,17 +326,21 @@ def entityIterator( ) -> Generator[list[Entity], None, None]: grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( - generator=grouped_iterator, batchsize=batchsize + generator=grouped_iterator, + discoverySpace=discoverySpace, + batchsize=batchsize, ) async def remoteEntityIterator( self, remoteDiscoverySpace: DiscoverySpaceManager, batchsize=1 ) -> AsyncGenerator[list[Entity], None]: - grooped_iterator = await self.remoteEntityGroupIterator( + 
grouped_iterator = await self.remoteEntityGroupIterator( remoteDiscoverySpace=remoteDiscoverySpace ) return _sequential_group_iterator_async( - generator=grooped_iterator, discbatchsize=batchsize + generator=grouped_iterator, + remoteDiscoverySpace=remoteDiscoverySpace, + batchsize=batchsize, ) @@ -340,7 +395,9 @@ def entityIterator( ) -> Generator[list[Entity], None, None]: grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( - generator=grouped_iterator, batchsize=batchsize + generator=grouped_iterator, + discoverySpace=discoverySpace, + batchsize=batchsize, ) async def remoteEntityIterator( @@ -350,7 +407,9 @@ async def remoteEntityIterator( remoteDiscoverySpace=remoteDiscoverySpace ) return _sequential_group_iterator_async( - generator=grouped_iterator, batchsize=batchsize + generator=grouped_iterator, + remoteDiscoverySpace=remoteDiscoverySpace, + batchsize=batchsize, ) @@ -375,35 +434,6 @@ def __init__(self, mode: WalkModeEnum, group: list[str]): f"Initializing ExplicitEntitySpaceGroupedGridSampleGenerator, group: {group}" ) - def _get_remote_space_entities( - self, discoverySpaceActor: DiscoverySpaceManager - ) -> list[Entity]: - """ - Building list of entities for a discovery space - - :param discoverySpaceActor: discovery space actor - :return: list of entities - """ - # get discovery space - # noinspection PyUnresolvedReferences - dspace = ray.get(discoverySpaceActor.discoverySpace.remote()) - # build list of entities - return self._get_space_entities(discoverySpace=dspace) - - def _get_space_entities(self, discoverySpace: DiscoverySpace) -> list[dict]: - """ - Building list of entities for a discovery space - - :param discoverySpace: discovery space - :return: list of entities - """ - entity_space = discoverySpace.entitySpace - property_names = [c.identifier for c in entity_space.constitutiveProperties] - return [ - dict(zip(property_names, point)) - for point in entity_space.sequential_point_iterator() - ] - def entityGroupIterator( self, discoverySpace: DiscoverySpace, @@ -426,7 +456,7 @@ def entityGroupIterator( f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entitySpace}" ) - entities = self._get_space_entities(discoverySpace=discoverySpace) + entities = _get_space_entities(discoverySpace=discoverySpace) def iterator_closure() -> Generator[list[Entity], None, None]: def sequential_iterator() -> Generator[list[Entity], None, None]: @@ -451,13 +481,11 @@ async def remoteEntityGroupIterator( """Returns an async iterator that returns groups of entities as defined by the instances group property""" async def iterator_closure( - spaceActor: DiscoverySpaceManager, + remoteDiscoverySpace: DiscoverySpaceManager, ) -> AsyncGenerator[list[Entity], None]: # noinspection PyUnresolvedReferences - entitySpace = await spaceActor.entitySpace.remote() - # noinspection PyUnresolvedReferences - measurementSpace = await spaceActor.measurementSpace.remote() + entitySpace = await remoteDiscoverySpace.entitySpace.remote() if not ExplicitEntitySpaceGroupedGridSampleGenerator.samplerCompatibleWithEntitySpace( entitySpace=entitySpace @@ -466,13 +494,9 @@ async def iterator_closure( f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entitySpace}" ) - observedProperties = [] - for experiment in measurementSpace.experiments: - observedProperties.extend(experiment.observedProperties) - def sequential_iterator() -> AsyncGenerator[list[Entity], None]: - entities = self._get_remote_space_entities( - 
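# --- Editorial aside, not part of the patch: the random walk used by these
# samplers permutes whole groups, not individual members, so a group's
# entities always come out contiguously. Standalone sketch with toy groups:
import numpy as np

group_list = [["a1", "a2"], ["b1"], ["c1", "c2", "c3"]]
order = np.random.choice(a=range(len(group_list)), size=len(group_list), replace=False)
shuffled = [group_list[int(i)] for i in order]
# Every group survives intact; only the group order is randomised.
assert sorted(map(tuple, shuffled)) == sorted(map(tuple, group_list))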
discoverySpaceActor=spaceActor + entities = _get_remote_space_entities( + remoteDiscoverySpace=remoteDiscoverySpace ) return _sequential_iterator_async( entities=entities, @@ -480,8 +504,8 @@ def sequential_iterator() -> AsyncGenerator[list[Entity], None]: ) def random_iterator() -> AsyncGenerator[list[Entity], None]: - entities = self._get_remote_space_entities( - discoverySpaceActor=spaceActor + entities = _get_remote_space_entities( + remoteDiscoverySpace=remoteDiscoverySpace ) return _random_iterator_async( entities=entities, @@ -501,8 +525,8 @@ def entityIterator( grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( generator=grouped_iterator, - batchsize=batchsize, discoverySpace=discoverySpace, + batchsize=batchsize, ) async def remoteEntityIterator( @@ -512,5 +536,7 @@ async def remoteEntityIterator( remoteDiscoverySpace=remoteDiscoverySpace ) return _sequential_group_iterator_async( - generator=grouped_iterator, batchsize=batchsize + generator=grouped_iterator, + remoteDiscoverySpace=remoteDiscoverySpace, + batchsize=batchsize, ) diff --git a/tests/core/test_group_samplers.py b/tests/core/test_group_samplers.py index 97de8203..f712c5f1 100644 --- a/tests/core/test_group_samplers.py +++ b/tests/core/test_group_samplers.py @@ -20,6 +20,7 @@ from orchestrator.modules.operators.discovery_space_manager import ( DiscoverySpaceManager, ) +from orchestrator.schema.entity import Entity from orchestrator.schema.entityspace import EntitySpaceRepresentation @@ -49,14 +50,14 @@ def check_group_order( if isinstance(sampler, ExplicitEntitySpaceGroupedGridSampleGenerator): ids = [cp.identifier for cp in space.entitySpace.constitutiveProperties] entities = [ - space.entity_for_point(dict(zip(ids, p))) - for p in space.entitySpace.sequential_point_iterator() + dict(zip(ids, p)) for p in space.entitySpace.sequential_point_iterator() ] groups = _build_groups_dict(entities=entities, group=group) expected_group_order = list(groups.keys()) if sampler.mode == WalkModeEnum.RANDOM: assert group_order != expected_group_order else: + assert group_order == expected_group_order else: entities = space.matchingEntities() @@ -114,13 +115,23 @@ def test_group_sampler_local( for i, group in enumerate(sampler.entityGroupIterator(space)): count += len(group) for entity in group: - print(i, count, entity.identifier) + print(i, count, entity.identifier if isinstance(entity, Entity) else entity) node_value = { - e.valueForConstitutivePropertyIdentifier("nodes").value for e in group + ( + e["nodes"] + if type(e) is dict + else e.valueForConstitutivePropertyIdentifier("nodes").value + ) + for e in group } cpu_value = { - e.valueForConstitutivePropertyIdentifier("cpu_family").value for e in group + ( + e["cpu_family"] + if type(e) is dict + else e.valueForConstitutivePropertyIdentifier("cpu_family").value + ) + for e in group } assert ( @@ -215,10 +226,20 @@ async def test_group_sampler_remote( count += len(group) group_count += 1 node_value = { - e.valueForConstitutivePropertyIdentifier("nodes").value for e in group + ( + e["nodes"] + if type(e) is dict + else e.valueForConstitutivePropertyIdentifier("nodes").value + ) + for e in group } cpu_value = { - e.valueForConstitutivePropertyIdentifier("cpu_family").value for e in group + ( + e["cpu_family"] + if type(e) is dict + else e.valueForConstitutivePropertyIdentifier("cpu_family").value + ) + for e in group } assert ( From 8fa538ac6347d65fed6aec14d44db6b9d5cb462d Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: 
Thu, 20 Nov 2025 13:57:28 +0000 Subject: [PATCH 35/46] chore(group_sampler): Generic cleanup Signed-off-by: Christian Pinto --- .../core/discoveryspace/group_samplers.py | 40 +++++-------------- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index dad9617d..295f34f8 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -20,7 +20,7 @@ moduleLog = logging.getLogger("groupsamplers") -def _get_space_entities(discoverySpace: DiscoverySpace) -> list[dict]: +def _get_space_points(discoverySpace: DiscoverySpace) -> list[dict]: """ Building list of entities for a discovery space @@ -35,22 +35,6 @@ def _get_space_entities(discoverySpace: DiscoverySpace) -> list[dict]: ] -def _get_remote_space_entities( - remoteDiscoverySpace: DiscoverySpaceManager, -) -> list[dict]: - """ - Building list of entities for a discovery space - - :param remoteDiscoverySpace: discovery space actor - :return: list of entities dict - """ - # get discovery space - # noinspection PyUnresolvedReferences - dspace = ray.get(remoteDiscoverySpace.discoverySpace.remote()) - # build list of entities - return _get_space_entities(discoverySpace=dspace) - - def _build_entity_group_values( entity: dict | Entity, group: list[str] ) -> frozenset[tuple[str, Any]]: @@ -169,7 +153,8 @@ async def _random_iterator_async( def _random_iterator( - entities: list[dict], group: list[str] + entities: list[dict], + group: list[str], ) -> Generator[list[dict], None, None]: """ Random iterator through discovery space with grouping @@ -178,6 +163,7 @@ def _random_iterator( :return: """ group_list = _build_groups_list(entities=entities, group=group) + randomized = np.random.choice( a=range(len(group_list)), size=len(group_list), replace=False ) @@ -456,18 +442,18 @@ def entityGroupIterator( f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entitySpace}" ) - entities = _get_space_entities(discoverySpace=discoverySpace) + points = _get_space_points(discoverySpace=discoverySpace) def iterator_closure() -> Generator[list[Entity], None, None]: def sequential_iterator() -> Generator[list[Entity], None, None]: - return _sequential_iterator(entities=entities, group=self.group) + return _sequential_iterator(entities=points, group=self.group) def random_iterator() -> Generator[list[Entity], None, None]: import time now = time.perf_counter() print(f"Getting all entities took {time.perf_counter() - now}") - return _random_iterator(entities=entities, group=self.group) + return _random_iterator(entities=points, group=self.group) if self.mode == WalkModeEnum.SEQUENTIAL: return sequential_iterator() @@ -486,6 +472,8 @@ async def iterator_closure( # noinspection PyUnresolvedReferences entitySpace = await remoteDiscoverySpace.entitySpace.remote() + discoverySpace = await remoteDiscoverySpace.discoverySpace.remote() + points = _get_space_points(discoverySpace=discoverySpace) if not ExplicitEntitySpaceGroupedGridSampleGenerator.samplerCompatibleWithEntitySpace( entitySpace=entitySpace @@ -495,20 +483,14 @@ async def iterator_closure( ) def sequential_iterator() -> AsyncGenerator[list[Entity], None]: - entities = _get_remote_space_entities( - remoteDiscoverySpace=remoteDiscoverySpace - ) return _sequential_iterator_async( - entities=entities, + entities=points, group=self.group, ) def random_iterator() -> AsyncGenerator[list[Entity], None]: - entities = 
_get_remote_space_entities( - remoteDiscoverySpace=remoteDiscoverySpace - ) return _random_iterator_async( - entities=entities, + entities=points, group=self.group, ) From 3ed7ec603d88ab12a663d610c62713cad5aad803 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 21 Nov 2025 09:34:37 +0000 Subject: [PATCH 36/46] chore(group_sampler): Applied snake casing wherever possible Signed-off-by: Christian Pinto --- .../core/discoveryspace/group_samplers.py | 98 +++++++++---------- tests/core/test_group_samplers.py | 3 +- 2 files changed, 51 insertions(+), 50 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index 295f34f8..e142bb20 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -20,14 +20,14 @@ moduleLog = logging.getLogger("groupsamplers") -def _get_space_points(discoverySpace: DiscoverySpace) -> list[dict]: +def _get_space_points(discovery_space: DiscoverySpace) -> list[dict]: """ Building list of entities for a discovery space - :param discoverySpace: discovery space + :param discovery_space: discovery space :return: list of entities """ - entity_space = discoverySpace.entitySpace + entity_space = discovery_space.entitySpace property_names = [c.identifier for c in entity_space.constitutiveProperties] return [ dict(zip(property_names, point)) @@ -174,8 +174,8 @@ def _random_iterator( def _sequential_group_iterator( generator: Generator[list[dict], None, None], - discoverySpace: DiscoverySpace, - batchsize: int, + discovery_space: DiscoverySpace, + batch_size: int, ) -> Generator[list[Entity], None, None]: """ Sequential group iterator @@ -189,7 +189,7 @@ def _sequential_group_iterator( # loop while not done while not done: # loop through the batch size - for _ in range(batchsize): + for _ in range(batch_size): if len(sample) == 0: # get the new group sample = _get_grouped_sample(generator=generator) @@ -200,7 +200,7 @@ def _sequential_group_iterator( break # Retrieve entity from the store if type(sample[0]) is dict: - entity = discoverySpace.entity_for_point(sample[0]) + entity = discovery_space.entity_for_point(sample[0]) else: # The sample is already an Entity entity = sample[0] @@ -217,8 +217,8 @@ def _sequential_group_iterator( async def _sequential_group_iterator_async( generator: AsyncGenerator[list[dict], None], - remoteDiscoverySpace: DiscoverySpaceManager, - batchsize: int, + remote_discovery_space: DiscoverySpaceManager, + batch_size: int, ) -> AsyncGenerator[list[Entity], None]: """ Async sequential group iterator @@ -232,7 +232,7 @@ async def _sequential_group_iterator_async( # loop while not done while not done: # loop through the batch size - for _ in range(batchsize): + for _ in range(batch_size): if len(sample) == 0: # get the new group sample = await _get_grouped_sample_async(generator=generator) @@ -245,7 +245,7 @@ async def _sequential_group_iterator_async( if type(sample[0]) is dict: # Retrieve entity from the store entity = ray.get( - remoteDiscoverySpace.entity_for_point.remote(sample[0]) + remote_discovery_space.entity_for_point.remote(sample[0]) ) else: entity = sample[0] @@ -267,7 +267,7 @@ class SequentialGroupSampleSelector(GroupSampler): @classmethod def samplerCompatibleWithDiscoverySpaceRemote( - cls, remoteDiscoverySpace: DiscoverySpaceManager + cls, remote_discovery_space: DiscoverySpaceManager ): return True @@ -280,9 +280,9 @@ def __init__(self, group: list[str]): def entityGroupIterator( self, - 
discoverySpace: DiscoverySpace, + discovery_space: DiscoverySpace, ) -> Generator[list[Entity], None, None]: - """Returns an iterator that samples groups of entities from a discoveryspace + """Returns an iterator that samples groups of entities from a discovery space The group definition should be specified on initializing an instance of a subclass of this class @@ -290,9 +290,9 @@ def entityGroupIterator( the number of members of the associated group Parameters: - discoverySpace: An orchestrator.model.space.DiscoverySpace instance + discovery_space: An orchestrator.model.space.DiscoverySpace instance """ - entities = discoverySpace.matchingEntities() + entities = discovery_space.matchingEntities() return _sequential_iterator(entities=entities, group=self.group) async def remoteEntityGroupIterator( @@ -310,11 +310,11 @@ async def iterator_closure(): def entityIterator( self, discoverySpace: DiscoverySpace, batchsize=1 ) -> Generator[list[Entity], None, None]: - grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) + grouped_iterator = self.entityGroupIterator(discovery_space=discoverySpace) return _sequential_group_iterator( generator=grouped_iterator, - discoverySpace=discoverySpace, - batchsize=batchsize, + discovery_space=discoverySpace, + batch_size=batchsize, ) async def remoteEntityIterator( @@ -325,8 +325,8 @@ async def remoteEntityIterator( ) return _sequential_group_iterator_async( generator=grouped_iterator, - remoteDiscoverySpace=remoteDiscoverySpace, - batchsize=batchsize, + remote_discovery_space=remoteDiscoverySpace, + batch_size=batchsize, ) @@ -352,7 +352,7 @@ def entityGroupIterator( self, discoverySpace: DiscoverySpace, ) -> Generator[list[Entity], None, None]: - """Returns an iterator that samples groups of entities from a discoveryspace + """Returns an iterator that samples groups of entities from a discovery space The group definition should be specified on initializing an instance of a subclass of this class @@ -360,7 +360,7 @@ def entityGroupIterator( the number of members of the associated group Parameters: - discoverySpace: An orchestrator.model.space.DiscoverySpace instance + discovery_space: An orchestrator.model.space.DiscoverySpace instance """ entities = discoverySpace.matchingEntities() return _random_iterator(entities=entities, group=self.group) @@ -369,12 +369,12 @@ async def remoteEntityGroupIterator( self, remoteDiscoverySpace: DiscoverySpaceManager ) -> AsyncGenerator[list[Entity], None]: async def iterator_closure( - stateHandle: DiscoverySpaceManager, + state_handle: DiscoverySpaceManager, ): - entities = await stateHandle.matchingEntitiesInSource.remote() + entities = await state_handle.matchingEntitiesInSource.remote() return _random_iterator_async(entities=entities, group=self.group) - return await iterator_closure(stateHandle=remoteDiscoverySpace) + return await iterator_closure(state_handle=remoteDiscoverySpace) def entityIterator( self, discoverySpace: DiscoverySpace, batchsize=1 @@ -382,8 +382,8 @@ def entityIterator( grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( generator=grouped_iterator, - discoverySpace=discoverySpace, - batchsize=batchsize, + discovery_space=discoverySpace, + batch_size=batchsize, ) async def remoteEntityIterator( @@ -394,8 +394,8 @@ async def remoteEntityIterator( ) return _sequential_group_iterator_async( generator=grouped_iterator, - remoteDiscoverySpace=remoteDiscoverySpace, - batchsize=batchsize, + 
remote_discovery_space=remoteDiscoverySpace, + batch_size=batchsize, ) @@ -422,27 +422,27 @@ def __init__(self, mode: WalkModeEnum, group: list[str]): def entityGroupIterator( self, - discoverySpace: DiscoverySpace, + discovery_space: DiscoverySpace, ) -> Generator[list[dict], None, None]: - """Returns an iterator that samples groups of entities from a discoveryspace + """Returns an iterator that samples groups of entities from a discovery space Note: The number of entities returned on each call to the iterator can vary as it depends on the number of members of the associated group Parameters: - discoverySpace: An orchestrator.model.space.DiscoverySpace instance + discovery_space: An orchestrator.model.space.DiscoverySpace instance """ - entitySpace = discoverySpace.entitySpace + entity_space = discovery_space.entitySpace if not ExplicitEntitySpaceGroupedGridSampleGenerator.samplerCompatibleWithEntitySpace( - entitySpace=entitySpace + entity_space ): raise ValueError( - f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entitySpace}" + f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entity_space}" ) - points = _get_space_points(discoverySpace=discoverySpace) + points = _get_space_points(discovery_space=discovery_space) def iterator_closure() -> Generator[list[Entity], None, None]: def sequential_iterator() -> Generator[list[Entity], None, None]: @@ -467,19 +467,19 @@ async def remoteEntityGroupIterator( """Returns an async iterator that returns groups of entities as defined by the instances group property""" async def iterator_closure( - remoteDiscoverySpace: DiscoverySpaceManager, + remote_discovery_space: DiscoverySpaceManager, ) -> AsyncGenerator[list[Entity], None]: # noinspection PyUnresolvedReferences - entitySpace = await remoteDiscoverySpace.entitySpace.remote() - discoverySpace = await remoteDiscoverySpace.discoverySpace.remote() - points = _get_space_points(discoverySpace=discoverySpace) + entity_space = await remote_discovery_space.entitySpace.remote() + discovery_space = await remote_discovery_space.discoverySpace.remote() + points = _get_space_points(discovery_space=discovery_space) if not ExplicitEntitySpaceGroupedGridSampleGenerator.samplerCompatibleWithEntitySpace( - entitySpace=entitySpace + entity_space ): raise ValueError( - f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entitySpace}" + f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entity_space}" ) def sequential_iterator() -> AsyncGenerator[list[Entity], None]: @@ -501,14 +501,14 @@ def random_iterator() -> AsyncGenerator[list[Entity], None]: return await iterator_closure(remoteDiscoverySpace) def entityIterator( - self, discoverySpace: DiscoverySpace, batchsize=1 + self, discovery_space: DiscoverySpace, batchsize=1 ) -> Generator[list[Entity], None, None]: """Returns an iterator over a sequence of entities ordered by group""" - grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) + grouped_iterator = self.entityGroupIterator(discovery_space=discovery_space) return _sequential_group_iterator( generator=grouped_iterator, - discoverySpace=discoverySpace, - batchsize=batchsize, + discovery_space=discovery_space, + batch_size=batchsize, ) async def remoteEntityIterator( @@ -519,6 +519,6 @@ async def remoteEntityIterator( ) return _sequential_group_iterator_async( generator=grouped_iterator, - remoteDiscoverySpace=remoteDiscoverySpace, - batchsize=batchsize, + remote_discovery_space=remoteDiscoverySpace, + batch_size=batchsize, ) diff 
--git a/tests/core/test_group_samplers.py b/tests/core/test_group_samplers.py index f712c5f1..3e153da4 100644 --- a/tests/core/test_group_samplers.py +++ b/tests/core/test_group_samplers.py @@ -291,7 +291,8 @@ async def test_group_sampler_sequential_remote( assert RandomGroupSampleSelector.samplerCompatibleWithDiscoverySpaceRemote(manager) iterator = await sampler.remoteEntityIterator( - remoteDiscoverySpace=manager, batchsize=5 + remoteDiscoverySpace=manager, + batchsize=5, ) count = 0 From 21859059c5276e4a86e67345ab0324fe8f2351ba Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 21 Nov 2025 09:49:08 +0000 Subject: [PATCH 37/46] chore(group_sampler): Addressed review comments Signed-off-by: Christian Pinto --- .../core/discoveryspace/group_samplers.py | 26 ++++++++----------- tests/core/test_group_samplers.py | 8 +++--- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index e142bb20..4b411d95 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -22,10 +22,10 @@ def _get_space_points(discovery_space: DiscoverySpace) -> list[dict]: """ - Building list of entities for a discovery space + Building list of points for a discovery space :param discovery_space: discovery space - :return: list of entities + :return: list of points """ entity_space = discovery_space.entitySpace property_names = [c.identifier for c in entity_space.constitutiveProperties] @@ -199,7 +199,7 @@ def _sequential_group_iterator( done = True break # Retrieve entity from the store - if type(sample[0]) is dict: + if isinstance(sample[0], dict): entity = discovery_space.entity_for_point(sample[0]) else: # The sample is already an Entity @@ -242,7 +242,7 @@ async def _sequential_group_iterator_async( # mark that we are done done = True break - if type(sample[0]) is dict: + if isinstance(sample[0], dict): # Retrieve entity from the store entity = ray.get( remote_discovery_space.entity_for_point.remote(sample[0]) @@ -280,7 +280,7 @@ def __init__(self, group: list[str]): def entityGroupIterator( self, - discovery_space: DiscoverySpace, + discoverySpace: DiscoverySpace, ) -> Generator[list[Entity], None, None]: """Returns an iterator that samples groups of entities from a discovery space @@ -292,7 +292,7 @@ def entityGroupIterator( Parameters: discovery_space: An orchestrator.model.space.DiscoverySpace instance """ - entities = discovery_space.matchingEntities() + entities = discoverySpace.matchingEntities() return _sequential_iterator(entities=entities, group=self.group) async def remoteEntityGroupIterator( @@ -310,7 +310,7 @@ async def iterator_closure(): def entityIterator( self, discoverySpace: DiscoverySpace, batchsize=1 ) -> Generator[list[Entity], None, None]: - grouped_iterator = self.entityGroupIterator(discovery_space=discoverySpace) + grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( generator=grouped_iterator, discovery_space=discoverySpace, @@ -422,7 +422,7 @@ def __init__(self, mode: WalkModeEnum, group: list[str]): def entityGroupIterator( self, - discovery_space: DiscoverySpace, + discoverySpace: DiscoverySpace, ) -> Generator[list[dict], None, None]: """Returns an iterator that samples groups of entities from a discovery space @@ -433,7 +433,7 @@ def entityGroupIterator( discovery_space: An orchestrator.model.space.DiscoverySpace instance """ - entity_space = 
discovery_space.entitySpace + entity_space = discoverySpace.entitySpace if not ExplicitEntitySpaceGroupedGridSampleGenerator.samplerCompatibleWithEntitySpace( entity_space @@ -442,17 +442,13 @@ def entityGroupIterator( f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entity_space}" ) - points = _get_space_points(discovery_space=discovery_space) + points = _get_space_points(discovery_space=discoverySpace) def iterator_closure() -> Generator[list[Entity], None, None]: def sequential_iterator() -> Generator[list[Entity], None, None]: return _sequential_iterator(entities=points, group=self.group) def random_iterator() -> Generator[list[Entity], None, None]: - import time - - now = time.perf_counter() - print(f"Getting all entities took {time.perf_counter() - now}") return _random_iterator(entities=points, group=self.group) if self.mode == WalkModeEnum.SEQUENTIAL: @@ -504,7 +500,7 @@ def entityIterator( self, discovery_space: DiscoverySpace, batchsize=1 ) -> Generator[list[Entity], None, None]: """Returns an iterator over a sequence of entities ordered by group""" - grouped_iterator = self.entityGroupIterator(discovery_space=discovery_space) + grouped_iterator = self.entityGroupIterator(discoverySpace=discovery_space) return _sequential_group_iterator( generator=grouped_iterator, discovery_space=discovery_space, diff --git a/tests/core/test_group_samplers.py b/tests/core/test_group_samplers.py index 3e153da4..5b91bb27 100644 --- a/tests/core/test_group_samplers.py +++ b/tests/core/test_group_samplers.py @@ -120,7 +120,7 @@ def test_group_sampler_local( node_value = { ( e["nodes"] - if type(e) is dict + if isinstance(e, dict) else e.valueForConstitutivePropertyIdentifier("nodes").value ) for e in group @@ -128,7 +128,7 @@ def test_group_sampler_local( cpu_value = { ( e["cpu_family"] - if type(e) is dict + if isinstance(e, dict) else e.valueForConstitutivePropertyIdentifier("cpu_family").value ) for e in group @@ -228,7 +228,7 @@ async def test_group_sampler_remote( node_value = { ( e["nodes"] - if type(e) is dict + if isinstance(e, dict) else e.valueForConstitutivePropertyIdentifier("nodes").value ) for e in group @@ -236,7 +236,7 @@ async def test_group_sampler_remote( cpu_value = { ( e["cpu_family"] - if type(e) is dict + if isinstance(e, dict) else e.valueForConstitutivePropertyIdentifier("cpu_family").value ) for e in group From 665bde0c87e6ad5bbe0eb5e8d452509da077171e Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 21 Nov 2025 10:29:06 +0000 Subject: [PATCH 38/46] chore(group_sampler): Reverted wrong conversion to snake casing Signed-off-by: Christian Pinto --- orchestrator/core/discoveryspace/group_samplers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index 4b411d95..15569e56 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -497,13 +497,13 @@ def random_iterator() -> AsyncGenerator[list[Entity], None]: return await iterator_closure(remoteDiscoverySpace) def entityIterator( - self, discovery_space: DiscoverySpace, batchsize=1 + self, discoverySpace: DiscoverySpace, batchsize=1 ) -> Generator[list[Entity], None, None]: """Returns an iterator over a sequence of entities ordered by group""" - grouped_iterator = self.entityGroupIterator(discoverySpace=discovery_space) + grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return 
_sequential_group_iterator( generator=grouped_iterator, - discovery_space=discovery_space, + discovery_space=discoverySpace, batch_size=batchsize, ) From 0a9730ca0485bacf0aab06418ad16467772054f8 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 21 Nov 2025 14:45:05 +0000 Subject: [PATCH 39/46] chore(group_sampler): Simplified handling of points for creating groups Signed-off-by: Christian Pinto --- .../core/discoveryspace/group_samplers.py | 247 +++++++++++------- tests/core/test_group_samplers.py | 12 +- 2 files changed, 155 insertions(+), 104 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index 15569e56..da201f16 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -6,7 +6,6 @@ from typing import Any import numpy as np -import ray from orchestrator.core.discoveryspace.samplers import ( ExplicitEntitySpaceGridSampleGenerator, @@ -35,29 +34,64 @@ def _get_space_points(discovery_space: DiscoverySpace) -> list[dict]: ] -def _build_entity_group_values( - entity: dict | Entity, group: list[str] +def _get_space_matching_points(discovery_space: DiscoverySpace) -> list[dict]: + """ + Building list of points from the matching entities for a discovery space + + :param discovery_space: discovery space + :return: list of points + """ + matching_entities = discovery_space.matchingEntities() + points = [] + for entity in matching_entities: + point = { + v.property.identifier: v.value for v in entity.constitutive_property_values + } + # we save the entity identifier to retrieve the entity from the store at a later stage, + # assuming they are cached locally + point["entity_identifier"] = entity.identifier + points.append(point) + + return points + + +def _entity_for_point(point: dict, discovery_space: DiscoverySpace) -> list[Entity]: + if "entity_identifier" in point: + entity = discovery_space.sample_store.entityWithIdentifier( + entityIdentifier=point["entity_identifier"] + ) + else: + entity = discovery_space.entity_for_point(point) + + return entity + + +async def _entity_for_point_async( + point: dict, remote_discovery_space: DiscoverySpaceManager +) -> list[Entity]: + if "entity_identifier" in point: + entity = await remote_discovery_space.storedEntityWithIdentifier.remote( + entityIdentifier=point["entity_identifier"] + ) + else: + entity = await remote_discovery_space.entity_for_point.remote(point) + + return entity + + +def _build_point_group_values( + point: dict, group: list[str] ) -> frozenset[tuple[str, Any]]: """ :return: A frozen set of (key,value) pairs """ - # build a dictionary of entity values given the group - if isinstance(entity, Entity): - return frozenset( - { - (v.property.identifier, v.value) - for v in entity.constitutive_property_values - if v.property.identifier in group - } - ) - # We get a dict in case of generator type of samplers where we defer fetching the - # full entity from store until it is returned by the iterator. 
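# --- Editorial aside, not part of the patch: the core idea of this refactor
# is to group cheap dict points first and materialise Entity objects lazily,
# one group at a time. Standalone sketch with a hypothetical stand-in for
# entity_for_point:
def _fetch_entity(point: dict) -> str:  # hypothetical store lookup
    return f"entity::{sorted(point.items())}"

points = [{"nodes": 1, "id": "a"}, {"nodes": 1, "id": "b"}, {"nodes": 2, "id": "c"}]
groups: dict[frozenset, list[dict]] = {}
for p in points:
    groups.setdefault(frozenset({("nodes", p["nodes"])}), []).append(p)

for members in groups.values():
    # the expensive lookup is deferred until the group is actually yielded
    entities = [_fetch_entity(p) for p in members]
assert len(groups) == 2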
- return frozenset({(k, v) for k, v in entity.items() if k in group}) + + return frozenset({(k, v) for k, v in point.items() if k in group}) def _build_groups_dict( - entities: list[dict | Entity], group: list[str] -) -> dict[frozenset[tuple[str, Any]], list[dict | Entity]]: + points: list[dict], group: list[str] +) -> dict[frozenset[tuple[str, Any]], list[Entity]]: """ builds a dict of lists of entities, combining entities based on group definitions :param entities: list of entities @@ -65,30 +99,28 @@ def _build_groups_dict( :return: A dictionary whose keys are groups and whose values are list of entities """ groups = {} - for element in entities: - grp = _build_entity_group_values(entity=element, group=group) + for point in points: + grp = _build_point_group_values(point=point, group=group) lst = groups.get(grp, []) - lst.append(element) + lst.append(point) groups[grp] = lst return groups -def _build_groups_list( - entities: list[dict | Entity], group: list[str] -) -> list[list[dict | Entity]]: +def _build_groups_list(points: list[dict], group: list[str]) -> list[list[dict]]: """ - builds a list of lists of entities, combining entities based on group definitions - :param entities: list of entities + builds a list of lists of points, combining entities based on group definitions + :param points: list of points :param group: group definition :return: """ - return list(_build_groups_dict(entities=entities, group=group).values()) + return list(_build_groups_dict(points=points, group=group).values()) async def _get_grouped_sample_async( - generator: AsyncGenerator[list[dict], None], -) -> list[dict] | None: + generator: AsyncGenerator[list[Entity], None], +) -> list[Entity] | None: try: return await anext(generator) except (StopAsyncIteration, StopIteration): @@ -97,7 +129,7 @@ async def _get_grouped_sample_async( def _get_grouped_sample( generator: Generator[list[Entity], None, None], -) -> list[dict] | None: +) -> list[Entity] | None: try: return next(generator) except (StopAsyncIteration, StopIteration): @@ -105,82 +137,104 @@ def _get_grouped_sample( async def _sequential_iterator_async( - entities: list[dict | Entity], group: list[str] -) -> AsyncGenerator[list[dict | Entity], None]: + points: list[dict], + group: list[str], + remote_discovery_space=DiscoverySpaceManager, +) -> AsyncGenerator[list[Entity], None]: """ Sequential iterator through discovery space with grouping - :param entities: list of entities + :param points: list of points :param group: group definition :return: """ - group_list = _build_groups_list(entities=entities, group=group) + group_list = _build_groups_list(points=points, group=group) for i in range(len(group_list)): - lst = group_list[i] + entity_list = [ + await _entity_for_point_async( + point=point, remote_discovery_space=remote_discovery_space + ) + for point in group_list[i] + ] + lst = entity_list yield lst def _sequential_iterator( - entities: list[dict | Entity], group: list[str] -) -> Generator[list[dict | Entity], None, None]: + points: list[dict], + group: list[str], + discovery_space: DiscoverySpace, +) -> Generator[list[Entity], None, None]: """ Sequential iterator through discovery space with grouping - :param entities: list of entities + :param points: list of points :param group: group definition :return: """ - group_list = _build_groups_list(entities=entities, group=group) + group_list = _build_groups_list(points=points, group=group) for i in range(len(group_list)): - lst = group_list[i] - yield lst + entity_list = [ + 
_entity_for_point(point=point, discovery_space=discovery_space) + for point in group_list[i] + ] + yield entity_list async def _random_iterator_async( - entities: list[dict], group: list[str] -) -> AsyncGenerator[list[dict], None]: + points: list[dict], + group: list[str], + remote_discovery_space: DiscoverySpaceManager, +) -> AsyncGenerator[list[Entity], None]: """ Random iterator through discovery space with grouping - :param entities: list of entities + :param points: list of points :param group: group definition :return: """ - group_list = _build_groups_list(entities=entities, group=group) + group_list = _build_groups_list(points=points, group=group) randomized = np.random.choice( a=range(len(group_list)), size=len(group_list), replace=False ) for i in range(len(randomized)): - lst = group_list[randomized[i]] - yield lst + entity_list = [ + await _entity_for_point_async( + remote_discovery_space=remote_discovery_space, point=point + ) + for point in group_list[randomized[i]] + ] + yield entity_list def _random_iterator( - entities: list[dict], + points: list[dict], group: list[str], -) -> Generator[list[dict], None, None]: + discovery_space: DiscoverySpace, +) -> Generator[list[Entity], None, None]: """ Random iterator through discovery space with grouping - :param entities: list of entities + :param points: list of points :param group: group definition :return: """ - group_list = _build_groups_list(entities=entities, group=group) - + group_list = _build_groups_list(points=points, group=group) randomized = np.random.choice( a=range(len(group_list)), size=len(group_list), replace=False ) for i in range(len(randomized)): - lst = group_list[randomized[i]] - yield lst + entity_list = [ + _entity_for_point(point=point, discovery_space=discovery_space) + for point in group_list[randomized[i]] + ] + yield entity_list def _sequential_group_iterator( generator: Generator[list[dict], None, None], - discovery_space: DiscoverySpace, batch_size: int, ) -> Generator[list[Entity], None, None]: """ Sequential group iterator :param generator: grouped iterator - :param batchsize: batch size + :param batch_size: batch size :return: """ sample = [] @@ -198,16 +252,10 @@ def _sequential_group_iterator( # mark that we are done and break done = True break - # Retrieve entity from the store - if isinstance(sample[0], dict): - entity = discovery_space.entity_for_point(sample[0]) - else: - # The sample is already an Entity - entity = sample[0] - # append a new entity to batch + + # append a new entity to the batch + entity = sample.pop(0) batch.append(entity) - # remove entity from samples - sample = sample[1:] # submit a batch and clean it up # The last batch may be empty - if so don't return it if batch: @@ -217,13 +265,12 @@ def _sequential_group_iterator( async def _sequential_group_iterator_async( generator: AsyncGenerator[list[dict], None], - remote_discovery_space: DiscoverySpaceManager, batch_size: int, ) -> AsyncGenerator[list[Entity], None]: """ Async sequential group iterator :param generator: grouped iterator - :param batchsize: batch size + :param batch_size: batch size :return: """ sample = [] @@ -242,17 +289,10 @@ async def _sequential_group_iterator_async( # mark that we are done done = True break - if isinstance(sample[0], dict): - # Retrieve entity from the store - entity = ray.get( - remote_discovery_space.entity_for_point.remote(sample[0]) - ) - else: - entity = sample[0] - # append a new entity to batch + + # append a new entity to the batch + entity = sample.pop(0) batch.append(entity) - # 
remove entity from samples - sample = sample[1:] # submit a batch and clean it up # The last batch may be empty - if so don't return it if batch: @@ -282,7 +322,7 @@ def entityGroupIterator( self, discoverySpace: DiscoverySpace, ) -> Generator[list[Entity], None, None]: - """Returns an iterator that samples groups of entities from a discovery space + """Returns an iterator that samples groups of entities from a discovery space The group definition should be specified on initializing an instance of a subclass of this class @@ -292,17 +332,21 @@ def entityGroupIterator( Parameters: discovery_space: An orchestrator.model.space.DiscoverySpace instance """ - entities = discoverySpace.matchingEntities() - return _sequential_iterator(entities=entities, group=self.group) + points = _get_space_matching_points(discovery_space=discoverySpace) + return _sequential_iterator( + points=points, group=self.group, discovery_space=discoverySpace + ) async def remoteEntityGroupIterator( self, remoteDiscoverySpace: DiscoverySpaceManager ) -> AsyncGenerator[list[Entity], None]: async def iterator_closure(): - entities = await remoteDiscoverySpace.matchingEntitiesInSource.remote() + discovery_space = await remoteDiscoverySpace.discoverySpace.remote() + points = _get_space_matching_points(discovery_space=discovery_space) return _sequential_iterator_async( - entities=entities, + points=points, group=self.group, + remote_discovery_space=remoteDiscoverySpace, ) return await iterator_closure() @@ -313,7 +357,6 @@ def entityIterator( grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( generator=grouped_iterator, - discovery_space=discoverySpace, batch_size=batchsize, ) @@ -325,7 +368,6 @@ async def remoteEntityIterator( ) return _sequential_group_iterator_async( generator=grouped_iterator, - remote_discovery_space=remoteDiscoverySpace, batch_size=batchsize, ) @@ -352,7 +394,7 @@ def entityGroupIterator( self, discoverySpace: DiscoverySpace, ) -> Generator[list[Entity], None, None]: - """Returns an iterator that samples groups of entities from a discovery space + """Returns an iterator that samples groups of entities from a discovery space The group definition should be specified on initializing an instance of a subclass of this class @@ -362,8 +404,10 @@ def entityGroupIterator( Parameters: discovery_space: An orchestrator.model.space.DiscoverySpace instance """ - entities = discoverySpace.matchingEntities() - return _random_iterator(entities=entities, group=self.group) + points = _get_space_matching_points(discovery_space=discoverySpace) + return _random_iterator( + points=points, group=self.group, discovery_space=discoverySpace + ) async def remoteEntityGroupIterator( self, remoteDiscoverySpace: DiscoverySpaceManager @@ -371,8 +415,13 @@ async def remoteEntityGroupIterator( async def iterator_closure( state_handle: DiscoverySpaceManager, ): - entities = await state_handle.matchingEntitiesInSource.remote() - return _random_iterator_async(entities=entities, group=self.group) + discovery_space = await state_handle.discoverySpace.remote() + points = _get_space_matching_points(discovery_space=discovery_space) + return _random_iterator_async( + points=points, + group=self.group, + remote_discovery_space=remoteDiscoverySpace, + ) return await iterator_closure(state_handle=remoteDiscoverySpace) @@ -382,7 +431,6 @@ def entityIterator( grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( 
generator=grouped_iterator, - discovery_space=discoverySpace, batch_size=batchsize, ) @@ -394,7 +442,6 @@ async def remoteEntityIterator( ) return _sequential_group_iterator_async( generator=grouped_iterator, - remote_discovery_space=remoteDiscoverySpace, batch_size=batchsize, ) @@ -423,8 +470,8 @@ def __init__(self, mode: WalkModeEnum, group: list[str]): def entityGroupIterator( self, discoverySpace: DiscoverySpace, - ) -> Generator[list[dict], None, None]: - """Returns an iterator that samples groups of entities from a discovery space + ) -> Generator[list[Entity], None, None]: + """Returns an iterator that samples groups of entities from a discovery space Note: The number of entities returned on each call to the iterator can vary as it depends on the number of members of the associated group @@ -446,10 +493,14 @@ def entityGroupIterator( def iterator_closure() -> Generator[list[Entity], None, None]: def sequential_iterator() -> Generator[list[Entity], None, None]: - return _sequential_iterator(entities=points, group=self.group) + return _sequential_iterator( + points=points, group=self.group, discovery_space=discoverySpace + ) def random_iterator() -> Generator[list[Entity], None, None]: - return _random_iterator(entities=points, group=self.group) + return _random_iterator( + points=points, group=self.group, discovery_space=discoverySpace + ) if self.mode == WalkModeEnum.SEQUENTIAL: return sequential_iterator() @@ -480,14 +531,16 @@ async def iterator_closure( def sequential_iterator() -> AsyncGenerator[list[Entity], None]: return _sequential_iterator_async( - entities=points, + points=points, group=self.group, + remote_discovery_space=remoteDiscoverySpace, ) def random_iterator() -> AsyncGenerator[list[Entity], None]: return _random_iterator_async( - entities=points, + points=points, group=self.group, + remote_discovery_space=remoteDiscoverySpace, ) if self.mode == WalkModeEnum.SEQUENTIAL: @@ -503,7 +556,6 @@ def entityIterator( grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( generator=grouped_iterator, - discovery_space=discoverySpace, batch_size=batchsize, ) @@ -515,6 +567,5 @@ async def remoteEntityIterator( ) return _sequential_group_iterator_async( generator=grouped_iterator, - remote_discovery_space=remoteDiscoverySpace, batch_size=batchsize, ) diff --git a/tests/core/test_group_samplers.py b/tests/core/test_group_samplers.py index 5b91bb27..fccf6325 100644 --- a/tests/core/test_group_samplers.py +++ b/tests/core/test_group_samplers.py @@ -10,6 +10,7 @@ RandomGroupSampleSelector, SequentialGroupSampleSelector, _build_groups_dict, + _get_space_matching_points, ) from orchestrator.core.discoveryspace.samplers import ( GroupSampler, @@ -20,7 +21,6 @@ from orchestrator.modules.operators.discovery_space_manager import ( DiscoverySpaceManager, ) -from orchestrator.schema.entity import Entity from orchestrator.schema.entityspace import EntitySpaceRepresentation @@ -49,10 +49,10 @@ def check_group_order( if isinstance(sampler, ExplicitEntitySpaceGroupedGridSampleGenerator): ids = [cp.identifier for cp in space.entitySpace.constitutiveProperties] - entities = [ + points = [ dict(zip(ids, p)) for p in space.entitySpace.sequential_point_iterator() ] - groups = _build_groups_dict(entities=entities, group=group) + groups = _build_groups_dict(points=points, group=group) expected_group_order = list(groups.keys()) if sampler.mode == WalkModeEnum.RANDOM: assert group_order != expected_group_order @@ -60,8 +60,8 @@ def 
check_group_order( assert group_order == expected_group_order else: - entities = space.matchingEntities() - groups = _build_groups_dict(entities=entities, group=group) + points = _get_space_matching_points(discovery_space=space) + groups = _build_groups_dict(points=points, group=group) expected_group_order = list(groups.keys()) if isinstance(sampler, SequentialGroupSampleSelector): assert group_order == expected_group_order @@ -115,7 +115,7 @@ def test_group_sampler_local( for i, group in enumerate(sampler.entityGroupIterator(space)): count += len(group) for entity in group: - print(i, count, entity.identifier if isinstance(entity, Entity) else entity) + print(i, count, entity) node_value = { ( From 95f39dacdbc1452af7ff590de05e449dda766a17 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 21 Nov 2025 15:00:03 +0000 Subject: [PATCH 40/46] chore(group_sampler): Some leftover types and snake_cases Signed-off-by: Christian Pinto --- orchestrator/core/discoveryspace/group_samplers.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index da201f16..40397ac1 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -307,7 +307,7 @@ class SequentialGroupSampleSelector(GroupSampler): @classmethod def samplerCompatibleWithDiscoverySpaceRemote( - cls, remote_discovery_space: DiscoverySpaceManager + cls, remoteDiscoverySpace: DiscoverySpaceManager ): return True @@ -330,7 +330,7 @@ def entityGroupIterator( the number of members of the associated group Parameters: - discovery_space: An orchestrator.model.space.DiscoverySpace instance + discoverySpace: An orchestrator.model.space.DiscoverySpace instance """ points = _get_space_matching_points(discovery_space=discoverySpace) return _sequential_iterator( @@ -402,7 +402,7 @@ def entityGroupIterator( the number of members of the associated group Parameters: - discovery_space: An orchestrator.model.space.DiscoverySpace instance + discoverySpace: An orchestrator.model.space.DiscoverySpace instance """ points = _get_space_matching_points(discovery_space=discoverySpace) return _random_iterator( @@ -413,9 +413,9 @@ async def remoteEntityGroupIterator( self, remoteDiscoverySpace: DiscoverySpaceManager ) -> AsyncGenerator[list[Entity], None]: async def iterator_closure( - state_handle: DiscoverySpaceManager, + remote_discovery_space: DiscoverySpaceManager, ): - discovery_space = await state_handle.discoverySpace.remote() + discovery_space = await remote_discovery_space.discoverySpace.remote() points = _get_space_matching_points(discovery_space=discovery_space) return _random_iterator_async( points=points, @@ -423,7 +423,7 @@ async def iterator_closure( remote_discovery_space=remoteDiscoverySpace, ) - return await iterator_closure(state_handle=remoteDiscoverySpace) + return await iterator_closure(remote_discovery_space=remoteDiscoverySpace) def entityIterator( self, discoverySpace: DiscoverySpace, batchsize=1 @@ -477,7 +477,7 @@ def entityGroupIterator( the number of members of the associated group Parameters: - discovery_space: An orchestrator.model.space.DiscoverySpace instance + discoverySpace: An orchestrator.model.space.DiscoverySpace instance """ entity_space = discoverySpace.entitySpace From 682fdbbd44df96b254dbb9fb225912a63474137a Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Mon, 24 Nov 2025 08:15:29 +0000 Subject: [PATCH 41/46] 
chore(group_sampler): Tests cleanup

Signed-off-by: Christian Pinto
---
 tests/core/test_group_samplers.py | 26 ++++----------------------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/tests/core/test_group_samplers.py b/tests/core/test_group_samplers.py
index fccf6325..7e708ec1 100644
--- a/tests/core/test_group_samplers.py
+++ b/tests/core/test_group_samplers.py
@@ -118,19 +118,10 @@ def test_group_sampler_local(
             print(i, count, entity)
 
         node_value = {
-            (
-                e["nodes"]
-                if isinstance(e, dict)
-                else e.valueForConstitutivePropertyIdentifier("nodes").value
-            )
-            for e in group
+            (e.valueForConstitutivePropertyIdentifier("nodes").value) for e in group
         }
         cpu_value = {
-            (
-                e["cpu_family"]
-                if isinstance(e, dict)
-                else e.valueForConstitutivePropertyIdentifier("cpu_family").value
-            )
+            (e.valueForConstitutivePropertyIdentifier("cpu_family").value)
             for e in group
         }
 
@@ -226,19 +217,10 @@ async def test_group_sampler_remote(
         count += len(group)
         group_count += 1
         node_value = {
-            (
-                e["nodes"]
-                if isinstance(e, dict)
-                else e.valueForConstitutivePropertyIdentifier("nodes").value
-            )
-            for e in group
+            (e.valueForConstitutivePropertyIdentifier("nodes").value) for e in group
         }
         cpu_value = {
-            (
-                e["cpu_family"]
-                if isinstance(e, dict)
-                else e.valueForConstitutivePropertyIdentifier("cpu_family").value
-            )
+            (e.valueForConstitutivePropertyIdentifier("cpu_family").value)
             for e in group
         }

From 89872857a2cae4e94b64ea5e464f10413ef7221b Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 24 Nov 2025 18:09:25 +0000
Subject: [PATCH 42/46] chore(vllm_performance): removed custom entity_for_point in favor of the new one from the discovery space

Signed-off-by: Christian Pinto
---
 .../core/discoveryspace/group_samplers.py | 47 ++++----------------
 1 file changed, 10 insertions(+), 37 deletions(-)

diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py
index 40397ac1..070a17e2 100644
--- a/orchestrator/core/discoveryspace/group_samplers.py
+++ b/orchestrator/core/discoveryspace/group_samplers.py
@@ -1,6 +1,7 @@
 # Copyright (c) IBM Corporation
 # SPDX-License-Identifier: MIT
 
+import asyncio
 import logging
 from collections.abc import AsyncGenerator, Generator
 from typing import Any
 
 import numpy as np
-import ray
 
 from orchestrator.core.discoveryspace.samplers import (
     ExplicitEntitySpaceGridSampleGenerator,
@@ -55,30 +56,6 @@ def _get_space_matching_points(discovery_space: DiscoverySpace) -> list[dict]:
     return points
 
 
-def _entity_for_point(point: dict, discovery_space: DiscoverySpace) -> list[Entity]:
-    if "entity_identifier" in point:
-        entity = discovery_space.sample_store.entityWithIdentifier(
-            entityIdentifier=point["entity_identifier"]
-        )
-    else:
-        entity = discovery_space.entity_for_point(point)
-
-    return entity
-
-
-async def _entity_for_point_async(
-    point: dict, remote_discovery_space: DiscoverySpaceManager
-) -> list[Entity]:
-    if "entity_identifier" in point:
-        entity = await remote_discovery_space.storedEntityWithIdentifier.remote(
-            entityIdentifier=point["entity_identifier"]
-        )
-    else:
-        entity = await remote_discovery_space.entity_for_point.remote(point)
-
-    return entity
-
-
 def _build_point_group_values(
     point: dict, group: list[str]
 ) -> frozenset[tuple[str, Any]]:
@@ -149,14 +126,12 @@ async def _sequential_iterator_async(
     """
     group_list = _build_groups_list(points=points, group=group)
     for i in range(len(group_list)):
-        entity_list = [
-            await _entity_for_point_async(
-                point=point, remote_discovery_space=remote_discovery_space
-            )
+        entity_list_refs = [
+            remote_discovery_space.entity_for_point.remote(point)
             for point in 
group_list[i] ] - lst = entity_list - yield lst + entity_list = await asyncio.gather(*entity_list_refs) + yield entity_list def _sequential_iterator( @@ -173,8 +148,7 @@ def _sequential_iterator( group_list = _build_groups_list(points=points, group=group) for i in range(len(group_list)): entity_list = [ - _entity_for_point(point=point, discovery_space=discovery_space) - for point in group_list[i] + discovery_space.entity_for_point(point) for point in group_list[i] ] yield entity_list @@ -195,12 +169,11 @@ async def _random_iterator_async( a=range(len(group_list)), size=len(group_list), replace=False ) for i in range(len(randomized)): - entity_list = [ - await _entity_for_point_async( - remote_discovery_space=remote_discovery_space, point=point - ) + entity_list_refs = [ + remote_discovery_space.entity_for_point.remote(point) for point in group_list[randomized[i]] ] + entity_list = await asyncio.gather(*entity_list_refs) yield entity_list @@ -221,7 +194,7 @@ def _random_iterator( ) for i in range(len(randomized)): entity_list = [ - _entity_for_point(point=point, discovery_space=discovery_space) + discovery_space.entity_for_point(point) for point in group_list[randomized[i]] ] yield entity_list From 6d5c6e0bbd512e25e0b9e1595b7d4636fc714d73 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Tue, 25 Nov 2025 08:28:04 +0000 Subject: [PATCH 43/46] chore(vllm_performance): removed unnecessary entity_identifier field from point dict Signed-off-by: Christian Pinto --- orchestrator/core/discoveryspace/group_samplers.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index 070a17e2..36efa8df 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -48,9 +48,6 @@ def _get_space_matching_points(discovery_space: DiscoverySpace) -> list[dict]: point = { v.property.identifier: v.value for v in entity.constitutive_property_values } - # we save the entity identifier to retrieve the entity from the store at a later stage, - # assuming they are cached locally - point["entity_identifier"] = entity.identifier points.append(point) return points From 8b2d6f6691e5c7d11ff67d32551c6d0f43274508 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Mon, 8 Dec 2025 13:21:51 +0000 Subject: [PATCH 44/46] fix(vllm_performance): Fixed geospatial experiments after merge with master Signed-off-by: Christian Pinto --- .../vllm_performance/experiment_executor.py | 11 +++++++++-- .../experiments/performance_testing.yaml | 9 --------- .../experiments/performance_testing_geospatial.yaml | 6 ------ .../vllm_performance/k8s/manage_components.py | 5 ++++- 4 files changed, 13 insertions(+), 18 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index ff6b7c0f..c60085d0 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -210,8 +210,8 @@ def _create_environment( reuse_deployment=False, namespace=actuator.namespace, pvc_name=pvc_name, - skip_tokenizer_init=values.get("skip_tokenizer_init"), - enforce_eager=values.get("enforce_eager"), + skip_tokenizer_init=values.get("skip_tokenizer_init", 0) == 1, + enforce_eager=values.get("enforce_eager", 0) == 1, 
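# --- Editorial aside, not part of the patch: the two lines above normalise
# discrete 0/1 entity properties to booleans; values.get(key, 0) == 1 lets a
# missing key default safely to False. Standalone sketch (hypothetical values):
values = {"skip_tokenizer_init": 1}
assert (values.get("skip_tokenizer_init", 0) == 1) is True
assert (values.get("enforce_eager", 0) == 1) is False  # absent key -> False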
io_processor_plugin=values.get("io_processor_plugin"), check_interval=check_interval, timeout=timeout, @@ -415,6 +415,13 @@ def run_resource_and_workload_experiment( if max_concurrency < 0: max_concurrency = None started_benchmarking = True + console.put.remote( + message=RichConsoleSpinnerMessage( + id=request.requestid, + label=f"({request.requestid}) Executing vllm bench serve", + state="start", + ) + ) if experiment.identifier in [ "test-geospatial-deployment-v1", "test-geospatial-deployment-custom-dataset-v1", diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml index 14191aee..65f00181 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml @@ -126,12 +126,6 @@ test-deployment-v1: propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] - - identifier: 'dataset' - metadata: - description: "(benchmark) The dataset to use for the benchmark" - propertyDomain: - variableType: 'CATEGORICAL_VARIABLE_TYPE' - values: [ 'random' ] defaultParameterization: - property: identifier: 'image' @@ -182,9 +176,6 @@ test-deployment-v1: - property: identifier: 'gpu_type' value: 'NVIDIA-A100-80GB-PCIe' - - property: - identifier: 'dataset' - value: 'random' # measurements targetProperties: - identifier: "duration" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 3df8b5b9..bee5660b 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -71,8 +71,6 @@ performance_testing-geospatial-endpoint: targetProperties: - identifier: "duration" - identifier: "completed" - - identifier: "total_input_tokens" - - identifier: "total_output_tokens" - identifier: "request_throughput" - identifier: "mean_e2el_ms" - identifier: "median_e2el_ms" @@ -267,8 +265,6 @@ performance_testing-geospatial-full: targetProperties: - identifier: "duration" - identifier: "completed" - - identifier: "total_input_tokens" - - identifier: "total_output_tokens" - identifier: "request_throughput" - identifier: "mean_e2el_ms" - identifier: "median_e2el_ms" @@ -460,8 +456,6 @@ performance_testing-geospatial-full-custom-dataset: targetProperties: - identifier: "duration" - identifier: "completed" - - identifier: "total_input_tokens" - - identifier: "total_output_tokens" - identifier: "request_throughput" - identifier: "mean_e2el_ms" - identifier: "median_e2el_ms" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/manage_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/manage_components.py index 5296b068..3fc633d4 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/manage_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/manage_components.py @@ -86,7 +86,7 @@ def __init__( self.pvc_name = f"vllm-support-{uuid.uuid4().hex!s}" 
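# --- Editorial aside, not part of the patch: the logging fix just below
# matters because on this branch the pvc_name argument is None and only
# self.pvc_name holds the generated name. Standalone sketch:
import uuid

pvc_name = None  # caller did not supply a PVC name
generated = f"vllm-support-{uuid.uuid4().hex!s}"
assert generated.startswith("vllm-support-") and pvc_name is None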
self.create_pvc(pvc_name=self.pvc_name, template=pvc_template)
             self.pvc_created = True
-            logger.debug(f"Created pvc {pvc_name} in namespace {namespace}")
+            logger.debug(f"Created pvc {self.pvc_name} in namespace {namespace}")
         else:
             if not self.check_pvc_exists(pvc_name=pvc_name):
                 error_message = (
@@ -345,6 +345,9 @@ def create_deployment(
             template=template,
             claim_name=claim_name,
             hf_token=hf_token,
+            skip_tokenizer_init=skip_tokenizer_init,
+            io_processor_plugin=io_processor_plugin,
+            enforce_eager=enforce_eager,
         )
 
         logger.debug(json.dumps(deployment_yaml, indent=2))

From 0fa9817143633564439255a1a636b8b9dd1925de Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 8 Dec 2025 13:30:21 +0000
Subject: [PATCH 45/46] fix(vllm_performance): remove leftover dataset
 property from performance_testing.yaml after merge with main

Signed-off-by: Christian Pinto
---
 .../vllm_performance/experiments/performance_testing.yaml | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
index a5dc1c2a..d02b2212 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
@@ -55,12 +55,6 @@ test-deployment-v1:
         variableType: 'DISCRETE_VARIABLE_TYPE'
         domainRange: [1, 10000]
         interval: 1
-    - identifier: 'dataset'
-      metadata:
-        description: "(benchmark) The dataset to be used for the experiment"
-        propertyDomain:
-          variableType: "CATEGORICAL_VARIABLE_TYPE"
-          values: ['random']
     - identifier: image
       metadata:
         description: "(deployment) Docker image to use to create vllm deployments"

From 343197160d99ad4f0a1ac7bb0ee66c744aa1f97f Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 8 Dec 2025 13:56:16 +0000
Subject: [PATCH 46/46] chore(vllm_performance): overall cleanup of geospatial
 experiment YAML and benchmark docstrings

Signed-off-by: Christian Pinto
---
 .../performance_testing_geospatial.yaml       | 107 +++++++++---------
 .../execute_benchmark.py                      |   8 +-
 2 files changed, 56 insertions(+), 59 deletions(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
index bee5660b..b7a48110 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
@@ -1,6 +1,5 @@
 # Copyright (c) IBM Corporation
 # SPDX-License-Identifier: MIT
-
 # The input to an experiment is an Entity. For the Entity to be a valid input
 # its properties must match what is defined here
 performance_testing-geospatial-endpoint:
@@ -24,36 +23,36 @@ performance_testing-geospatial-endpoint:
       description: "The number of requests to send per second"
       propertyDomain:
         variableType: 'DISCRETE_VARIABLE_TYPE'
-        domainRange: [-1,1000]
-        interval: 1 # -1 means send all requests at time 0
+        domainRange: [-1, 1000]
+        interval: 1  # -1 means send all requests at time 0
   optionalProperties:
     - identifier: 'num_prompts'
      metadata:
        description: "The number of prompts to send (total number of requests)"
        propertyDomain:
          variableType: 'DISCRETE_VARIABLE_TYPE'
-         domainRange: [1,10001]
+         domainRange: [1, 10001]
          interval: 1
    - identifier: 'burstiness'
      metadata:
        description: "The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness."
        propertyDomain:
          variableType: 'DISCRETE_VARIABLE_TYPE'
-         domainRange: [ 0, 10 ]
+         domainRange: [0, 10]
          interval: 1
    - identifier: 'max_concurrency'
      metadata:
        description: "The maximum number of concurrent requests to send"
        propertyDomain:
          variableType: 'DISCRETE_VARIABLE_TYPE'
-         domainRange: [ -1, 500 ] # -1 means no concurrency control
+         domainRange: [-1, 500]  # -1 means no concurrency control
          interval: 1
    - identifier: 'dataset'
      metadata:
        description: "The dataset to be used for the experiment"
        propertyDomain:
          variableType: "CATEGORICAL_VARIABLE_TYPE"
-         values: [ 'india_url_in_b64_out', 'valencia_url_in_b64_out' ]
+         values: ['india_url_in_b64_out', 'valencia_url_in_b64_out']
   defaultParameterization:
     - value: 100
       property:
@@ -90,135 +89,134 @@ performance_testing-geospatial-full:
       description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation'
      propertyDomain:
        variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
-        values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ]
+        values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"]
    - identifier: 'request_rate'
      metadata:
        description: "(benchmark) The number of requests to send per second"
        propertyDomain:
          variableType: 'DISCRETE_VARIABLE_TYPE'
-         domainRange: [-1,1000]
-         interval: 1 # -1 means send all requests at time 0
+         domainRange: [-1, 1000]
+         interval: 1  # -1 means send all requests at time 0
   optionalProperties:
     - identifier: 'num_prompts'
      metadata:
        description: "(benchmark) The number of prompts to send (total number of requests)"
        propertyDomain:
          variableType: 'DISCRETE_VARIABLE_TYPE'
-         domainRange: [1,10001]
+         domainRange: [1, 10001]
          interval: 1
    - identifier: 'max_concurrency'
      metadata:
        description: "(benchmark) The maximum number of concurrent requests to send"
        propertyDomain:
          variableType: 'DISCRETE_VARIABLE_TYPE'
-         domainRange: [ -1, 500 ] # -1 means no concurrency control
+         domainRange: [-1, 500]  # -1 means no concurrency control
          interval: 1
    - identifier: 'burstiness'
      metadata:
        description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness."
propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 0, 10 ] + domainRange: [0, 10] interval: 1 - identifier: 'dataset' metadata: description: "(benchmark) The dataset to be used for the experiment" propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ 'india_url_in_b64_out', 'valencia_url_in_b64_out' ] + values: ['india_url_in_b64_out', 'valencia_url_in_b64_out'] - identifier: image metadata: description: "(deployment) Docker image to use to create vllm deployments" propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] + values: ["your/image/with/vllm/and/terratorch:0.1"] - identifier: n_cpus metadata: description: "(deployment) the number of CPUs to use" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 1,17 ] + domainRange: [1, 17] interval: 1 - identifier: memory metadata: description: "(deployment) the amount of memory to allocate to vLLM pod" propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ "64Gi", "128Gi", "256Gi" ] + values: ["64Gi", "128Gi", "256Gi"] - identifier: dtype metadata: description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] + values: ["auto", "half", "float16", "bfloat16", "float", "float32"] - identifier: 'gpu_memory_utilization' metadata: description: "(deployment) The fraction of GPU memory to be used for the model executor," propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - values: [ .5, .75, .9 ] + values: [.5, .75, .9] - identifier: 'cpu_offload' metadata: description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 
0 means all weights are on GPU," propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - values: [ 0, 8, 16, 24, 32 ] + values: [0, 8, 16, 24, 32] - identifier: 'max_num_seq' metadata: description: "(deployment) Maximum number of sequences per iteration" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [32,2049] + domainRange: [32, 2049] interval: 32 - identifier: 'max_batch_tokens' metadata: description: "(deployment) maximum number of batched tokens per iteration" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 8192, 32769] + domainRange: [8192, 32769] interval: 1024 - identifier: 'n_gpus' metadata: description: "(deployment) Number of GPUs to use" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [1,9] + domainRange: [1, 9] interval: 1 - identifier: 'gpu_type' metadata: description: "(deployment) The GPU type to use" propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + values: ['NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB'] - identifier: 'skip_tokenizer_init' metadata: description: "(deployment) skip tokenizer initialization" propertyDomain: - variableType: BINARY_VARIABLE_TYPE + variableType: BINARY_VARIABLE_TYPE values: [True, False] - identifier: 'enforce_eager' metadata: description: "(deployment) enforce pytorch eager mode" propertyDomain: - variableType: BINARY_VARIABLE_TYPE + variableType: BINARY_VARIABLE_TYPE values: [True, False] - identifier: 'io_processor_plugin' metadata: description: 'IO Processor plugin to load for the model' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ None, "terratorch_segmentation" ] + values: [None, "terratorch_segmentation"] defaultParameterization: - property: identifier: 'image' - value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" + value: "your/image/with/vllm/and/terratorch:0.1" - property: identifier: n_cpus value: 8 - property: - identifier: - memory - value: "128Gi" + identifier: memory + value: "128Gi" - property: identifier: dtype value: "auto" @@ -284,14 +282,14 @@ performance_testing-geospatial-full-custom-dataset: description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] + values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"] - identifier: 'request_rate' metadata: description: "(benchmark) The number of requests to send per second" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [-1,1000] - interval: 1 # -1 means send all requests at time 0 + domainRange: [-1, 1000] + interval: 1 # -1 means send all requests at time 0 - identifier: 'dataset' metadata: description: "(benchmark) The dataset to be used for the experiment" @@ -304,115 +302,114 @@ performance_testing-geospatial-full-custom-dataset: description: "(benchmark) The number of prompts to send (total number of requests)" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [1,10001] + domainRange: [1, 10001] interval: 1 - identifier: 'max_concurrency' metadata: description: "(benchmark) The maximum number of concurrent requests to send" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ -1, 500 ] # -1 means no concurrency control + domainRange: [-1, 500] # -1 means no concurrency control interval: 1 - identifier: 'burstiness' metadata: description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 0, 10 ] + domainRange: [0, 10] interval: 1 - identifier: image metadata: description: "(deployment) Docker image to use to create vllm deployments" propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] + values: ["your/image/with/vllm/and/terratorch:0.1"] - identifier: n_cpus metadata: description: "(deployment) the number of CPUs to use" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 1,17 ] + domainRange: [1, 17] interval: 1 - identifier: memory metadata: description: "(deployment) the amount of memory to allocate to vLLM pod" propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ "64Gi", "128Gi", "256Gi" ] + values: ["64Gi", "128Gi", "256Gi"] - identifier: dtype metadata: description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] + values: ["auto", "half", "float16", "bfloat16", "float", "float32"] - identifier: 'gpu_memory_utilization' metadata: description: "(deployment) The fraction of GPU memory to be used for the model executor," propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - values: [ .5, .75, .9 ] + values: [.5, .75, .9] - identifier: 'cpu_offload' metadata: description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 
0 means all weights are on GPU," propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - values: [ 0, 8, 16, 24, 32 ] + values: [0, 8, 16, 24, 32] - identifier: 'max_num_seq' metadata: description: "(deployment) Maximum number of sequences per iteration" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [32,2049] + domainRange: [32, 2049] interval: 32 - identifier: 'max_batch_tokens' metadata: description: "(deployment) maximum number of batched tokens per iteration" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 8192, 32769] + domainRange: [8192, 32769] interval: 1024 - identifier: 'n_gpus' metadata: description: "(deployment) Number of GPUs to use" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [1,9] + domainRange: [1, 9] interval: 1 - identifier: 'gpu_type' metadata: description: "(deployment) The GPU type to use" propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + values: ['NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB'] - identifier: 'skip_tokenizer_init' metadata: description: "(deployment) skip tokenizer initialization" propertyDomain: - variableType: BINARY_VARIABLE_TYPE + variableType: BINARY_VARIABLE_TYPE values: [True, False] - identifier: 'enforce_eager' metadata: description: "(deployment) enforce PyTorch eager mode" propertyDomain: - variableType: BINARY_VARIABLE_TYPE + variableType: BINARY_VARIABLE_TYPE values: [True, False] - identifier: 'io_processor_plugin' metadata: description: 'IO Processor plugin to load for the model' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ "terratorch_segmentation" ] + values: ["terratorch_segmentation"] defaultParameterization: - property: identifier: 'image' - value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" + value: "your/image/with/vllm/and/terratorch:0.1" - property: identifier: n_cpus value: 8 - property: - identifier: - memory - value: "128Gi" + identifier: memory + value: "128Gi" - property: identifier: dtype value: "auto" @@ -465,4 +462,4 @@ performance_testing-geospatial-full-custom-dataset: - identifier: "p75_e2el_ms" - identifier: "p99_e2el_ms" metadata: - description: 'VLLM performance testing across compute resource and workload configuration' \ No newline at end of file + description: 'VLLM performance testing across compute resource and workload configuration' diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index 9a477c82..05bd8fb2 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -45,7 +45,7 @@ def execute_benchmark( Execute benchmark :param base_url: url for vllm endpoint :param model: model - :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] + :param dataset: data set name ["random"] :param backend: name of the vLLM benchmark backend to be used ["vllm", "openai", "openai-chat", "openai-audio", "openai-embeddings"] :param interpreter: name of Python interpreter :param num_prompts: number of prompts @@ -83,7 +83,7 @@ def execute_benchmark( request = f"export VLLM_BENCH_LOGLEVEL={log_level} && " + request request += ( f"vllm bench serve --backend {backend} --base-url 
{base_url} --dataset-name {dataset} "
-        f"--model {model} --seed 12345 --num-prompts 10 --save-result --metric-percentiles "
+        f"--model {model} --seed 12345 --num-prompts {num_prompts!s} --save-result --metric-percentiles "
         f'"25,75,99" --percentile-metrics "ttft,tpot,itl,e2el" --result-dir . --result-filename {f_name} '
         f"--burstiness {burstiness} "
     )
@@ -146,7 +146,7 @@ def execute_random_benchmark(
     Execute benchmark with random dataset
     :param base_url: url for vllm endpoint
     :param model: model
-    :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"]
+    :param dataset: data set name ["random"]
     :param num_prompts: number of prompts
     :param request_rate: request rate
     :param max_concurrency: maximum number of concurrent requests
@@ -197,7 +197,7 @@ def execute_geospatial_benchmark(
     Execute benchmark with the geospatial dataset
     :param base_url: url for vllm endpoint
     :param model: model
-    :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"]
+    :param dataset: geospatial data set name ["india_url_in_b64_out", "valencia_url_in_b64_out"]
     :param num_prompts: number of prompts
     :param request_rate: request rate
     :param max_concurrency: maximum number of concurrent requests
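
A note on the sampler change in the group_samplers patches above: the new
iterators launch every entity_for_point.remote() call for a group first and
then resolve the resulting Ray ObjectRefs together with asyncio.gather,
instead of awaiting each point in turn. Below is a minimal, self-contained
sketch of that pattern; it is illustrative only — RemoteSpace and its
entity_for_point method are stand-ins, not the orchestrator's actual
DiscoverySpace classes.

import asyncio

import ray


@ray.remote
class RemoteSpace:
    """Illustrative stand-in for the remote discovery-space actor."""

    def entity_for_point(self, point: dict) -> dict:
        # Placeholder for the real entity-construction logic.
        return {"entity_for": point}


async def resolve_group(space, group: list[dict]) -> list[dict]:
    # Launch all remote calls up front so they execute concurrently ...
    refs = [space.entity_for_point.remote(point) for point in group]
    # ... then await them together; Ray ObjectRefs are awaitable.
    return await asyncio.gather(*refs)


if __name__ == "__main__":
    ray.init()
    space = RemoteSpace.remote()
    print(asyncio.run(resolve_group(space, [{"x": 1}, {"x": 2}])))
    ray.shutdown()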