From 1c77415e5786b1adf466942276c29c65f541b99c Mon Sep 17 00:00:00 2001
From: michaelj
Date: Wed, 15 Oct 2025 12:11:34 +0100
Subject: [PATCH 01/46] feat: support geospatial benchmark

- Add backend arg to execute benchmark
- Add execute_geospatial_benchmark function

This uses the geospatial_valencia.jsonl dataset
---
 .../execute_benchmark.py | 73 ++++++++++++++++---
 1 file changed, 62 insertions(+), 11 deletions(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
index af8e2c34..a65463f9 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
@@ -17,6 +17,7 @@ def execute_benchmark(
     base_url: str,
     model: str,
     data_set: str,
+    backend: str = "openai",
     interpreter: str = "python",
     num_prompts: int = 500,
     request_rate: int | None = None,
@@ -68,21 +69,21 @@
     request += (
         # changing from script invocation to cli invocation
         # f"{interpreter} {code} --backend openai --base-url {base_url} --dataset-name {data_set} "
-        f"vllm bench serve --backend openai --base-url {base_url} --dataset-name {data_set} "
+        f"vllm bench serve --backend {backend} --base-url {base_url} --dataset-name {data_set} "
         f"--model {model} --seed 12345 --num-prompts {num_prompts!s} --save-result --metric-percentiles "
         f'"25,75,99" --percentile-metrics "ttft,tpot,itl,e2el" --result-dir . --result-filename {f_name} '
         f"--burstiness {burstiness} "
     )
     if data_set_path is not None:
-        request += f"--dataset-path {data_set_path} "
+        request += f" --dataset-path {data_set_path} "
     if request_rate is not None:
-        request += f"--request-rate {request_rate!s} "
+        request += f" --request-rate {request_rate!s} "
     if max_concurrency is not None:
-        request += f"--max-concurrency {max_concurrency!s}"
+        request += f" --max-concurrency {max_concurrency!s} "
     if custom_args is not None:
         for key, value in custom_args.items():
-            request += f"{key} {value!s} "
+            request += f" {key} {value!s} "
 
     timeout = retries_timeout
     logger.debug(f"Command line: {request}")
@@ -149,14 +150,64 @@ def execute_random_benchmark(
     )
 
 
+def execute_geospatial_benchmark(
+    base_url: str,
+    model: str,
+    num_prompts: int = 500,
+    request_rate: int | None = None,
+    max_concurrency: int | None = None,
+    hf_token: str | None = None,
+    benchmark_retries: int = 3,
+    retries_timeout: int = 5,
+    burstiness: float = 1,
+    interpreter: str = "python",
+) -> dict[str, Any]:
+    """
+    Execute benchmark with the geospatial Valencia dataset
+    :param base_url: url for vllm endpoint
+    :param model: model
+    :param num_prompts: total number of prompts (requests) to send
+    :param hf_token: huggingface token
+    :param benchmark_retries: number of benchmark execution retries
+    :param retries_timeout: timeout before the initial retry
+    :param request_rate: requests to send per second (None sends them all at time 0)
+    :param max_concurrency: maximum number of concurrent requests (None for no limit)
+    :return: results dictionary
+    """
+    from importlib import resources
+
+    data_set_path = resources.path(
+        "ado_actuators.vllm_performance",
+        "geospatial_valencia.jsonl",
+    )
+    return execute_benchmark(
+        base_url=base_url,
+        backend="io-processor-plugin",
+        model=model,
+        data_set="custom",
+        interpreter=interpreter,
+        num_prompts=num_prompts,
+        request_rate=request_rate,
+        
max_concurrency=max_concurrency, + hf_token=hf_token, + benchmark_retries=benchmark_retries, + retries_timeout=retries_timeout, + burstiness=burstiness, + custom_args={ + "--dataset-path": data_set_path, + "--endpoint": "/pooling", + "--skip-tokenizer-init": True, + }, + ) + + if __name__ == "__main__": - results = execute_benchmark( + results = execute_geospatial_benchmark( interpreter="python3.10", - base_url="http://localhost:28015", - data_set="random", - model="openai/gpt-oss-20b", - request_rate=None, - max_concurrency=None, + base_url="http://localhost:8000", + model="ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11", + request_rate=2, + max_concurrency=10, hf_token=os.getenv("HF_TOKEN"), num_prompts=100, ) From 52ccff47f116f9486f0c65214b12e3c116dd4706 Mon Sep 17 00:00:00 2001 From: michaelj Date: Wed, 15 Oct 2025 12:13:55 +0100 Subject: [PATCH 02/46] feat: add geospatial experiments endpoint and full --- .../vllm_performance/experiment_executor.py | 85 ++++-- .../vllm_performance/experiments.yaml | 266 ++++++++++++++++++ 2 files changed, 323 insertions(+), 28 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 3da664a8..2e4f213c 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -22,6 +22,7 @@ VLLMDtype, ) from ado_actuators.vllm_performance.vllm_performance_test.execute_benchmark import ( + execute_geospatial_benchmark, execute_random_benchmark, ) from ray.actor import ActorHandle @@ -279,20 +280,34 @@ def run_resource_and_workload_experiment( start = time.time() result = None try: - result = execute_random_benchmark( - base_url=base_url, - model=values.get("model"), - interpreter=actuator_parameters.interpreter, - num_prompts=int(values.get("num_prompts")), - request_rate=request_rate, - max_concurrency=max_concurrency, - hf_token=actuator_parameters.hf_token, - benchmark_retries=actuator_parameters.benchmark_retries, - retries_timeout=actuator_parameters.retries_timeout, - number_input_tokens=int(values.get("number_input_tokens")), - max_output_tokens=int(values.get("max_output_tokens")), - burstiness=float(values.get("burstiness")), - ) + if experiment.identifier == "performance-testing-geospatial-full": + result = execute_geospatial_benchmark( + base_url=base_url, + model=values.get("model"), + interpreter=actuator_parameters.interpreter, + num_prompts=int(values.get("num_prompts")), + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=actuator_parameters.hf_token, + benchmark_retries=actuator_parameters.benchmark_retries, + retries_timeout=actuator_parameters.retries_timeout, + burstiness=float(values.get("burstiness")), + ) + else: + result = execute_random_benchmark( + base_url=base_url, + model=values.get("model"), + interpreter=actuator_parameters.interpreter, + num_prompts=int(values.get("num_prompts")), + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=actuator_parameters.hf_token, + benchmark_retries=actuator_parameters.benchmark_retries, + retries_timeout=actuator_parameters.retries_timeout, + number_input_tokens=int(values.get("number_input_tokens")), + max_output_tokens=int(values.get("max_output_tokens")), + burstiness=float(values.get("burstiness")), + ) logger.debug(f"benchmark executed in {time.time() - 
start} sec") except Exception as e: logger.error(f"Failed to execute VLLM performance test {e}") @@ -379,20 +394,34 @@ def run_workload_experiment( error = None measured_values = [] try: - result = execute_random_benchmark( - base_url=values.get("endpoint"), - model=values.get("model"), - interpreter=actuator_parameters.interpreter, - num_prompts=int(values.get("num_prompts")), - request_rate=request_rate, - max_concurrency=max_concurrency, - hf_token=actuator_parameters.hf_token, - benchmark_retries=actuator_parameters.benchmark_retries, - retries_timeout=actuator_parameters.retries_timeout, - number_input_tokens=int(values.get("number_input_tokens")), - max_output_tokens=int(values.get("max_output_tokens")), - burstiness=float(values.get("burstiness")), - ) + if experiment.identifier == "performance-testing-geospatial-endpoint": + result = execute_geospatial_benchmark( + base_url=values.get("endpoint"), + model=values.get("model"), + interpreter=actuator_parameters.interpreter, + num_prompts=int(values.get("num_prompts")), + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=actuator_parameters.hf_token, + benchmark_retries=actuator_parameters.benchmark_retries, + retries_timeout=actuator_parameters.retries_timeout, + burstiness=float(values.get("burstiness")), + ) + else: + result = execute_random_benchmark( + base_url=values.get("endpoint"), + model=values.get("model"), + interpreter=actuator_parameters.interpreter, + num_prompts=int(values.get("num_prompts")), + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=actuator_parameters.hf_token, + benchmark_retries=actuator_parameters.benchmark_retries, + retries_timeout=actuator_parameters.retries_timeout, + number_input_tokens=int(values.get("number_input_tokens")), + max_output_tokens=int(values.get("max_output_tokens")), + burstiness=float(values.get("burstiness")), + ) except Exception as e: logger.error(f"Failed to execute VLLM performance test {e}") error = f"Failed to execute VLLM performance test {e}" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml index 1d03b13a..40aa9777 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml @@ -319,3 +319,269 @@ performance_testing-endpoint: - identifier: "p99_e2el_ms" metadata: description: 'Test inference performance of a model served by vLLM endpoint across inference workload configurations' +performance_testing-geospatial-endpoint: + identifier: performance-testing-geospatial-endpoint + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"] + - identifier: 'endpoint' + metadata: + description: 'The endpoint(s) to test' + propertyDomain: + variableType: "UNKNOWN_VARIABLE_TYPE" + - identifier: 'request_rate' + metadata: + description: "The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'burstiness' + metadata: + description: "The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + defaultParameterization: + - value: 100 + property: + identifier: 'num_prompts' + - value: -1 + property: + identifier: 'max_concurrency' + - value: 1.0 + property: + identifier: 'burstiness' + # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "total_input_tokens" + - identifier: "total_output_tokens" + - identifier: "request_throughput" + - identifier: "output_throughput" + - identifier: "total_token_throughput" + - identifier: "mean_ttft_ms" + - identifier: "median_ttft_ms" + - identifier: "std_ttft_ms" + - identifier: "p25_ttft_ms" + - identifier: "p50_ttft_ms" + - identifier: "p75_ttft_ms" + - identifier: "p99_ttft_ms" + - identifier: "mean_tpot_ms" + - identifier: "median_tpot_ms" + - identifier: "std_tpot_ms" + - identifier: "p25_tpot_ms" + - identifier: "p50_tpot_ms" + - identifier: "p75_tpot_ms" + - identifier: "p99_tpot_ms" + - identifier: "mean_itl_ms" + - identifier: "median_itl_ms" + - identifier: "std_itl_ms" + - identifier: "p25_itl_ms" + - identifier: "p50_itl_ms" + - identifier: "p75_itl_ms" + - identifier: "p99_itl_ms" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'Test inference performance of a geospatial model served by vLLM endpoint across inference workload configurations' +performance_testing-geospatial-full: + identifier: performance-testing-geospatial-full + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] + - identifier: 'request_rate' + metadata: + description: "(benchmark) The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "(benchmark) The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "(benchmark) The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + - identifier: 'burstiness' + metadata: + description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: image + metadata: + description: "(deployment) Docker image to use to create vllm deployments" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] + - identifier: n_cpus + metadata: + description: "(deployment) the number of CPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 1,17 ] + interval: 1 + - identifier: memory + metadata: + description: "(deployment) the amount of memory to allocate to vLLM pod" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "64Gi", "128Gi", "256Gi" ] + - identifier: dtype + metadata: + description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] + - identifier: 'gpu_memory_utilization' + metadata: + description: "(deployment) The fraction of GPU memory to be used for the model executor," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ .5, .75, .9 ] + - identifier: 'cpu_offload' + metadata: + description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 
0 means all weights are on GPU," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ 0, 8, 16, 24, 32 ] + - identifier: 'max_num_seq' + metadata: + description: "(deployment) Maximum number of sequences per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [32,2049] + interval: 32 + - identifier: 'n_gpus' + metadata: + description: "(deployment) Number of GPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,9] + interval: 1 + - identifier: 'gpu_type' + metadata: + description: "(deployment) The GPU type to use" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + defaultParameterization: + - property: + identifier: 'image' + value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" + - property: + identifier: n_cpus + value: 8 + - property: + identifier: + memory + value: "128Gi" + - property: + identifier: dtype + value: "auto" + - property: + identifier: 'num_prompts' + value: 500 + - property: + identifier: 'max_concurrency' + value: -1 + - property: + identifier: 'burstiness' + value: 1.0 + - property: + identifier: 'gpu_memory_utilization' + value: .9 + - property: + identifier: 'cpu_offload' + value: 0 + - property: + identifier: 'max_num_seq' + value: 256 + - property: + identifier: 'n_gpus' + value: 1 + - property: + identifier: 'gpu_type' + value: 'NVIDIA-A100-80GB-PCIe' + # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "total_input_tokens" + - identifier: "total_output_tokens" + - identifier: "request_throughput" + - identifier: "output_throughput" + - identifier: "total_token_throughput" + - identifier: "mean_ttft_ms" + - identifier: "median_ttft_ms" + - identifier: "std_ttft_ms" + - identifier: "p25_ttft_ms" + - identifier: "p50_ttft_ms" + - identifier: "p75_ttft_ms" + - identifier: "p99_ttft_ms" + - identifier: "mean_tpot_ms" + - identifier: "median_tpot_ms" + - identifier: "std_tpot_ms" + - identifier: "p25_tpot_ms" + - identifier: "p50_tpot_ms" + - identifier: "p75_tpot_ms" + - identifier: "p99_tpot_ms" + - identifier: "mean_itl_ms" + - identifier: "median_itl_ms" + - identifier: "std_itl_ms" + - identifier: "p25_itl_ms" + - identifier: "p50_itl_ms" + - identifier: "p75_itl_ms" + - identifier: "p99_itl_ms" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'VLLM performance testing across compute resource and workload configuration' \ No newline at end of file From 98591b581dcdfb69b555ecfde48fec9fa4bf36d1 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Wed, 15 Oct 2025 20:49:41 +0100 Subject: [PATCH 03/46] various fixes to the vllm_performance actuator Signed-off-by: Christian Pinto --- .../ado_actuators/vllm_performance/actuator.py | 2 +- .../vllm_performance/experiment_executor.py | 4 +++- .../vllm_performance/experiments.yaml | 14 ++++++++++++-- .../k8/yaml_support/build_components.py | 6 +++++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py index a9e5dc30..a4fea988 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py +++ 
b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py @@ -176,7 +176,7 @@ async def submit( if experiment.deprecated is True: raise DeprecatedExperimentError(f"Experiment {experiment} is deprecated") - if experiment.identifier == "performance-testing-full": + if experiment.identifier in ["performance-testing-full", "performance-testing-geospatial-full"]: if not self.env_manager: raise MissingConfigurationForExperimentError( f"Actuator configuration did not contain sufficient information for a kubernetes environment manager to be created. " diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 2e4f213c..f3594150 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -7,6 +7,7 @@ import subprocess import sys import time +import traceback import ray from ado_actuators.vllm_performance.actuator_parameters import ( @@ -152,6 +153,7 @@ def _create_environment( logger.error( f"Attempt {attempt}. Failed to create test environment {e}" ) + logger.error(traceback.format_exception(e)) error = f"Failed to create test environment {e}" time.sleep(tmout) tmout *= 2 @@ -310,7 +312,7 @@ def run_resource_and_workload_experiment( ) logger.debug(f"benchmark executed in {time.time() - start} sec") except Exception as e: - logger.error(f"Failed to execute VLLM performance test {e}") + logger.error(traceback.format_exception(e)) error = f"Failed to execute VLLM performance test {e}" finally: if pf is not None: diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml index 40aa9777..0c1859ca 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml @@ -420,7 +420,7 @@ performance_testing-geospatial-full: metadata: description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] - identifier: 'request_rate' metadata: @@ -455,7 +455,7 @@ performance_testing-geospatial-full: metadata: description: "(deployment) Docker image to use to create vllm deployments" propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] - identifier: n_cpus metadata: @@ -495,6 +495,13 @@ performance_testing-geospatial-full: variableType: 'DISCRETE_VARIABLE_TYPE' domainRange: [32,2049] interval: 32 + - identifier: 'max_batch_tokens' + metadata: + description: "(deployment) maximum number of batched tokens per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 8192, 32769] + interval: 1024 - identifier: 'n_gpus' metadata: description: "(deployment) Number of GPUs to use" @@ -540,6 +547,9 @@ performance_testing-geospatial-full: - property: identifier: 'max_num_seq' value: 256 + - property: + identifier: 'max_batch_tokens' + value: 16384 - property: identifier: 'n_gpus' value: 1 diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index 1e2355a1..511a2ea6 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -41,7 +41,11 @@ def get_k8_name(model: str) -> str: :return: k8 unique name for a given LLM model """ m_parts = model.split("/") - return f"vllm-{m_parts[-1].lower()}-{uuid.uuid4().hex}".replace(".", "-") + + # Making sure the resulting name is not longer than 63 characters as it is + # the maximum allowed for a name in kubernetes. 
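+    # (5 chars for "vllm-" + at most 21 for the prefix + 1 for the hyphen + 36 for the uuid4 string = 63.)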
+ name_prefix = m_parts[-1][:min(len(m_parts[-1]), 21)].rstrip("-") + return f"vllm-{name_prefix.lower()}-{uuid.uuid4()}".replace(".", "-") @staticmethod def _adjust_file_name(f: str) -> str: From bd62781809e02f3b540c6c778f82e167334e0cd3 Mon Sep 17 00:00:00 2001 From: michaelj Date: Wed, 15 Oct 2025 22:48:32 +0100 Subject: [PATCH 04/46] fix: add max_batch_tokens --- .../ado_actuators/vllm_performance/experiments.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml index 40aa9777..cd67986a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml @@ -488,6 +488,13 @@ performance_testing-geospatial-full: propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' values: [ 0, 8, 16, 24, 32 ] + - identifier: 'max_batch_tokens' + metadata: + description: "(deployment) maximum size of the sum of the 1st image dimensions per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 1024, 32769 ] + interval: 1024 - identifier: 'max_num_seq' metadata: description: "(deployment) Maximum number of sequences per iteration" @@ -546,6 +553,9 @@ performance_testing-geospatial-full: - property: identifier: 'gpu_type' value: 'NVIDIA-A100-80GB-PCIe' + - property: + identifier: 'max_batch_tokens' + value: 16384 # measurements targetProperties: - identifier: "duration" From c1dec4aab0e51534e35cf2da7db4285f38d0b5fe Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Tue, 28 Oct 2025 15:49:18 +0000 Subject: [PATCH 05/46] Updated vllm performance actuator to support geospatial Signed-off-by: Christian Pinto --- .../vllm_performance/experiment_executor.py | 3 + .../vllm_performance/experiments.yaml | 55 +++++++++++++++++- .../vllm_performance/k8/create_environment.py | 11 ++-- .../vllm_performance/k8/manage_components.py | 6 ++ .../k8/yaml_support/build_components.py | 56 +++++++++++++++---- .../execute_benchmark.py | 42 +++++++------- 6 files changed, 135 insertions(+), 38 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index f3594150..9be6921a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -144,6 +144,9 @@ def _create_environment( reuse_deployment=False, pvc_name=actuator.pvc_template, namespace=actuator.namespace, + skip_tokenizer_init=values.get("skip_tokenizer_init"), + enforce_eager=values.get("enforce_eager"), + io_processor_plugin=values.get("io_processor_plugin") ) # Update manager env_manager.done_creating.remote(definition=definition) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml index 072a936d..69d3460d 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml @@ -120,6 +120,24 @@ performance_testing-full: propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" values: [ 'NVIDIA-A100-80GB-PCIe', 
'NVIDIA-A100-SXM4-80GB' ] + - identifier: 'skip_tokenizer_init' + metadata: + description: "(deployment) skip tokenizer intialization" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'enforce_eager' + metadata: + description: "(deployment) enforce pytorch eager mode" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'io_processor_plugin' + metadata: + description: 'IO Pocessor plugin to load for the model' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ None, "terratorch_segmentation" ] defaultParameterization: - property: identifier: 'image' @@ -167,6 +185,15 @@ performance_testing-full: - property: identifier: 'gpu_type' value: 'NVIDIA-A100-80GB-PCIe' + - property: + identifier: 'skip_tokenizer_init' + value: False + - property: + identifier: 'enforce_eager' + value: False + - property: + identifier: 'io_processor_plugin' + value: None # measurements targetProperties: - identifier: "duration" @@ -522,6 +549,24 @@ performance_testing-geospatial-full: propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + - identifier: 'skip_tokenizer_init' + metadata: + description: "(deployment) skip tokenizer intialization" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'enforce_eager' + metadata: + description: "(deployment) enforce pytorch eager mode" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'io_processor_plugin' + metadata: + description: 'IO Pocessor plugin to load for the model' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ None, "terratorch_segmentation" ] defaultParameterization: - property: identifier: 'image' @@ -564,8 +609,14 @@ performance_testing-geospatial-full: identifier: 'gpu_type' value: 'NVIDIA-A100-80GB-PCIe' - property: - identifier: 'max_batch_tokens' - value: 16384 + identifier: 'skip_tokenizer_init' + value: True + - property: + identifier: 'enforce_eager' + value: True + - property: + identifier: 'io_processor_plugin' + value: "terratorch_segmentation" # measurements targetProperties: - identifier: "duration" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py index 87ee719d..3f0a0809 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py @@ -40,6 +40,9 @@ def create_test_environment( reuse_pvc: bool = True, pvc_name: str = "vllm-support", namespace: str = "vllm-testing", + enforce_eager: bool = False, + skip_tokenizer_init: bool = False, + io_processor_plugin: str | None = None ) -> None: """ Create test deployment @@ -113,15 +116,13 @@ def create_test_environment( n_gpus=n_gpus, n_cpus=n_cpus, memory=memory, - max_batch_tokens=max_batch_tokens, - gpu_memory_utilization=gpu_memory_utilization, - dtype=dtype, - cpu_offload=cpu_offload, - max_num_seq=max_num_seq, template=deployment_template, claim_name=pvc_name, hf_token=hf_token, reuse=reuse_deployment, + enforce_eager=enforce_eager, + skip_tokenizer_init=skip_tokenizer_init, + io_processor_plugin=io_processor_plugin ) logger.debug("deployment created") c_manager.wait_deployment_ready(k8_name=k8_name) diff --git 
a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py index dfef4725..cd77a444 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py @@ -231,6 +231,9 @@ def create_deployment( claim_name: str | None = None, hf_token: str | None = None, reuse: bool = False, + enforce_eager: bool = False, + skip_tokenizer_init: bool = False, + io_processor_plugin: str | None = None ) -> None: """ create deployment for model @@ -293,6 +296,9 @@ def create_deployment( template=template, claim_name=claim_name, hf_token=hf_token, + enforce_eager=enforce_eager, + skip_tokenizer_init=skip_tokenizer_init, + io_processor_plugin=io_processor_plugin, ), ) except ApiException as e: diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index 511a2ea6..b4069ace 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -77,6 +77,9 @@ def deployment_yaml( template: str = "deployment.yaml", claim_name: str | None = None, hf_token: str | None = None, + enforce_eager: bool = False, + skip_tokenizer_init: bool = False, + io_processor_plugin: str | None = None, ) -> dict[str, Any]: """ Generate deployment yaml @@ -138,6 +141,30 @@ def deployment_yaml( [{"name": PVC_NAME, "persistentVolumeClaim": {"claimName": claim_name}}] ) + vllm_serve_args = [ + model, + "--max-num-batched-tokens", + f"{max_batch_tokens}", + "--gpu-memory-utilization", + f"{gpu_memory_utilization}", + "--cpu-offload-gb", + f"{cpu_offload}", + "--max-num-seq", + f"{max_num_seq}", + "--tensor-parallel-size", + f"{n_gpus}", + "--dtype", + dtype.value, + ] + + if enforce_eager: + vllm_serve_args.append("--skip-tokenizer-init") + if skip_tokenizer_init: + vllm_serve_args.append("--enforce-eager") + if io_processor_plugin: + vllm_serve_args.append("--io-processor-plugin") + vllm_serve_args.append(io_processor_plugin) + # container container = spec["containers"][0] # image @@ -151,19 +178,25 @@ def deployment_yaml( limits["cpu"] = str(n_cpus) limits["memory"] = memory limits["nvidia.com/gpu"] = str(n_gpus) + + #command + container["command"] = ["vllm", "serve"] + container["args"] = vllm_serve_args # env variables to to set parameters for docker execution - container["env"] = [ - {"name": "MODEL", "value": model}, - {"name": "GPU_MEMORY_UTILIZATION", "value": str(gpu_memory_utilization)}, - {"name": "DTYPE", "value": dtype.value}, - {"name": "CPU_OFFLOAD_GB", "value": str(cpu_offload)}, - {"name": "MAX_NUM_BATCHED_TOKENS", "value": str(max_batch_tokens)}, - {"name": "MAX_NUM_SEQ", "value": str(max_num_seq)}, - {"name": "TENSOR_PARALLEL_SIZE", "value": str(n_gpus)}, - ] + # container["env"] = [ + # {"name": "MODEL", "value": model}, + # {"name": "GPU_MEMORY_UTILIZATION", "value": str(gpu_memory_utilization)}, + # {"name": "DTYPE", "value": dtype.value}, + # {"name": "CPU_OFFLOAD_GB", "value": str(cpu_offload)}, + # {"name": "MAX_NUM_BATCHED_TOKENS", "value": str(max_batch_tokens)}, + # {"name": "MAX_NUM_SEQ", "value": str(max_num_seq)}, + # {"name": 
"TENSOR_PARALLEL_SIZE", "value": str(n_gpus)}, + # ] if hf_token is not None: - container["env"].extend([{"name": "HF_TOKEN", "value": hf_token}]) + container["env"]=[{"name": "HF_TOKEN", "value": hf_token}] if claim_name is not None: + if "env" not in container: + container["env"] = [] container["env"].extend( [ { @@ -181,6 +214,9 @@ def deployment_yaml( ) # return + + import json + print(json.dumps(deployment_yaml, indent=2)) return deployment_yaml @staticmethod diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index a65463f9..952cb26a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -176,29 +176,29 @@ def execute_geospatial_benchmark( """ from importlib import resources - data_set_path = resources.path( + with resources.path( "ado_actuators.vllm_performance", "geospatial_valencia.jsonl", - ) - return execute_benchmark( - base_url=base_url, - backend="io-processor-plugin", - model=model, - data_set="custom", - interpreter=interpreter, - num_prompts=num_prompts, - request_rate=request_rate, - max_concurrency=max_concurrency, - hf_token=hf_token, - benchmark_retries=benchmark_retries, - retries_timeout=retries_timeout, - burstiness=burstiness, - custom_args={ - "--dataset-path": data_set_path, - "--endpoint": "/pooling", - "--skip-tokenizer-init": True, - }, - ) + ) as data_set_path: + return execute_benchmark( + base_url=base_url, + backend="io-processor-plugin", + model=model, + data_set="custom", + interpreter=interpreter, + num_prompts=num_prompts, + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=hf_token, + benchmark_retries=benchmark_retries, + retries_timeout=retries_timeout, + burstiness=burstiness, + custom_args={ + "--dataset-path": data_set_path, + "--endpoint": "/pooling", + "--skip-tokenizer-init": True, + }, + ) if __name__ == "__main__": From 2e030282585b6baacc43c06ee0ac98803f9dbc45 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 30 Oct 2025 09:32:29 +0000 Subject: [PATCH 06/46] Termorarily avoiding cpu14 Signed-off-by: Christian Pinto --- .../vllm_performance/k8/yaml_support/deployment.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml index 2b90302a..25851982 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml @@ -27,6 +27,15 @@ spec: - name: http containerPort: 8000 protocol: TCP + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - operator: NotIn + key: kubernetes.io/hostname + values: + - adcpu014 # funny node startupProbe: exec: command: From 80c68c4c0c0113ffbe764f72bff7a60d527c2d9d Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 30 Oct 2025 09:42:58 +0000 Subject: [PATCH 07/46] Termorarily avoiding cpu14 Signed-off-by: Christian Pinto --- .../k8/yaml_support/deployment.yaml | 18 +++++++++--------- 1 file changed, 9 
insertions(+), 9 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml index 25851982..bc9c10a6 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml @@ -19,6 +19,15 @@ spec: app.kubernetes.io/name: vllm app.kubernetes.io/instance: vllm-testing spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - operator: NotIn + key: kubernetes.io/hostname + values: + - adcpu014 # funny node containers: - name: vllm image: "vllm/vllm-openai:v0.6.3" @@ -27,15 +36,6 @@ spec: - name: http containerPort: 8000 protocol: TCP - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - operator: NotIn - key: kubernetes.io/hostname - values: - - adcpu014 # funny node startupProbe: exec: command: From 592e17940ffae7971936b268678944bdcf8c9107 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 30 Oct 2025 11:55:19 +0000 Subject: [PATCH 08/46] Added india dataset Signed-off-by: Christian Pinto --- .../ado_actuators/vllm_performance/geospatial_india.jsonl | 1 + .../vllm_performance/vllm_performance_test/execute_benchmark.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl new file mode 100644 index 00000000..693bbc09 --- /dev/null +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl @@ -0,0 +1 @@ +{"prompt":{"data": {"data": "https://huggingface.co/christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM/resolve/main/India_900498_S2Hand.tif","data_format": "url","out_data_format": "b64_json","indices": [1, 2, 3, 8, 11, 12]},"priority": 0,"softmax": false}} diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index 952cb26a..cf3ed96a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -178,7 +178,7 @@ def execute_geospatial_benchmark( with resources.path( "ado_actuators.vllm_performance", - "geospatial_valencia.jsonl", + "geospatial_india.jsonl", ) as data_set_path: return execute_benchmark( base_url=base_url, From 94c7490b34edf6d59918087a431f0683ee2b3b87 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 30 Oct 2025 14:45:22 +0000 Subject: [PATCH 09/46] Fixed BaseSamplerConfig Signed-off-by: Christian Pinto --- orchestrator/modules/operators/randomwalk.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/orchestrator/modules/operators/randomwalk.py b/orchestrator/modules/operators/randomwalk.py index 1018c17a..aa050494 100644 --- a/orchestrator/modules/operators/randomwalk.py +++ b/orchestrator/modules/operators/randomwalk.py 
@@ -207,8 +207,11 @@ def sampler(self) -> BaseSampler | GroupSampler: sampler = SequentialSampleSelector() case CombinedWalkModeEnum.RANDOMGROUPED: sampler = RandomGroupSampleSelector(group=self.grouping) + sampler = RandomGroupSampleSelector(group=self.grouping) case CombinedWalkModeEnum.SEQUENTIALGROUPED: - sampler = SequentialGroupSampleSelector(group=self.grouping) + sampler = SequentialGroupSampleSelector( + group=self.grouping + ) case _: # this can never happen, as we are validating this above pass @@ -218,10 +221,12 @@ def sampler(self) -> BaseSampler | GroupSampler: case CombinedWalkModeEnum.RANDOMGROUPED: sampler = ExplicitEntitySpaceGroupedGridSampleGenerator( mode=WalkModeEnum.RANDOM, group=self.grouping + mode=WalkModeEnum.RANDOM, group=self.grouping ) case CombinedWalkModeEnum.SEQUENTIALGROUPED: sampler = ExplicitEntitySpaceGroupedGridSampleGenerator( mode=WalkModeEnum.SEQUENTIAL, group=self.grouping + mode=WalkModeEnum.SEQUENTIAL, group=self.grouping ) case CombinedWalkModeEnum.RANDOM: sampler = ExplicitEntitySpaceGridSampleGenerator( From 3fd83b83cb6988031918a22d835bcd3301255e9d Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 30 Oct 2025 15:48:05 +0000 Subject: [PATCH 10/46] Some changes to the vllmperformance experiments Signed-off-by: Christian Pinto --- .../ado_actuators/vllm_performance/experiments.yaml | 9 --------- .../k8/yaml_support/build_components.py | 10 ++++------ 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml index 69d3460d..53ee0c33 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml @@ -132,12 +132,6 @@ performance_testing-full: propertyDomain: variableType: BINARY_VARIABLE_TYPE values: [True, False] - - identifier: 'io_processor_plugin' - metadata: - description: 'IO Pocessor plugin to load for the model' - propertyDomain: - variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ None, "terratorch_segmentation" ] defaultParameterization: - property: identifier: 'image' @@ -191,9 +185,6 @@ performance_testing-full: - property: identifier: 'enforce_eager' value: False - - property: - identifier: 'io_processor_plugin' - value: None # measurements targetProperties: - identifier: "duration" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index b4069ace..a3277a08 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -6,6 +6,7 @@ import sys import uuid from enum import Enum +import json from typing import Any import yaml @@ -158,9 +159,9 @@ def deployment_yaml( ] if enforce_eager: - vllm_serve_args.append("--skip-tokenizer-init") - if skip_tokenizer_init: vllm_serve_args.append("--enforce-eager") + if skip_tokenizer_init: + vllm_serve_args.append("--skip-tokenizer-init") if io_processor_plugin: vllm_serve_args.append("--io-processor-plugin") vllm_serve_args.append(io_processor_plugin) @@ -213,10 +214,7 @@ def deployment_yaml( ] ) - # return - - import json - print(json.dumps(deployment_yaml, indent=2)) + 
logger.debug(json.dumps(deployment_yaml, indent=2))
         return deployment_yaml
 
     @staticmethod

From 90ae6bbbbcc96df29ee4b89c45d48cde4aa4efde Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Fri, 31 Oct 2025 09:23:01 +0000
Subject: [PATCH 11/46] Some changes to the experiment and reverted the deployment template

Signed-off-by: Christian Pinto
---
 .../vllm_performance/actuator.py                     | 2 +-
 .../vllm_performance/k8/yaml_support/build_components.py | 2 +-
 .../vllm_performance/k8/yaml_support/deployment.yaml | 9 ---------
 3 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py
index a4fea988..2e522bf6 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py
@@ -197,7 +197,7 @@ async def submit(
         )
 
         # Execute experiment
-        # Note: Here the experiment instance is just past for convenience since we retrieved it above
+        # Note: Here the experiment instance is just passed for convenience since we retrieved it above
         run_resource_and_workload_experiment.remote(
             request=request,
             experiment=experiment,
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py
index a3277a08..0abcc8c9 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py
@@ -162,7 +162,7 @@ def deployment_yaml(
             vllm_serve_args.append("--enforce-eager")
         if skip_tokenizer_init:
             vllm_serve_args.append("--skip-tokenizer-init")
-        if io_processor_plugin:
+        if io_processor_plugin is not None:
             vllm_serve_args.append("--io-processor-plugin")
             vllm_serve_args.append(io_processor_plugin)
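With the `vllm_serve_args` construction above, a deployment built from the default parameterization in performance_testing_geospatial.yaml would launch the server roughly as follows. This is a sketch assembled from the patch content only; actual values come from the experiment's parameterization, and the last three flags appear because the geospatial defaults set enforce_eager, skip_tokenizer_init, and io_processor_plugin:

    vllm serve ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11 \
        --max-num-batched-tokens 16384 \
        --gpu-memory-utilization 0.9 \
        --cpu-offload-gb 0 \
        --max-num-seq 256 \
        --tensor-parallel-size 1 \
        --dtype auto \
        --enforce-eager \
        --skip-tokenizer-init \
        --io-processor-plugin terratorch_segmentation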
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml
index bc9c10a6..2b90302a 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml
@@ -19,15 +19,6 @@ spec:
         app.kubernetes.io/name: vllm
         app.kubernetes.io/instance: vllm-testing
     spec:
-      affinity:
-        nodeAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            nodeSelectorTerms:
-            - matchExpressions:
-              - operator: NotIn
-                key: kubernetes.io/hostname
-                values:
-                - adcpu014 # funny node
       containers:
         - name: vllm
           image: "vllm/vllm-openai:v0.6.3"

From a7509754d612e0e0063ddd3c67c66a2a01e212cc Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Fri, 31 Oct 2025 09:28:36 +0000
Subject: [PATCH 12/46] Removed some clutter from deployment template

Signed-off-by: Christian Pinto
---
 .../vllm_performance/k8/yaml_support/deployment.yaml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml
index 2b90302a..2659550d 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml
@@ -68,6 +68,4 @@ spec:
           emptyDir:
             medium: Memory
       nodeSelector:
-        nvidia.com/gpu.product: NVIDIA-A100-80GB-PCIe
-        #nvidia.com/gpu.product: Tesla-V100-PCIE-16GB
-        #kubernetes.io/hostname: cpu15
\ No newline at end of file
+        nvidia.com/gpu.product: NVIDIA-A100-80GB-PCIe
\ No newline at end of file

From c432cff57b38f6dc2e77b7a7210ffe928fe7b2e2 Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Fri, 31 Oct 2025 11:05:03 +0000
Subject: [PATCH 13/46] Few more fixes

Signed-off-by: Christian Pinto
---
 orchestrator/modules/operators/randomwalk.py |   3 -
 .../vllm_performance/actuator.py             |  26 +-
 .../performance_testing.yaml}                | 312 +-----------
 .../performance_testing_geospatial.yaml      | 308 +++++++++++++++++
 4 files changed, 328 insertions(+), 321 deletions(-)
 rename plugins/actuators/vllm_performance/ado_actuators/vllm_performance/{experiments.yaml => experiments/performance_testing.yaml} (51%)
 create mode 100644 plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml

diff --git a/orchestrator/modules/operators/randomwalk.py b/orchestrator/modules/operators/randomwalk.py
index aa050494..eff9a6f2 100644
--- a/orchestrator/modules/operators/randomwalk.py
+++ b/orchestrator/modules/operators/randomwalk.py
@@ -207,7 +207,6 @@ def sampler(self) -> BaseSampler | GroupSampler:
                 sampler = SequentialSampleSelector()
             case CombinedWalkModeEnum.RANDOMGROUPED:
                 sampler = RandomGroupSampleSelector(group=self.grouping)
-                sampler = RandomGroupSampleSelector(group=self.grouping)
             case CombinedWalkModeEnum.SEQUENTIALGROUPED:
                 sampler = SequentialGroupSampleSelector(
                     group=self.grouping
@@ -221,12 +220,10 @@ def sampler(self) -> BaseSampler | GroupSampler:
             case CombinedWalkModeEnum.RANDOMGROUPED:
                 sampler = ExplicitEntitySpaceGroupedGridSampleGenerator(
                     mode=WalkModeEnum.RANDOM, group=self.grouping
-                    mode=WalkModeEnum.RANDOM, group=self.grouping
                 )
             case CombinedWalkModeEnum.SEQUENTIALGROUPED:
                 sampler = ExplicitEntitySpaceGroupedGridSampleGenerator(
                     mode=WalkModeEnum.SEQUENTIAL, group=self.grouping
-                    mode=WalkModeEnum.SEQUENTIAL, group=self.grouping
                 )
             case CombinedWalkModeEnum.RANDOM:
                 sampler = ExplicitEntitySpaceGridSampleGenerator(
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py
index 2e522bf6..88d4e67c 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py
@@ -54,14 +54,26 @@ def catalog(
     ) -> ExperimentCatalog:
         """Returns the Experiments your actuator provides"""
 
-        # The catalog be formed in code here or read from a file containing the Experiments models
-        # This shows reading from a file
-
+        # Loading experiment definitions from the YAML files in the `experiments` directory.
+ # NOTE: Only files can be placed in the experiments directory, + # but each file can contain multiple experiment definitions path = os.path.abspath(__file__) - path = os.path.split(path)[0] - with open(os.path.join(path, "experiments.yaml")) as f: - data = yaml.safe_load(f) - experiments = [Experiment(**data[e]) for e in data] + exp_dir = os.path.join(os.path.split(path)[0], "experiments") + experiments = [] + for exp_file in os.listdir(exp_dir): + logger.debug(f"Loading experiments from {exp_file}") + exp_file_path = os.path.join(exp_dir, exp_file) + if os.path.isdir(exp_file_path): + logger.error(f"{exp_file_path} is a directory. Only files are supported in the experiments directory") + raise Exception(f"{exp_file_path} is a directory. Only files are supported in the experiments directory") + with open(exp_file_path) as f: + try: + data = yaml.safe_load(f) + except yaml.YAMLError as e: + logger.error(f"File {exp_file} is a malformed YAML - {e}") + raise Exception (f"File {exp_file} is a malformed YAML - {e}") + + experiments.extend([Experiment(**data[e]) for e in data]) return ExperimentCatalog( catalogIdentifier=cls.identifier, diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml similarity index 51% rename from plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml rename to plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml index 53ee0c33..216d6ae4 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml @@ -336,314 +336,4 @@ performance_testing-endpoint: - identifier: "p75_e2el_ms" - identifier: "p99_e2el_ms" metadata: - description: 'Test inference performance of a model served by vLLM endpoint across inference workload configurations' -performance_testing-geospatial-endpoint: - identifier: performance-testing-geospatial-endpoint - actuatorIdentifier: "vllm_performance" - requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values - - identifier: 'model' - metadata: - description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation' - propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" - values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"] - - identifier: 'endpoint' - metadata: - description: 'The endpoint(s) to test' - propertyDomain: - variableType: "UNKNOWN_VARIABLE_TYPE" - - identifier: 'request_rate' - metadata: - description: "The number of requests to send per second" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [-1,1000] - interval: 1 # -1 means send all requests at time 0 - optionalProperties: - - identifier: 'num_prompts' - metadata: - description: "The number of prompts to send (total number of requests)" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [1,10001] - interval: 1 - - identifier: 'burstiness' - metadata: - description: "The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." 
- propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 0, 10 ] - interval: 1 - - identifier: 'max_concurrency' - metadata: - description: "The maximum number of concurrent requests to send" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ -1, 500 ] # -1 means no concurrency control - interval: 1 - defaultParameterization: - - value: 100 - property: - identifier: 'num_prompts' - - value: -1 - property: - identifier: 'max_concurrency' - - value: 1.0 - property: - identifier: 'burstiness' - # measurements - targetProperties: - - identifier: "duration" - - identifier: "completed" - - identifier: "total_input_tokens" - - identifier: "total_output_tokens" - - identifier: "request_throughput" - - identifier: "output_throughput" - - identifier: "total_token_throughput" - - identifier: "mean_ttft_ms" - - identifier: "median_ttft_ms" - - identifier: "std_ttft_ms" - - identifier: "p25_ttft_ms" - - identifier: "p50_ttft_ms" - - identifier: "p75_ttft_ms" - - identifier: "p99_ttft_ms" - - identifier: "mean_tpot_ms" - - identifier: "median_tpot_ms" - - identifier: "std_tpot_ms" - - identifier: "p25_tpot_ms" - - identifier: "p50_tpot_ms" - - identifier: "p75_tpot_ms" - - identifier: "p99_tpot_ms" - - identifier: "mean_itl_ms" - - identifier: "median_itl_ms" - - identifier: "std_itl_ms" - - identifier: "p25_itl_ms" - - identifier: "p50_itl_ms" - - identifier: "p75_itl_ms" - - identifier: "p99_itl_ms" - - identifier: "mean_e2el_ms" - - identifier: "median_e2el_ms" - - identifier: "std_e2el_ms" - - identifier: "p25_e2el_ms" - - identifier: "p50_e2el_ms" - - identifier: "p75_e2el_ms" - - identifier: "p99_e2el_ms" - metadata: - description: 'Test inference performance of a geospatial model served by vLLM endpoint across inference workload configurations' -performance_testing-geospatial-full: - identifier: performance-testing-geospatial-full - actuatorIdentifier: "vllm_performance" - requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values - - identifier: 'model' - metadata: - description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation' - propertyDomain: - variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] - - identifier: 'request_rate' - metadata: - description: "(benchmark) The number of requests to send per second" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [-1,1000] - interval: 1 # -1 means send all requests at time 0 - optionalProperties: - - identifier: 'num_prompts' - metadata: - description: "(benchmark) The number of prompts to send (total number of requests)" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [1,10001] - interval: 1 - - identifier: 'max_concurrency' - metadata: - description: "(benchmark) The maximum number of concurrent requests to send" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ -1, 500 ] # -1 means no concurrency control - interval: 1 - - identifier: 'burstiness' - metadata: - description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." 
- propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 0, 10 ] - interval: 1 - - identifier: image - metadata: - description: "(deployment) Docker image to use to create vllm deployments" - propertyDomain: - variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] - - identifier: n_cpus - metadata: - description: "(deployment) the number of CPUs to use" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 1,17 ] - interval: 1 - - identifier: memory - metadata: - description: "(deployment) the amount of memory to allocate to vLLM pod" - propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ "64Gi", "128Gi", "256Gi" ] - - identifier: dtype - metadata: - description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." - propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] - - identifier: 'gpu_memory_utilization' - metadata: - description: "(deployment) The fraction of GPU memory to be used for the model executor," - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - values: [ .5, .75, .9 ] - - identifier: 'cpu_offload' - metadata: - description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 0 means all weights are on GPU," - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - values: [ 0, 8, 16, 24, 32 ] - - identifier: 'max_batch_tokens' - metadata: - description: "(deployment) maximum size of the sum of the 1st image dimensions per iteration" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 1024, 32769 ] - interval: 1024 - - identifier: 'max_num_seq' - metadata: - description: "(deployment) Maximum number of sequences per iteration" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [32,2049] - interval: 32 - - identifier: 'max_batch_tokens' - metadata: - description: "(deployment) maximum number of batched tokens per iteration" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 8192, 32769] - interval: 1024 - - identifier: 'n_gpus' - metadata: - description: "(deployment) Number of GPUs to use" - propertyDomain: - variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [1,9] - interval: 1 - - identifier: 'gpu_type' - metadata: - description: "(deployment) The GPU type to use" - propertyDomain: - variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] - - identifier: 'skip_tokenizer_init' - metadata: - description: "(deployment) skip tokenizer intialization" - propertyDomain: - variableType: BINARY_VARIABLE_TYPE - values: [True, False] - - identifier: 'enforce_eager' - metadata: - description: "(deployment) enforce pytorch eager mode" - propertyDomain: - variableType: BINARY_VARIABLE_TYPE - values: [True, False] - - identifier: 'io_processor_plugin' - metadata: - description: 'IO Pocessor plugin to load for the model' - propertyDomain: - variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ None, "terratorch_segmentation" ] - defaultParameterization: - - property: - identifier: 'image' - value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" - - property: - identifier: n_cpus - value: 8 - - property: - identifier: - memory - value: "128Gi" - - property: - identifier: dtype - value: "auto" - - property: - identifier: 'num_prompts' 
-      value: 500
-    - property:
-      identifier: 'max_concurrency'
-      value: -1
-    - property:
-      identifier: 'burstiness'
-      value: 1.0
-    - property:
-      identifier: 'gpu_memory_utilization'
-      value: .9
-    - property:
-      identifier: 'cpu_offload'
-      value: 0
-    - property:
-      identifier: 'max_num_seq'
-      value: 256
-    - property:
-      identifier: 'max_batch_tokens'
-      value: 16384
-    - property:
-      identifier: 'n_gpus'
-      value: 1
-    - property:
-      identifier: 'gpu_type'
-      value: 'NVIDIA-A100-80GB-PCIe'
-    - property:
-      identifier: 'skip_tokenizer_init'
-      value: True
-    - property:
-      identifier: 'enforce_eager'
-      value: True
-    - property:
-      identifier: 'io_processor_plugin'
-      value: "terratorch_segmentation"
-  # measurements
-  targetProperties:
-    - identifier: "duration"
-    - identifier: "completed"
-    - identifier: "total_input_tokens"
-    - identifier: "total_output_tokens"
-    - identifier: "request_throughput"
-    - identifier: "output_throughput"
-    - identifier: "total_token_throughput"
-    - identifier: "mean_ttft_ms"
-    - identifier: "median_ttft_ms"
-    - identifier: "std_ttft_ms"
-    - identifier: "p25_ttft_ms"
-    - identifier: "p50_ttft_ms"
-    - identifier: "p75_ttft_ms"
-    - identifier: "p99_ttft_ms"
-    - identifier: "mean_tpot_ms"
-    - identifier: "median_tpot_ms"
-    - identifier: "std_tpot_ms"
-    - identifier: "p25_tpot_ms"
-    - identifier: "p50_tpot_ms"
-    - identifier: "p75_tpot_ms"
-    - identifier: "p99_tpot_ms"
-    - identifier: "mean_itl_ms"
-    - identifier: "median_itl_ms"
-    - identifier: "std_itl_ms"
-    - identifier: "p25_itl_ms"
-    - identifier: "p50_itl_ms"
-    - identifier: "p75_itl_ms"
-    - identifier: "p99_itl_ms"
-    - identifier: "mean_e2el_ms"
-    - identifier: "median_e2el_ms"
-    - identifier: "std_e2el_ms"
-    - identifier: "p25_e2el_ms"
-    - identifier: "p50_e2el_ms"
-    - identifier: "p75_e2el_ms"
-    - identifier: "p99_e2el_ms"
-  metadata:
-    description: 'VLLM performance testing across compute resource and workload configuration'
\ No newline at end of file
+    description: 'Test inference performance of a model served by vLLM endpoint across inference workload configurations'
\ No newline at end of file
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
new file mode 100644
index 00000000..ad62052d
--- /dev/null
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
@@ -0,0 +1,308 @@
+# Copyright (c) IBM Corporation
+# SPDX-License-Identifier: MIT

+# The input to an experiment is an Entity. For the Entity to be a valid input,
+# its properties must match what is defined here
+performance_testing-geospatial-endpoint:
+  identifier: performance-testing-geospatial-endpoint
+  actuatorIdentifier: "vllm_performance"
+  requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values
+    - identifier: 'model'
+      metadata:
+        description: 'model to use for testing. Assumed to be served by all endpoints tested.
Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"] + - identifier: 'endpoint' + metadata: + description: 'The endpoint(s) to test' + propertyDomain: + variableType: "UNKNOWN_VARIABLE_TYPE" + - identifier: 'request_rate' + metadata: + description: "The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'burstiness' + metadata: + description: "The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + defaultParameterization: + - value: 100 + property: + identifier: 'num_prompts' + - value: -1 + property: + identifier: 'max_concurrency' + - value: 1.0 + property: + identifier: 'burstiness' + # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "total_input_tokens" + - identifier: "total_output_tokens" + - identifier: "request_throughput" + - identifier: "output_throughput" + - identifier: "total_token_throughput" + - identifier: "mean_ttft_ms" + - identifier: "median_ttft_ms" + - identifier: "std_ttft_ms" + - identifier: "p25_ttft_ms" + - identifier: "p50_ttft_ms" + - identifier: "p75_ttft_ms" + - identifier: "p99_ttft_ms" + - identifier: "mean_tpot_ms" + - identifier: "median_tpot_ms" + - identifier: "std_tpot_ms" + - identifier: "p25_tpot_ms" + - identifier: "p50_tpot_ms" + - identifier: "p75_tpot_ms" + - identifier: "p99_tpot_ms" + - identifier: "mean_itl_ms" + - identifier: "median_itl_ms" + - identifier: "std_itl_ms" + - identifier: "p25_itl_ms" + - identifier: "p50_itl_ms" + - identifier: "p75_itl_ms" + - identifier: "p99_itl_ms" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'Test inference performance of a geospatial model served by vLLM endpoint across inference workload configurations' +performance_testing-geospatial-full: + identifier: performance-testing-geospatial-full + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] + - identifier: 'request_rate' + metadata: + description: "(benchmark) The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "(benchmark) The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "(benchmark) The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + - identifier: 'burstiness' + metadata: + description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: image + metadata: + description: "(deployment) Docker image to use to create vllm deployments" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] + - identifier: n_cpus + metadata: + description: "(deployment) the number of CPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 1,17 ] + interval: 1 + - identifier: memory + metadata: + description: "(deployment) the amount of memory to allocate to vLLM pod" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "64Gi", "128Gi", "256Gi" ] + - identifier: dtype + metadata: + description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] + - identifier: 'gpu_memory_utilization' + metadata: + description: "(deployment) The fraction of GPU memory to be used for the model executor," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ .5, .75, .9 ] + - identifier: 'cpu_offload' + metadata: + description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 
0 means all weights are on GPU,"
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        values: [ 0, 8, 16, 24, 32 ]
+    - identifier: 'max_num_seq'
+      metadata:
+        description: "(deployment) Maximum number of sequences per iteration"
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [32,2049]
+        interval: 32
+    - identifier: 'max_batch_tokens'
+      metadata:
+        description: "(deployment) maximum number of batched tokens per iteration"
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [ 8192, 32769]
+        interval: 1024
+    - identifier: 'n_gpus'
+      metadata:
+        description: "(deployment) Number of GPUs to use"
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [1,9]
+        interval: 1
+    - identifier: 'gpu_type'
+      metadata:
+        description: "(deployment) The GPU type to use"
+      propertyDomain:
+        variableType: "CATEGORICAL_VARIABLE_TYPE"
+        values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ]
+    - identifier: 'skip_tokenizer_init'
+      metadata:
+        description: "(deployment) skip tokenizer intialization"
+      propertyDomain:
+        variableType: BINARY_VARIABLE_TYPE
+        values: [True, False]
+    - identifier: 'enforce_eager'
+      metadata:
+        description: "(deployment) enforce pytorch eager mode"
+      propertyDomain:
+        variableType: BINARY_VARIABLE_TYPE
+        values: [True, False]
+    - identifier: 'io_processor_plugin'
+      metadata:
+        description: 'IO Processor plugin to load for the model'
+      propertyDomain:
+        variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
+        values: [ None, "terratorch_segmentation" ]
+  defaultParameterization:
+    - property:
+      identifier: 'image'
+      value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1"
+    - property:
+      identifier: n_cpus
+      value: 8
+    - property:
+      identifier:
+        memory
+      value: "128Gi"
+    - property:
+      identifier: dtype
+      value: "auto"
+    - property:
+      identifier: 'num_prompts'
+      value: 500
+    - property:
+      identifier: 'max_concurrency'
+      value: -1
+    - property:
+      identifier: 'burstiness'
+      value: 1.0
+    - property:
+      identifier: 'gpu_memory_utilization'
+      value: .9
+    - property:
+      identifier: 'cpu_offload'
+      value: 0
+    - property:
+      identifier: 'max_num_seq'
+      value: 256
+    - property:
+      identifier: 'max_batch_tokens'
+      value: 16384
+    - property:
+      identifier: 'n_gpus'
+      value: 1
+    - property:
+      identifier: 'gpu_type'
+      value: 'NVIDIA-A100-80GB-PCIe'
+    - property:
+      identifier: 'skip_tokenizer_init'
+      value: True
+    - property:
+      identifier: 'enforce_eager'
+      value: True
+    - property:
+      identifier: 'io_processor_plugin'
+      value: "terratorch_segmentation"
+  # measurements
+  targetProperties:
+    - identifier: "duration"
+    - identifier: "completed"
+    - identifier: "total_input_tokens"
+    - identifier: "total_output_tokens"
+    - identifier: "request_throughput"
+    - identifier: "output_throughput"
+    - identifier: "total_token_throughput"
+    - identifier: "mean_ttft_ms"
+    - identifier: "median_ttft_ms"
+    - identifier: "std_ttft_ms"
+    - identifier: "p25_ttft_ms"
+    - identifier: "p50_ttft_ms"
+    - identifier: "p75_ttft_ms"
+    - identifier: "p99_ttft_ms"
+    - identifier: "mean_tpot_ms"
+    - identifier: "median_tpot_ms"
+    - identifier: "std_tpot_ms"
+    - identifier: "p25_tpot_ms"
+    - identifier: "p50_tpot_ms"
+    - identifier: "p75_tpot_ms"
+    - identifier: "p99_tpot_ms"
+    - identifier: "mean_itl_ms"
+    - identifier: "median_itl_ms"
+    - identifier: "std_itl_ms"
+    - identifier: "p25_itl_ms"
+    - identifier: "p50_itl_ms"
+    - identifier: "p75_itl_ms"
+    - identifier: "p99_itl_ms"
+    - identifier: "mean_e2el_ms"
+    - identifier: "median_e2el_ms"
+    - identifier: "std_e2el_ms"
+    - identifier: "p25_e2el_ms"
+    - identifier: "p50_e2el_ms"
+    - identifier: "p75_e2el_ms"
+    - identifier: "p99_e2el_ms"
+  metadata:
+    description: 'VLLM performance testing across compute resource and workload configuration'
\ No newline at end of file

From b851d033e45436bc79dd2b56bc4c4233c2363d55 Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 3 Nov 2025 09:24:43 +0000
Subject: [PATCH 14/46] Fixed bug in validate_entity

Signed-off-by: Christian Pinto
---
 orchestrator/schema/experiment.py     |  6 +++---
 orchestrator/schema/property_value.py | 13 +++++++++++++
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/orchestrator/schema/experiment.py b/orchestrator/schema/experiment.py
index 9e17ee6e..deb87a30 100644
--- a/orchestrator/schema/experiment.py
+++ b/orchestrator/schema/experiment.py
@@ -636,14 +636,14 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool:
         }
         if validate_point_against_properties(
             point,
-            constitutive_properties=self.requiredConstitutiveProperties,
+            constitutive_properties=self.requiredConstitutiveProperties + list(self.optionalProperties),
         ):
             return True

         # It's not an exact match - check if partial match
         if not validate_point_against_properties(
             point,
-            constitutive_properties=self.requiredConstitutiveProperties,
+            constitutive_properties=self.requiredConstitutiveProperties + list(self.optionalProperties),
             allow_partial_matches=True,
         ):
             # no partial match - missing required properties or has incorrect values for them
@@ -654,7 +654,7 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool:
             return False

         # It has the required properties with valid values but there are additional properties
-        # See if these properties are optional propertiesof the experiment
+        # See if these properties are optional properties of the experiment
         potential_optional_properties: set[str] = point.keys() - {
             cp.identifier for cp in self.requiredProperties
         }
diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py
index a429412a..d7b56107 100644
--- a/orchestrator/schema/property_value.py
+++ b/orchestrator/schema/property_value.py
@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: MIT

 import enum
+import logging
 import typing

 import pydantic
@@ -13,6 +14,7 @@
     PropertyDescriptor,
 )

+logger = logging.getLogger("property_value")

 class ValueTypeEnum(str, enum.Enum):
     NUMERIC_VALUE_TYPE = "NUMERIC_VALUE_TYPE"  # the value is a bool,int, float etc.
@@ -189,6 +191,12 @@ def validate_point_against_properties(
         cp.identifier for cp in constitutive_properties
     }

+    logger.debug(
+        f"Validating point's constitutive properties "
+        f"(allow_partial_matches = {allow_partial_matches}) {constitutive_property_identifiers_for_point}, "
+        f"against the space constitutive properties {constitutive_property_identifiers_for_entity_space}"
+    )
+
     matching_constitutive_property_identifiers = (
         constitutive_property_identifiers_for_point.intersection(
             constitutive_property_identifiers_for_entity_space
@@ -221,6 +229,11 @@
         if not constitutive_property.propertyDomain.valueInDomain(
             point[constitutive_property.identifier]
         ):
+            logger.warning(
+                f"Property {constitutive_property.identifier}({point[constitutive_property.identifier]}) "
+                "is not in the target consitutive property "
+                f"domain ({constitutive_property.propertyDomain.domainRange})"
+            )
             return False

     return True

From 7055c38850d67e24104c39260d671c4659a99285 Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 3 Nov 2025 11:32:16 +0000
Subject: [PATCH 15/46] One more fix to a log message

Signed-off-by: Christian Pinto
---
 orchestrator/schema/property_value.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py
index d7b56107..0ed81e4f 100644
--- a/orchestrator/schema/property_value.py
+++ b/orchestrator/schema/property_value.py
@@ -232,7 +232,7 @@ def validate_point_against_properties(
             logger.warning(
                 f"Property {constitutive_property.identifier}({point[constitutive_property.identifier]}) "
                 "is not in the target consitutive property "
-                f"domain ({constitutive_property.propertyDomain.domainRange})"
+                f"domain ({constitutive_property.propertyDomain.domain_values()})"
             )
             return False

From 5bdf90263b21e8061ac86fb6b0a298c477e47ee1 Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 3 Nov 2025 11:34:43 +0000
Subject: [PATCH 16/46] One more fix to a log message

Signed-off-by: Christian Pinto
---
 orchestrator/schema/property_value.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py
index 0ed81e4f..46f595ef 100644
--- a/orchestrator/schema/property_value.py
+++ b/orchestrator/schema/property_value.py
@@ -232,7 +232,7 @@ def validate_point_against_properties(
             logger.warning(
                 f"Property {constitutive_property.identifier}({point[constitutive_property.identifier]}) "
                 "is not in the target consitutive property "
-                f"domain ({constitutive_property.propertyDomain.domain_values()})"
+                f"domain ({constitutive_property.propertyDomain.domain_values})"
             )
             return False

From dbab4c7cb55fb664e4fe74b0eb6c41782dbbc1de Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 3 Nov 2025 11:35:41 +0000
Subject: [PATCH 17/46] One more fix to a log message

Signed-off-by: Christian Pinto
---
 orchestrator/schema/property_value.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py
index 0ed81e4f..46f595ef 100644
--- a/orchestrator/schema/property_value.py
+++ b/orchestrator/schema/property_value.py
@@ -232,7 +232,7 @@ def validate_point_against_properties(
             logger.warning(
                 f"Property {constitutive_property.identifier}({point[constitutive_property.identifier]}) "
                 "is not in the target consitutive property "
-                f"domain ({constitutive_property.propertyDomain.domain_values()})"
+                f"domain
({constitutive_property.propertyDomain.domain_values})" ) return False From fd100b6a1faf9bfce4e985c43c8e6b1e31eaee7d Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Mon, 3 Nov 2025 12:59:43 +0000 Subject: [PATCH 18/46] Fixes to vllm_performance actuator Signed-off-by: Christian Pinto --- .../datasets/india_url_in_b64_out.jsonl | 1 + .../valencia_url_in_b64_out.jsonl} | 0 .../vllm_performance/experiment_executor.py | 4 ++++ .../experiments/performance_testing.yaml | 18 ++++++++++++++++++ .../performance_testing_geospatial.yaml | 18 ++++++++++++++++++ .../vllm_performance_test/execute_benchmark.py | 14 +++++++++++--- 6 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/india_url_in_b64_out.jsonl rename plugins/actuators/vllm_performance/ado_actuators/vllm_performance/{geospatial_valencia.jsonl => datasets/valencia_url_in_b64_out.jsonl} (100%) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/india_url_in_b64_out.jsonl b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/india_url_in_b64_out.jsonl new file mode 100644 index 00000000..693bbc09 --- /dev/null +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/india_url_in_b64_out.jsonl @@ -0,0 +1 @@ +{"prompt":{"data": {"data": "https://huggingface.co/christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM/resolve/main/India_900498_S2Hand.tif","data_format": "url","out_data_format": "b64_json","indices": [1, 2, 3, 8, 11, 12]},"priority": 0,"softmax": false}} diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_valencia.jsonl b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/valencia_url_in_b64_out.jsonl similarity index 100% rename from plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_valencia.jsonl rename to plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/valencia_url_in_b64_out.jsonl diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 9be6921a..459e8473 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -297,6 +297,7 @@ def run_resource_and_workload_experiment( benchmark_retries=actuator_parameters.benchmark_retries, retries_timeout=actuator_parameters.retries_timeout, burstiness=float(values.get("burstiness")), + dataset = values.get("dataset"), ) else: result = execute_random_benchmark( @@ -312,6 +313,7 @@ def run_resource_and_workload_experiment( number_input_tokens=int(values.get("number_input_tokens")), max_output_tokens=int(values.get("max_output_tokens")), burstiness=float(values.get("burstiness")), + dataset = values.get("dataset"), ) logger.debug(f"benchmark executed in {time.time() - start} sec") except Exception as e: @@ -411,6 +413,7 @@ def run_workload_experiment( benchmark_retries=actuator_parameters.benchmark_retries, retries_timeout=actuator_parameters.retries_timeout, burstiness=float(values.get("burstiness")), + dataset = values.get("dataset"), ) else: result = execute_random_benchmark( @@ -426,6 +429,7 @@ def run_workload_experiment( number_input_tokens=int(values.get("number_input_tokens")), 
                 max_output_tokens=int(values.get("max_output_tokens")),
                 burstiness=float(values.get("burstiness")),
+                dataset = values.get("dataset"),
             )
     except Exception as e:
         logger.error(f"Failed to execute VLLM performance test {e}")
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
index 216d6ae4..c9537fd7 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
@@ -56,6 +56,12 @@ performance_testing-full:
         variableType: 'DISCRETE_VARIABLE_TYPE'
         domainRange: [ 1, 10000 ]
         interval: 1
+    - identifier: 'dataset'
+      metadata:
+        description: "(benchmark) The dataset to be used for the experiment"
+      propertyDomain:
+        variableType: "CATEGORICAL_VARIABLE_TYPE"
+        values: [ 'random' ]
     - identifier: image
       metadata:
         description: "(deployment) Docker image to use to create vllm deployments"
@@ -161,6 +167,9 @@ performance_testing-full:
     - property:
       identifier: 'max_output_tokens'
       value: 128
+    - property:
+      identifier: 'dataset'
+      value: 'random'
     - property:
       identifier: 'gpu_memory_utilization'
       value: .9
@@ -282,6 +291,12 @@ performance_testing-endpoint:
         variableType: 'DISCRETE_VARIABLE_TYPE'
         domainRange: [ -1, 500 ] # -1 means no concurrency control
         interval: 1
+    - identifier: 'dataset'
+      metadata:
+        description: "(benchmark) The dataset to be used for the experiment"
+      propertyDomain:
+        variableType: "CATEGORICAL_VARIABLE_TYPE"
+        values: [ 'random' ]
   defaultParameterization:
     - value: 1000
       property:
@@ -298,6 +313,9 @@ performance_testing-endpoint:
     - value: 128
       property:
         identifier: 'max_output_tokens'
+    - property:
+      identifier: 'dataset'
+      value: 'random'
   # measurements
   targetProperties:
     - identifier: "duration"
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
index ad62052d..5d976439 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
@@ -47,6 +47,12 @@ performance_testing-geospatial-endpoint:
         variableType: 'DISCRETE_VARIABLE_TYPE'
         domainRange: [ -1, 500 ] # -1 means no concurrency control
         interval: 1
+    - identifier: 'dataset'
+      metadata:
+        description: "The dataset to be used for the experiment"
+      propertyDomain:
+        variableType: "CATEGORICAL_VARIABLE_TYPE"
+        values: [ 'india_url_in_b64_out', 'valencia_url_in_b64_out' ]
   defaultParameterization:
     - value: 100
       property:
@@ -57,6 +63,9 @@ performance_testing-geospatial-endpoint:
     - value: 1.0
       property:
        identifier: 'burstiness'
+    - property:
+      identifier: 'dataset'
+      value: 'india_url_in_b64_out'
   # measurements
   targetProperties:
     - identifier: "duration"
@@ -135,6 +144,12 @@ performance_testing-geospatial-full:
         variableType: 'DISCRETE_VARIABLE_TYPE'
         domainRange: [ 0, 10 ]
         interval: 1
+    - identifier: 'dataset'
+      metadata:
+        description: "(benchmark) The dataset to be used for the experiment"
+      propertyDomain:
+        variableType: "CATEGORICAL_VARIABLE_TYPE"
+        values: [ 'india_url_in_b64_out', 'valencia_url_in_b64_out' ]
     - identifier: image
       metadata:
         description: "(deployment) Docker image to use to create vllm deployments"
@@ -267,6 +282,9 @@ performance_testing-geospatial-full:
     - property:
       identifier: 'io_processor_plugin'
      value: "terratorch_segmentation"
+    - property:
+      identifier: 'dataset'
+      value: 'india_url_in_b64_out'
   # measurements
   targetProperties:
     - identifier: "duration"
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
index cf3ed96a..1d81ee36 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
@@ -12,6 +12,10 @@
     get_results,
 )

+default_geospatial_datasets_filenames = {
+    "india_url_in_b64_out": "india_url_in_b64_out.jsonl",
+    "valencia_url_in_b64_out": "valencia_url_in_b64_out.jsonl",
+}

 def execute_benchmark(
     base_url: str,
@@ -107,6 +111,7 @@ def execute_benchmark(
 def execute_random_benchmark(
     base_url: str,
     model: str,
+    dataset: str,
     num_prompts: int = 500,
     request_rate: int | None = None,
     max_concurrency: int | None = None,
@@ -134,7 +139,7 @@ def execute_random_benchmark(
     return execute_benchmark(
         base_url=base_url,
         model=model,
-        data_set="random",
+        data_set=dataset,
         interpreter=interpreter,
         num_prompts=num_prompts,
         request_rate=request_rate,
@@ -153,6 +158,7 @@ def execute_random_benchmark(
 def execute_geospatial_benchmark(
     base_url: str,
     model: str,
+    dataset: str,
     num_prompts: int = 500,
     request_rate: int | None = None,
     max_concurrency: int | None = None,
@@ -176,9 +182,11 @@ def execute_geospatial_benchmark(
     """
     from importlib import resources

+    dataset_filename = default_geospatial_datasets_filenames[dataset]
+
     with resources.path(
-        "ado_actuators.vllm_performance",
-        "geospatial_india.jsonl",
+        "ado_actuators.vllm_performance.datasets",
+        dataset_filename,
     ) as data_set_path:
         return execute_benchmark(
             base_url=base_url,

From f7ceb520be5fde683d6e8b5c8f5a568fd82c160c Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 3 Nov 2025 09:24:43 +0000
Subject: [PATCH 19/46] fix(experiment): Fixed bug in validate_entity

Signed-off-by: Christian Pinto
---
 orchestrator/schema/experiment.py     |  8 +++++---
 orchestrator/schema/property_value.py | 14 ++++++++++++++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/orchestrator/schema/experiment.py b/orchestrator/schema/experiment.py
index 9e17ee6e..61da6ec4 100644
--- a/orchestrator/schema/experiment.py
+++ b/orchestrator/schema/experiment.py
@@ -636,14 +636,16 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool:
         }
         if validate_point_against_properties(
             point,
-            constitutive_properties=self.requiredConstitutiveProperties,
+            constitutive_properties=self.requiredConstitutiveProperties
+            + list(self.optionalProperties),
         ):
             return True

         # It's not an exact match - check if partial match
         if not validate_point_against_properties(
             point,
-            constitutive_properties=self.requiredConstitutiveProperties,
+            constitutive_properties=self.requiredConstitutiveProperties
+            + list(self.optionalProperties),
             allow_partial_matches=True,
         ):
             # no partial match - missing required properties or has incorrect values for them
@@ -654,7 +656,7 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool:
             return False

         # It has the required properties with valid values but there are additional
properties - # See if these properties are optional propertiesof the experiment + # See if these properties are optional properties of the experiment potential_optional_properties: set[str] = point.keys() - { cp.identifier for cp in self.requiredProperties } diff --git a/orchestrator/schema/property_value.py b/orchestrator/schema/property_value.py index a429412a..08da1bb0 100644 --- a/orchestrator/schema/property_value.py +++ b/orchestrator/schema/property_value.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: MIT import enum +import logging import typing import pydantic @@ -13,6 +14,8 @@ PropertyDescriptor, ) +logger = logging.getLogger("property_value") + class ValueTypeEnum(str, enum.Enum): NUMERIC_VALUE_TYPE = "NUMERIC_VALUE_TYPE" # the value is a bool,int, float etc. @@ -189,6 +192,12 @@ def validate_point_against_properties( cp.identifier for cp in constitutive_properties } + logger.debug( + f"Validating point's constitutive properties " + f"(allow_partial_matches = {allow_partial_matches}) {constitutive_property_identifiers_for_point}, " + f"against the space constitutive properties {constitutive_property_identifiers_for_entity_space}" + ) + matching_constitutive_property_identifiers = ( constitutive_property_identifiers_for_point.intersection( constitutive_property_identifiers_for_entity_space @@ -221,6 +230,11 @@ def validate_point_against_properties( if not constitutive_property.propertyDomain.valueInDomain( point[constitutive_property.identifier] ): + logger.warning( + f"Property {constitutive_property.identifier}({point[constitutive_property.identifier]}) " + "is not in the target consitutive property " + f"domain ({constitutive_property.propertyDomain.domain_values})" + ) return False return True From df4f9bc1369c5744f01a25de30c43c82b3f94184 Mon Sep 17 00:00:00 2001 From: michaelj Date: Mon, 3 Nov 2025 15:21:16 +0000 Subject: [PATCH 20/46] fix: Not using reference which may be parameterized --- orchestrator/utilities/run_experiment.py | 44 +++++++++++++++--------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/orchestrator/utilities/run_experiment.py b/orchestrator/utilities/run_experiment.py index 0ed6959d..85064c4a 100644 --- a/orchestrator/utilities/run_experiment.py +++ b/orchestrator/utilities/run_experiment.py @@ -24,7 +24,7 @@ def local_execution_closure( registry: ActuatorRegistry, actuator_configuration_identifiers: list[str] | None = None, -) -> Callable[[ExperimentReference, Entity], MeasurementRequest]: +) -> Callable[[ExperimentReference, Entity], MeasurementRequest] | None: """Create a callable that submits a local measurement request. The function keeps a dictionary of Actuator actors so that each actuator @@ -66,31 +66,41 @@ def local_execution_closure( def execute_local( reference: ExperimentReference, entity: Entity - ) -> MeasurementRequest: + ) -> MeasurementRequest | None: # instantiate the actuator for this experiment identifier. 
-        experiment = registry.experimentForReference(reference)
-        if experiment.actuatorIdentifier not in actuators:
+        if reference.actuatorIdentifier not in actuators:
             actuator_class = registry.actuatorForIdentifier(
-                experiment.actuatorIdentifier
+                reference.actuatorIdentifier
             )
-            if experiment.actuatorIdentifier in actuator_configurations:
+            if reference.actuatorIdentifier in actuator_configurations:
                 config = actuator_configurations[
-                    experiment.actuatorIdentifier
+                    reference.actuatorIdentifier
                 ].parameters
             else:
                 config = actuator_class.default_parameters()
-            actuators[experiment.actuatorIdentifier] = actuator_class.remote(
+            actuators[reference.actuatorIdentifier] = actuator_class.remote(
                 queue=queue, params=config
             )
-        actuator = actuators[experiment.actuatorIdentifier]
-        # Submit the measurement request asynchronously.
-        actuator.submit.remote(
-            entities=[entity],
-            experimentReference=experiment.reference,
-            requesterid="run_experiment",
-            requestIndex=0,
-        )
+        actuator = actuators[reference.actuatorIdentifier]
+        # Submit the measurement request asynchronously, handle errors gracefully.
+        try:
+            actuator.submit.remote(
+                entities=[entity],
+                experimentReference=reference,
+                requesterid="run_experiment",
+                requestIndex=0,
+            )
+        except Exception as e:
+            print(
+                f"[ERROR] Failed to submit measurement request for {reference} to actuator '{reference.actuatorIdentifier}': {e}"
+            )
+            import traceback
+
+            traceback.print_exc()
+            # Either skip, or return None, or propagate. Let's return None.
+            return None
+
         return queue.get()

     return execute_local
@@ -239,7 +249,7 @@ def run(
             print("Skipping validation")

         if valid:
-            print(f"Executing: {reference.experimentIdentifier}")
+            print(f"Executing: {reference}")
             request = execute(reference, entity)
             print("Result:")
             print(f"{request.series_representation(output_format='target')}\n")

From d744287b1ec3ab53b3d1c93c7aae161c4d219e28 Mon Sep 17 00:00:00 2001
From: michaelj
Date: Mon, 3 Nov 2025 16:34:43 +0000
Subject: [PATCH 21/46] fix: validate_entity

validate_entity was incorrectly identifying Entities with optional
properties as invalid.

This was because
validate_point_against_properties(allow_partial_matches=True) does not
work as the code expected.

The code expected that given {point props} and {required props}, if
{required props}.issubset({point props}) it would return True.
However, it was checking the opposite, i.e.
{point props}.issubset({required props})
---
 orchestrator/schema/experiment.py | 84 +++++++++++++++----------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/orchestrator/schema/experiment.py b/orchestrator/schema/experiment.py
index 61da6ec4..46b7eb62 100644
--- a/orchestrator/schema/experiment.py
+++ b/orchestrator/schema/experiment.py
@@ -624,71 +624,71 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool:
         """Returns True if Experiment can be applied to entity, false otherwise

         This method only checks constitutive properties.
-        - All properties of the Entity that match the experiments required or optional properties must have
-        values in the domain of that property
-        - All required properties of the experiment must have a matching constitutive property
-        - If strict_optional is True all properties of the Entity that are not required properties of the Experiment
-        must match optional properties of the experiment.
+ - The entity has valid values for all required properties of the experiment + - The entity has valid values for any optional properties of the experiment it contains + - If strict_optional is True all properties of the Entity are properties (required+optional) of the experiment """ point = { v.property.identifier: v.value for v in entity.constitutive_property_values } - if validate_point_against_properties( - point, - constitutive_properties=self.requiredConstitutiveProperties - + list(self.optionalProperties), - ): - return True - - # It's not an exact match - check if partial match - if not validate_point_against_properties( - point, - constitutive_properties=self.requiredConstitutiveProperties - + list(self.optionalProperties), - allow_partial_matches=True, - ): - # no partial match - missing required properties or has incorrect values for them - logging.getLogger("experiment").warning( - f"The entity is missing or has invalid values for required properties of " - f" {self.identifier}" - ) - return False - # It has the required properties with valid values but there are additional properties - # See if these properties are optional properties of the experiment - potential_optional_properties: set[str] = point.keys() - { - cp.identifier for cp in self.requiredProperties + # + # Get required and optional property sets of the experiment + # + required_property_identifiers = { + cp.identifier for cp in self.requiredConstitutiveProperties } - optional_properties = potential_optional_properties & { + optional_property_identifiers = { cp.identifier for cp in self.optionalProperties } - # If strict_optional is on all the additional properties must be optional properties - if ( - len(optional_properties) != len(potential_optional_properties) - and strict_optional - ): + + # + # Get the equivalent sets from the entity + # + required_properties_present = point.keys() & required_property_identifiers + optional_properties_present = point.keys() & optional_property_identifiers + additional_properties_present = ( + point.keys() - required_properties_present - optional_properties_present + ) + + # First check against strict optional as it is a quick fail condition + if additional_properties_present and strict_optional: logging.getLogger("experiment").warning( f"Strict property checking is on and the following entity " f"properties are not required or optional properties of {self.identifier}:" - f"{potential_optional_properties-optional_properties} " + f"{additional_properties_present} " + ) + + # Check if all the required properties are present with values in domain + if not validate_point_against_properties( + point={k: v for k, v in point.items() if k in required_properties_present}, + constitutive_properties=self.requiredConstitutiveProperties, + ): + logging.getLogger("experiment").warning( + f"The entity is missing values for required properties of {self.identifier}: {required_property_identifiers - required_properties_present}" ) return False - is_valid = validate_point_against_properties( - point={key: point[key] for key in optional_properties}, + # All required properties are there + # Now check optional properties, if given + # We can set partial_match=True because: + # - If we wanted full match of optional properties (strict_optional), but it wasn't present, + # we would have already exited + if optional_properties_present and not validate_point_against_properties( + point={k: v for k, v in point.items() if k in optional_properties_present}, 
constitutive_properties=list(self.optionalProperties), allow_partial_matches=True, - ) - if not is_valid: + ): logging.getLogger("experiment").warning( f"The entity has properties that match optional properties" f"of {self.identifier} - " - f"{potential_optional_properties - optional_properties} - " + f"{optional_properties_present} - " f"but its values for those properties are not in the domain of the optional properties" ) + return False - return is_valid + return True class ParameterizedExperiment(Experiment): From 1869a49af194cbc3307e841a530d47d6f3ea9733 Mon Sep 17 00:00:00 2001 From: michaelj Date: Mon, 3 Nov 2025 16:53:45 +0000 Subject: [PATCH 22/46] fix: missing return --- orchestrator/schema/experiment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/orchestrator/schema/experiment.py b/orchestrator/schema/experiment.py index 46b7eb62..07bbfadf 100644 --- a/orchestrator/schema/experiment.py +++ b/orchestrator/schema/experiment.py @@ -659,6 +659,7 @@ def validate_entity(self, entity: "Entity", strict_optional=False) -> bool: f"properties are not required or optional properties of {self.identifier}:" f"{additional_properties_present} " ) + return False # Check if all the required properties are present with values in domain if not validate_point_against_properties( From 883e8771d801558f05565d73e83f7c89e9c7cfee Mon Sep 17 00:00:00 2001 From: michaelj Date: Mon, 3 Nov 2025 16:54:05 +0000 Subject: [PATCH 23/46] test: for validate_entity --- tests/schema/test_experiment.py | 194 +++++++++++++++++++++++++++++++- 1 file changed, 193 insertions(+), 1 deletion(-) diff --git a/tests/schema/test_experiment.py b/tests/schema/test_experiment.py index 14f3230d..72401bf1 100644 --- a/tests/schema/test_experiment.py +++ b/tests/schema/test_experiment.py @@ -8,8 +8,12 @@ from orchestrator.modules.actuators.registry import ( ActuatorRegistry, ) +from orchestrator.schema.domain import PropertyDomain, VariableTypeEnum from orchestrator.schema.entity import Entity -from orchestrator.schema.experiment import Experiment, ParameterizedExperiment +from orchestrator.schema.experiment import ( + Experiment, + ParameterizedExperiment, +) from orchestrator.schema.property import ( AbstractProperty, ConstitutiveProperty, @@ -815,3 +819,191 @@ def test_experiment_provides_requirements( mock_parameterizable_experiment ) ) + + +@pytest.fixture(scope="module") +def nevergrad_opt_3d_test_func_experiment(): + # Define required constitutive properties (x0, x1, x2, all continuous) + required_props = [ + ConstitutiveProperty( + identifier="x0", + propertyDomain=PropertyDomain( + variableType=VariableTypeEnum.CONTINUOUS_VARIABLE_TYPE + ), + ), + ConstitutiveProperty( + identifier="x1", + propertyDomain=PropertyDomain( + variableType=VariableTypeEnum.CONTINUOUS_VARIABLE_TYPE + ), + ), + ConstitutiveProperty( + identifier="x2", + propertyDomain=PropertyDomain( + variableType=VariableTypeEnum.CONTINUOUS_VARIABLE_TYPE + ), + ), + ] + # Optional property: name (categorical) + optional_props = ( + ConstitutiveProperty( + identifier="name", + propertyDomain=PropertyDomain( + variableType=VariableTypeEnum.CATEGORICAL_VARIABLE_TYPE, + values=["rosenbrock", "griewank", "sphere"], + ), + ), + ) + default_param = ( + ConstitutivePropertyValue( + value="rosenbrock", + property=ConstitutivePropertyDescriptor(identifier="name"), + ), + ) + return Experiment( + actuatorIdentifier="custom_experiments", + identifier="nevergrad_opt_3d_test_func", + targetProperties=[], + requiredProperties=tuple(required_props), + 
optionalProperties=optional_props, + defaultParameterization=default_param, + ) + + +def entity_with_props(props): + return Entity(constitutive_property_values=tuple(props)) + + +def test_validate_entity_required_only(nevergrad_opt_3d_test_func_experiment): + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value=2.5, property=ConstitutivePropertyDescriptor(identifier="x2") + ), + ] + entity = entity_with_props(props) + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is True + + +def test_validate_entity_with_optional_valid(nevergrad_opt_3d_test_func_experiment): + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value=2.5, property=ConstitutivePropertyDescriptor(identifier="x2") + ), + ConstitutivePropertyValue( + value="sphere", property=ConstitutivePropertyDescriptor(identifier="name") + ), + ] + entity = entity_with_props(props) + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is True + + +def test_validate_entity_with_optional_invalid(nevergrad_opt_3d_test_func_experiment): + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value=2.5, property=ConstitutivePropertyDescriptor(identifier="x2") + ), + ConstitutivePropertyValue( + value="foobar", property=ConstitutivePropertyDescriptor(identifier="name") + ), + ] + entity = entity_with_props(props) + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is False + + +def test_validate_entity_missing_required(nevergrad_opt_3d_test_func_experiment): + # missing x2 + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ] + entity = entity_with_props(props) + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is False + + +def test_validate_entity_missing_required_with_optional_valid( + nevergrad_opt_3d_test_func_experiment, +): + # missing x2 but valid name + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value="griewank", property=ConstitutivePropertyDescriptor(identifier="name") + ), + ] + entity = entity_with_props(props) + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is False + + +def test_validate_entity_additional_property_strict_optional_false( + nevergrad_opt_3d_test_func_experiment, +): + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value=2.5, property=ConstitutivePropertyDescriptor(identifier="x2") + ), + ConstitutivePropertyValue( + value=10, 
property=ConstitutivePropertyDescriptor(identifier="test") + ), + ] + entity = entity_with_props(props) + # Default: strict_optional=False, extra property is fine + assert nevergrad_opt_3d_test_func_experiment.validate_entity(entity) is True + + +def test_validate_entity_additional_property_strict_optional_true( + nevergrad_opt_3d_test_func_experiment, +): + props = [ + ConstitutivePropertyValue( + value=0.5, property=ConstitutivePropertyDescriptor(identifier="x0") + ), + ConstitutivePropertyValue( + value=1.5, property=ConstitutivePropertyDescriptor(identifier="x1") + ), + ConstitutivePropertyValue( + value=2.5, property=ConstitutivePropertyDescriptor(identifier="x2") + ), + ConstitutivePropertyValue( + value=10, property=ConstitutivePropertyDescriptor(identifier="test") + ), + ] + entity = entity_with_props(props) + assert ( + nevergrad_opt_3d_test_func_experiment.validate_entity( + entity, strict_optional=True + ) + is False + ) From 064750f8403e140ba38a253b9debdd69e1dea9d5 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Tue, 4 Nov 2025 09:30:49 +0000 Subject: [PATCH 24/46] chore: fixed formatting with black Signed-off-by: Christian Pinto --- orchestrator/modules/operators/randomwalk.py | 4 +--- .../ado_actuators/vllm_performance/actuator.py | 15 +++++++++++---- .../vllm_performance/experiment_executor.py | 10 +++++----- .../vllm_performance/k8/create_environment.py | 4 ++-- .../vllm_performance/k8/manage_components.py | 2 +- .../k8/yaml_support/build_components.py | 8 ++++---- .../vllm_performance_test/execute_benchmark.py | 1 + 7 files changed, 25 insertions(+), 19 deletions(-) diff --git a/orchestrator/modules/operators/randomwalk.py b/orchestrator/modules/operators/randomwalk.py index eff9a6f2..1018c17a 100644 --- a/orchestrator/modules/operators/randomwalk.py +++ b/orchestrator/modules/operators/randomwalk.py @@ -208,9 +208,7 @@ def sampler(self) -> BaseSampler | GroupSampler: case CombinedWalkModeEnum.RANDOMGROUPED: sampler = RandomGroupSampleSelector(group=self.grouping) case CombinedWalkModeEnum.SEQUENTIALGROUPED: - sampler = SequentialGroupSampleSelector( - group=self.grouping - ) + sampler = SequentialGroupSampleSelector(group=self.grouping) case _: # this can never happen, as we are validating this above pass diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py index 88d4e67c..dd45091f 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py @@ -64,14 +64,18 @@ def catalog( logger.debug(f"Loading experiments from {exp_file}") exp_file_path = os.path.join(exp_dir, exp_file) if os.path.isdir(exp_file_path): - logger.error(f"{exp_file_path} is a directory. Only files are supported in the experiments directory") - raise Exception(f"{exp_file_path} is a directory. Only files are supported in the experiments directory") + logger.error( + f"{exp_file_path} is a directory. Only files are supported in the experiments directory" + ) + raise Exception( + f"{exp_file_path} is a directory. 
Only files are supported in the experiments directory" + ) with open(exp_file_path) as f: try: data = yaml.safe_load(f) except yaml.YAMLError as e: logger.error(f"File {exp_file} is a malformed YAML - {e}") - raise Exception (f"File {exp_file} is a malformed YAML - {e}") + raise Exception(f"File {exp_file} is a malformed YAML - {e}") experiments.extend([Experiment(**data[e]) for e in data]) @@ -188,7 +192,10 @@ async def submit( if experiment.deprecated is True: raise DeprecatedExperimentError(f"Experiment {experiment} is deprecated") - if experiment.identifier in ["performance-testing-full", "performance-testing-geospatial-full"]: + if experiment.identifier in [ + "performance-testing-full", + "performance-testing-geospatial-full", + ]: if not self.env_manager: raise MissingConfigurationForExperimentError( f"Actuator configuration did not contain sufficient information for a kubernetes environment manager to be created. " diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 459e8473..693bdb58 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -146,7 +146,7 @@ def _create_environment( namespace=actuator.namespace, skip_tokenizer_init=values.get("skip_tokenizer_init"), enforce_eager=values.get("enforce_eager"), - io_processor_plugin=values.get("io_processor_plugin") + io_processor_plugin=values.get("io_processor_plugin"), ) # Update manager env_manager.done_creating.remote(definition=definition) @@ -297,7 +297,7 @@ def run_resource_and_workload_experiment( benchmark_retries=actuator_parameters.benchmark_retries, retries_timeout=actuator_parameters.retries_timeout, burstiness=float(values.get("burstiness")), - dataset = values.get("dataset"), + dataset=values.get("dataset"), ) else: result = execute_random_benchmark( @@ -313,7 +313,7 @@ def run_resource_and_workload_experiment( number_input_tokens=int(values.get("number_input_tokens")), max_output_tokens=int(values.get("max_output_tokens")), burstiness=float(values.get("burstiness")), - dataset = values.get("dataset"), + dataset=values.get("dataset"), ) logger.debug(f"benchmark executed in {time.time() - start} sec") except Exception as e: @@ -413,7 +413,7 @@ def run_workload_experiment( benchmark_retries=actuator_parameters.benchmark_retries, retries_timeout=actuator_parameters.retries_timeout, burstiness=float(values.get("burstiness")), - dataset = values.get("dataset"), + dataset=values.get("dataset"), ) else: result = execute_random_benchmark( @@ -429,7 +429,7 @@ def run_workload_experiment( number_input_tokens=int(values.get("number_input_tokens")), max_output_tokens=int(values.get("max_output_tokens")), burstiness=float(values.get("burstiness")), - dataset = values.get("dataset"), + dataset=values.get("dataset"), ) except Exception as e: logger.error(f"Failed to execute VLLM performance test {e}") diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py index 3f0a0809..c3a2a2b1 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py @@ -42,7 +42,7 @@ def 
create_test_environment( namespace: str = "vllm-testing", enforce_eager: bool = False, skip_tokenizer_init: bool = False, - io_processor_plugin: str | None = None + io_processor_plugin: str | None = None, ) -> None: """ Create test deployment @@ -122,7 +122,7 @@ def create_test_environment( reuse=reuse_deployment, enforce_eager=enforce_eager, skip_tokenizer_init=skip_tokenizer_init, - io_processor_plugin=io_processor_plugin + io_processor_plugin=io_processor_plugin, ) logger.debug("deployment created") c_manager.wait_deployment_ready(k8_name=k8_name) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py index cd77a444..9fddc978 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py @@ -233,7 +233,7 @@ def create_deployment( reuse: bool = False, enforce_eager: bool = False, skip_tokenizer_init: bool = False, - io_processor_plugin: str | None = None + io_processor_plugin: str | None = None, ) -> None: """ create deployment for model diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index 0abcc8c9..41d8cdb4 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -1,12 +1,12 @@ # Copyright (c) IBM Corporation # SPDX-License-Identifier: MIT +import json import logging import os import sys import uuid from enum import Enum -import json from typing import Any import yaml @@ -45,7 +45,7 @@ def get_k8_name(model: str) -> str: # Making sure the resulting name is not longer than 63 characters as it is # the maximum allowed for a name in kubernetes. 
- name_prefix = m_parts[-1][:min(len(m_parts[-1]), 21)].rstrip("-") + name_prefix = m_parts[-1][: min(len(m_parts[-1]), 21)].rstrip("-") return f"vllm-{name_prefix.lower()}-{uuid.uuid4()}".replace(".", "-") @staticmethod @@ -180,7 +180,7 @@ def deployment_yaml( limits["memory"] = memory limits["nvidia.com/gpu"] = str(n_gpus) - #command + # command container["command"] = ["vllm", "serve"] container["args"] = vllm_serve_args # env variables to to set parameters for docker execution @@ -194,7 +194,7 @@ def deployment_yaml( # {"name": "TENSOR_PARALLEL_SIZE", "value": str(n_gpus)}, # ] if hf_token is not None: - container["env"]=[{"name": "HF_TOKEN", "value": hf_token}] + container["env"] = [{"name": "HF_TOKEN", "value": hf_token}] if claim_name is not None: if "env" not in container: container["env"] = [] diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index 1d81ee36..e3d39cc8 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -17,6 +17,7 @@ "valencia_url_in_b64_out": "valencia_url_in_b64_out.jsonl", } + def execute_benchmark( base_url: str, model: str, From 35bb2e2f396bc056e7e108eac756f28b5609e4ac Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Tue, 4 Nov 2025 09:35:00 +0000 Subject: [PATCH 25/46] chore: Removed dataset file as it was relocated to a different folder Signed-off-by: Christian Pinto --- .../ado_actuators/vllm_performance/geospatial_india.jsonl | 1 - 1 file changed, 1 deletion(-) delete mode 100644 plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl deleted file mode 100644 index 693bbc09..00000000 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_india.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"prompt":{"data": {"data": "https://huggingface.co/christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM/resolve/main/India_900498_S2Hand.tif","data_format": "url","out_data_format": "b64_json","indices": [1, 2, 3, 8, 11, 12]},"priority": 0,"softmax": false}} From c68ab3540c06788f5249cdb5bd21c87be05ec7f3 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Tue, 4 Nov 2025 16:03:38 +0000 Subject: [PATCH 26/46] feat: Added custom dataset geospatial experiment Signed-off-by: Christian Pinto --- .../vllm_performance/actuator.py | 1 + .../vllm_performance/experiment_executor.py | 5 +- .../performance_testing_geospatial.yaml | 220 +++++++++++++++++- .../execute_benchmark.py | 71 +++--- 4 files changed, 268 insertions(+), 29 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py index dd45091f..38e9f47a 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py @@ -195,6 +195,7 @@ async def submit( if experiment.identifier in [ "performance-testing-full", "performance-testing-geospatial-full", + 
"performance-testing-geospatial-full-custom-dataset", ]: if not self.env_manager: raise MissingConfigurationForExperimentError( diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 693bdb58..d588a884 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -285,7 +285,10 @@ def run_resource_and_workload_experiment( start = time.time() result = None try: - if experiment.identifier == "performance-testing-geospatial-full": + if experiment.identifier in [ + "performance-testing-geospatial-full", + "performance-testing-geospatial-full-custom-dataset", + ]: result = execute_geospatial_benchmark( base_url=base_url, model=values.get("model"), diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 5d976439..9edd8668 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -216,7 +216,7 @@ performance_testing-geospatial-full: values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] - identifier: 'skip_tokenizer_init' metadata: - description: "(deployment) skip tokenizer intialization" + description: "(deployment) skip tokenizer initialization" propertyDomain: variableType: BINARY_VARIABLE_TYPE values: [True, False] @@ -286,6 +286,224 @@ performance_testing-geospatial-full: identifier: 'dataset' value: 'india_url_in_b64_out' # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "total_input_tokens" + - identifier: "total_output_tokens" + - identifier: "request_throughput" + - identifier: "output_throughput" + - identifier: "total_token_throughput" + - identifier: "mean_ttft_ms" + - identifier: "median_ttft_ms" + - identifier: "std_ttft_ms" + - identifier: "p25_ttft_ms" + - identifier: "p50_ttft_ms" + - identifier: "p75_ttft_ms" + - identifier: "p99_ttft_ms" + - identifier: "mean_tpot_ms" + - identifier: "median_tpot_ms" + - identifier: "std_tpot_ms" + - identifier: "p25_tpot_ms" + - identifier: "p50_tpot_ms" + - identifier: "p75_tpot_ms" + - identifier: "p99_tpot_ms" + - identifier: "mean_itl_ms" + - identifier: "median_itl_ms" + - identifier: "std_itl_ms" + - identifier: "p25_itl_ms" + - identifier: "p50_itl_ms" + - identifier: "p75_itl_ms" + - identifier: "p99_itl_ms" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'VLLM performance testing across compute resource and workload configuration' +performance_testing-geospatial-full-custom-dataset: + identifier: performance-testing-geospatial-full-custom-dataset + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] + - identifier: 'request_rate' + metadata: + description: "(benchmark) The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "(benchmark) The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "(benchmark) The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + - identifier: 'burstiness' + metadata: + description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: image + metadata: + description: "(deployment) Docker image to use to create vllm deployments" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] + - identifier: n_cpus + metadata: + description: "(deployment) the number of CPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 1,17 ] + interval: 1 + - identifier: memory + metadata: + description: "(deployment) the amount of memory to allocate to vLLM pod" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "64Gi", "128Gi", "256Gi" ] + - identifier: dtype + metadata: + description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] + - identifier: 'gpu_memory_utilization' + metadata: + description: "(deployment) The fraction of GPU memory to be used for the model executor," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ .5, .75, .9 ] + - identifier: 'cpu_offload' + metadata: + description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 
0 means all weights are on GPU," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ 0, 8, 16, 24, 32 ] + - identifier: 'max_num_seq' + metadata: + description: "(deployment) Maximum number of sequences per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [32,2049] + interval: 32 + - identifier: 'max_batch_tokens' + metadata: + description: "(deployment) maximum number of batched tokens per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 8192, 32769] + interval: 1024 + - identifier: 'n_gpus' + metadata: + description: "(deployment) Number of GPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,9] + interval: 1 + - identifier: 'gpu_type' + metadata: + description: "(deployment) The GPU type to use" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + - identifier: 'skip_tokenizer_init' + metadata: + description: "(deployment) skip tokenizer initialization" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'enforce_eager' + metadata: + description: "(deployment) enforce pytorch eager mode" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'io_processor_plugin' + metadata: + description: 'IO Pocessor plugin to load for the model' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ None, "terratorch_segmentation" ] + - identifier: 'dataset' + metadata: + description: "(benchmark) The dataset to be used for the experiment" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + defaultParameterization: + - property: + identifier: 'image' + value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" + - property: + identifier: n_cpus + value: 8 + - property: + identifier: + memory + value: "128Gi" + - property: + identifier: dtype + value: "auto" + - property: + identifier: 'num_prompts' + value: 500 + - property: + identifier: 'max_concurrency' + value: -1 + - property: + identifier: 'burstiness' + value: 1.0 + - property: + identifier: 'gpu_memory_utilization' + value: .9 + - property: + identifier: 'cpu_offload' + value: 0 + - property: + identifier: 'max_num_seq' + value: 256 + - property: + identifier: 'max_batch_tokens' + value: 16384 + - property: + identifier: 'n_gpus' + value: 1 + - property: + identifier: 'gpu_type' + value: 'NVIDIA-A100-80GB-PCIe' + - property: + identifier: 'skip_tokenizer_init' + value: True + - property: + identifier: 'enforce_eager' + value: True + - property: + identifier: 'io_processor_plugin' + value: "terratorch_segmentation" + - property: + identifier: 'dataset' + value: None + # measurements targetProperties: - identifier: "duration" - identifier: "completed" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index e3d39cc8..8b651cfc 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -12,6 +12,8 @@ get_results, ) +logger = logging.getLogger("vllm-bench") + default_geospatial_datasets_filenames = { "india_url_in_b64_out": "india_url_in_b64_out.jsonl", 
"valencia_url_in_b64_out": "valencia_url_in_b64_out.jsonl", @@ -51,7 +53,6 @@ def execute_benchmark( keys are vllm benchmark arguments. values are the values to pass to the arguments :return: results dictionary """ - logger = logging.getLogger("vllm-bench") logger.debug( f"executing benchmark, invoking service at {base_url} with the parameters: " @@ -181,34 +182,50 @@ def execute_geospatial_benchmark( :param output_token_length: length of output tokens :return: results dictionary """ - from importlib import resources - - dataset_filename = default_geospatial_datasets_filenames[dataset] - - with resources.path( - "ado_actuators.vllm_performance.datasets", - dataset_filename, - ) as data_set_path: - return execute_benchmark( - base_url=base_url, - backend="io-processor-plugin", - model=model, - data_set="custom", - interpreter=interpreter, - num_prompts=num_prompts, - request_rate=request_rate, - max_concurrency=max_concurrency, - hf_token=hf_token, - benchmark_retries=benchmark_retries, - retries_timeout=retries_timeout, - burstiness=burstiness, - custom_args={ - "--dataset-path": data_set_path, - "--endpoint": "/pooling", - "--skip-tokenizer-init": True, - }, + + if dataset in default_geospatial_datasets_filenames: + from pathlib import Path + + dataset_filename = default_geospatial_datasets_filenames[dataset] + parent_path = Path(__file__).parents[1].absolute() + data_set_path = os.path.join(parent_path, "datasets", dataset_filename) + else: + # This can only happen with the performance-testing-geospatial-full-custom-dataset + # experiment, otherwise the dataset name is always one of the allowed ones. + # Here the assumption is that the dataset file is placed in the process working directory. + ray_working_dir = os.getcwd() + data_set_path = os.path.join(ray_working_dir, dataset) + + if not os.path.exists(data_set_path) or not os.path.isfile(data_set_path): + logger.warning( + f"The dataset filename provided does not exist or does not point to a valid file: {data_set_path}" + ) + raise Exception( + f"The dataset filename provided does not exist or does not point to a valid file: {data_set_path}" ) + logger.debug(f"Dataset path {data_set_path}") + + return execute_benchmark( + base_url=base_url, + backend="io-processor-plugin", + model=model, + data_set="custom", + interpreter=interpreter, + num_prompts=num_prompts, + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=hf_token, + benchmark_retries=benchmark_retries, + retries_timeout=retries_timeout, + burstiness=burstiness, + custom_args={ + "--dataset-path": data_set_path, + "--endpoint": "/pooling", + "--skip-tokenizer-init": True, + }, + ) + if __name__ == "__main__": results = execute_geospatial_benchmark( From 41815b6073d83570962f11468be04c7ca9ed782a Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 6 Nov 2025 13:28:51 +0000 Subject: [PATCH 27/46] fix: Reworked vllm_catalog actuator experiments catalog loading Signed-off-by: Christian Pinto --- .../vllm_performance/actuator.py | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py index 38e9f47a..ec38f1fe 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py @@ -3,8 +3,8 @@ import json import logging -import os import uuid +from pathlib import Path 
import ray import yaml @@ -57,27 +57,24 @@ def catalog( # Loading experiment definitions for yaml files contained in the `experiments` directory. # NOTE: Only files can be placed in the experiments directory, # but each file can contain multiple experiment definitions - path = os.path.abspath(__file__) - exp_dir = os.path.join(os.path.split(path)[0], "experiments") + curr_path = Path(__file__) + exp_dir = curr_path.parent / Path("experiments") + logger.debug(f"Experiments dir {exp_dir.absolute()}") experiments = [] - for exp_file in os.listdir(exp_dir): - logger.debug(f"Loading experiments from {exp_file}") - exp_file_path = os.path.join(exp_dir, exp_file) - if os.path.isdir(exp_file_path): - logger.error( - f"{exp_file_path} is a directory. Only files are supported in the experiments directory" - ) - raise Exception( - f"{exp_file_path} is a directory. Only files are supported in the experiments directory" - ) - with open(exp_file_path) as f: - try: - data = yaml.safe_load(f) - except yaml.YAMLError as e: - logger.error(f"File {exp_file} is a malformed YAML - {e}") - raise Exception(f"File {exp_file} is a malformed YAML - {e}") + for exp_file in exp_dir.iterdir(): + if exp_file.is_dir(): + continue - experiments.extend([Experiment(**data[e]) for e in data]) + logger.debug(f"Loading experiments from {exp_file.name}") + try: + file_data = exp_file.read_text() + data = yaml.safe_load(file_data) + except yaml.YAMLError: + error_message = f"File {exp_file.name} is a malformed YAML" + logger.error(error_message) + raise ValueError(error_message) + + experiments.extend([Experiment.model_validate(data[e]) for e in data]) return ExperimentCatalog( catalogIdentifier=cls.identifier, From 53bf77ae3423e3b9ab8290a5f5d9c5350e8830c3 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 6 Nov 2025 13:49:03 +0000 Subject: [PATCH 28/46] chore: Improved experiment definition language and properties typing Signed-off-by: Christian Pinto --- .../experiments/performance_testing.yaml | 7 ++++--- .../performance_testing_geospatial.yaml | 19 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml index c9537fd7..032bfec1 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml @@ -58,7 +58,7 @@ performance_testing-full: interval: 1 - identifier: 'dataset' metadata: - description: "(benchmark) The dataset to be used fof the experiment" + description: "(benchmark) The dataset to be used for the experiment" propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" values: [ 'random' ] @@ -128,7 +128,7 @@ performance_testing-full: values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] - identifier: 'skip_tokenizer_init' metadata: - description: "(deployment) skip tokenizer intialization" + description: "(deployment) skip tokenizer initialization" propertyDomain: variableType: BINARY_VARIABLE_TYPE values: [True, False] @@ -248,6 +248,7 @@ performance_testing-endpoint: description: 'The endpoint(s) to test' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: ["http://localhost:8000"] - identifier: 'request_rate' metadata: description: "The number of requests to send per second" @@ -293,7 
+294,7 @@ performance_testing-endpoint: interval: 1 - identifier: 'dataset' metadata: - description: "(benchmark) The dataset to be used fof the experiment" + description: "(benchmark) The dataset to be used for the experiment" propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" values: [ 'random' ] diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 9edd8668..29d3a81d 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -17,7 +17,8 @@ performance_testing-geospatial-endpoint: metadata: description: 'The endpoint(s) to test' propertyDomain: - variableType: "UNKNOWN_VARIABLE_TYPE" + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: ["http://localhost:8000"] - identifier: 'request_rate' metadata: description: "The number of requests to send per second" @@ -341,6 +342,12 @@ performance_testing-geospatial-full-custom-dataset: variableType: 'DISCRETE_VARIABLE_TYPE' domainRange: [-1,1000] interval: 1 # -1 means send all requests at time 0 + - identifier: 'dataset' + metadata: + description: "(benchmark) The dataset to be used for the experiment" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [" custom_dataset.jsonl "] optionalProperties: - identifier: 'num_prompts' metadata: @@ -444,12 +451,7 @@ performance_testing-geospatial-full-custom-dataset: description: 'IO Pocessor plugin to load for the model' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ None, "terratorch_segmentation" ] - - identifier: 'dataset' - metadata: - description: "(benchmark) The dataset to be used for the experiment" - propertyDomain: - variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "terratorch_segmentation" ] defaultParameterization: - property: identifier: 'image' @@ -500,9 +502,6 @@ performance_testing-geospatial-full-custom-dataset: - property: identifier: 'io_processor_plugin' value: "terratorch_segmentation" - - property: - identifier: 'dataset' - value: None # measurements targetProperties: - identifier: "duration" From 18f217c331e55781d07453a4548115e89bf7abc2 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 6 Nov 2025 13:56:19 +0000 Subject: [PATCH 29/46] fix: Improved logic for fetching a dataset in the geospatial benchmark Signed-off-by: Christian Pinto --- .../k8/yaml_support/build_components.py | 11 +------ .../execute_benchmark.py | 33 ++++++++----------- 2 files changed, 14 insertions(+), 30 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index ffca8985..36ab4fe0 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -183,16 +183,7 @@ def deployment_yaml( # command container["command"] = ["vllm", "serve"] container["args"] = vllm_serve_args - # env variables to to set parameters for docker execution - # container["env"] = [ - # {"name": "MODEL", "value": model}, - # {"name": 
"GPU_MEMORY_UTILIZATION", "value": str(gpu_memory_utilization)}, - # {"name": "DTYPE", "value": dtype.value}, - # {"name": "CPU_OFFLOAD_GB", "value": str(cpu_offload)}, - # {"name": "MAX_NUM_BATCHED_TOKENS", "value": str(max_batch_tokens)}, - # {"name": "MAX_NUM_SEQ", "value": str(max_num_seq)}, - # {"name": "TENSOR_PARALLEL_SIZE", "value": str(n_gpus)}, - # ] + if hf_token is not None: container["env"] = [{"name": "HF_TOKEN", "value": hf_token}] if claim_name is not None: diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index a23285fe..0120694c 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -63,13 +63,7 @@ def execute_benchmark( logger.debug( f"request_rate {request_rate}, max_concurrency {max_concurrency}, benchmark retries {benchmark_retries}" ) - # The code below is commented as we are switching from a script invocation to command line - # invocation. If we want to bring back script execution for any reason, this code must be - # uncommented - # parameters - # code = os.path.abspath( - # os.path.join(os.path.dirname(__file__), "benchmark_serving.py") - # ) + request = f"export HF_TOKEN={hf_token} && " if hf_token is not None else "" f_name = f"{uuid.uuid4().hex}.json" request += ( @@ -182,27 +176,26 @@ def execute_geospatial_benchmark( :param output_token_length: length of output tokens :return: results dictionary """ + from pathlib import Path if dataset in default_geospatial_datasets_filenames: - from pathlib import Path - dataset_filename = default_geospatial_datasets_filenames[dataset] - parent_path = Path(__file__).parents[1].absolute() - data_set_path = os.path.join(parent_path, "datasets", dataset_filename) + parent_path = Path(__file__).parents[1] + data_set_path = parent_path / "datasets" / dataset_filename else: # This can only happen with the performance-testing-geospatial-full-custom-dataset # experiment, otherwise the dataset name is always one of the allowed ones. # Here the assumption is that the dataset file is placed in the process working directory. 
- ray_working_dir = os.getcwd() - data_set_path = os.path.join(ray_working_dir, dataset) + ray_working_dir = Path.cwd() + data_set_path = ray_working_dir / dataset - if not os.path.exists(data_set_path) or not os.path.isfile(data_set_path): - logger.warning( - f"The dataset filename provided does not exist or does not point to a valid file: {data_set_path}" - ) - raise Exception( - f"The dataset filename provided does not exist or does not point to a valid file: {data_set_path}" + if not data_set_path.is_file(): + error_string = ( + "The dataset filename provided does not exist or " + f"does not point to a valid file: {data_set_path}" ) + logger.warning(error_string) + raise ValueError(error_string) logger.debug(f"Dataset path {data_set_path}") @@ -220,7 +213,7 @@ def execute_geospatial_benchmark( retries_timeout=retries_timeout, burstiness=burstiness, custom_args={ - "--dataset-path": data_set_path, + "--dataset-path": f"{data_set_path.resolve()}", "--endpoint": "/pooling", "--skip-tokenizer-init": True, }, From e93779454af7494a7de4d152bb7ff106fc6d1bb1 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 6 Nov 2025 16:24:28 +0000 Subject: [PATCH 30/46] chore: various fixes after review Signed-off-by: Christian Pinto --- .../experiments/performance_testing.yaml | 2 +- .../performance_testing_geospatial.yaml | 8 ++-- .../execute_benchmark.py | 42 +++++++++---------- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml index 032bfec1..a60a17d4 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml @@ -134,7 +134,7 @@ performance_testing-full: values: [True, False] - identifier: 'enforce_eager' metadata: - description: "(deployment) enforce pytorch eager mode" + description: "(deployment) enforce PyTorch eager mode" propertyDomain: variableType: BINARY_VARIABLE_TYPE values: [True, False] diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 29d3a81d..43f8e3e2 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -229,7 +229,7 @@ performance_testing-geospatial-full: values: [True, False] - identifier: 'io_processor_plugin' metadata: - description: 'IO Pocessor plugin to load for the model' + description: 'IO Processor plugin to load for the model' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [ None, "terratorch_segmentation" ] @@ -347,7 +347,7 @@ performance_testing-geospatial-full-custom-dataset: description: "(benchmark) The dataset to be used for the experiment" propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [" custom_dataset.jsonl "] + values: ["custom_dataset.jsonl"] optionalProperties: - identifier: 'num_prompts' metadata: @@ -442,13 +442,13 @@ performance_testing-geospatial-full-custom-dataset: values: [True, False] - identifier: 'enforce_eager' metadata: -
description: "(deployment) enforce pytorch eager mode" + description: "(deployment) enforce PyTorch eager mode" propertyDomain: variableType: BINARY_VARIABLE_TYPE values: [True, False] - identifier: 'io_processor_plugin' metadata: - description: 'IO Pocessor plugin to load for the model' + description: 'IO Processor plugin to load for the model' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [ "terratorch_segmentation" ] diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index 0120694c..028abdd3 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -23,7 +23,7 @@ def execute_benchmark( base_url: str, model: str, - data_set: str, + dataset: str, backend: str = "openai", interpreter: str = "python", num_prompts: int = 500, @@ -32,7 +32,7 @@ def execute_benchmark( hf_token: str | None = None, benchmark_retries: int = 3, retries_timeout: int = 5, - data_set_path: str | None = None, + dataset_path: str | None = None, custom_args: dict[str, Any] | None = None, burstiness: float = 1, ) -> dict[str, Any]: @@ -40,7 +40,7 @@ def execute_benchmark( Execute benchmark :param base_url: url for vllm endpoint :param model: model - :param data_set: data set name ["sharegpt", "sonnet", "random", "hf"] + :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] :param interpreter - name of Python interpreter :param num_prompts: number of prompts :param request_rate: request rate @@ -48,7 +48,7 @@ def execute_benchmark( :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry - :param data_set_path: path to the dataset + :param dataset_path: path to the dataset :param custom_args: custom arguments to pass to the benchmark. keys are vllm benchmark arguments. values are the values to pass to the arguments :return: results dictionary @@ -58,7 +58,7 @@ def execute_benchmark( f"executing benchmark, invoking service at {base_url} with the parameters: " ) logger.debug( - f"model {model}, data set {data_set}, python {interpreter}, num prompts {num_prompts}" + f"model {model}, data set {dataset}, python {interpreter}, num prompts {num_prompts}" ) logger.debug( f"request_rate {request_rate}, max_concurrency {max_concurrency}, benchmark retries {benchmark_retries}" @@ -67,16 +67,14 @@ def execute_benchmark( request = f"export HF_TOKEN={hf_token} && " if hf_token is not None else "" f_name = f"{uuid.uuid4().hex}.json" request += ( - # changing from script invocation to cli invocation - # f"{interpreter} {code} --backend openai --base-url {base_url} --dataset-name {data_set} " - f"vllm bench serve --backend {backend} --base-url {base_url} --dataset-name {data_set} " + f"vllm bench serve --backend {backend} --base-url {base_url} --dataset-name {dataset} " f"--model {model} --seed 12345 --num-prompts {num_prompts!s} --save-result --metric-percentiles " f'"25,75,99" --percentile-metrics "ttft,tpot,itl,e2el" --result-dir . 
--result-filename {f_name} ' f"--burstiness {burstiness} " ) - if data_set_path is not None: - request += f" --dataset-path {data_set_path} " + if dataset_path is not None: + request += f" --dataset-path {dataset_path} " if request_rate is not None: request += f" --request-rate {request_rate!s} " if max_concurrency is not None: @@ -123,7 +121,7 @@ def execute_random_benchmark( Execute benchmark with random dataset :param base_url: url for vllm endpoint :param model: model - :param data_set: data set name ["sharegpt", "sonnet", "random", "hf"] + :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry @@ -135,7 +133,7 @@ def execute_random_benchmark( return execute_benchmark( base_url=base_url, model=model, - data_set=dataset, + dataset=dataset, interpreter=interpreter, num_prompts=num_prompts, request_rate=request_rate, @@ -168,12 +166,12 @@ def execute_geospatial_benchmark( Execute benchmark with random dataset :param base_url: url for vllm endpoint :param model: model - :param data_set: data set name ["sharegpt", "sonnet", "random", "hf"] + :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry - :param input_token_length: length of input tokens - :param output_token_length: length of output tokens + :param burstiness: burstiness factor of the request generation, 0 < burstiness < 1 + :param interpreter: python interpreter to use :return: results dictionary """ from pathlib import Path @@ -181,29 +179,29 @@ def execute_geospatial_benchmark( if dataset in default_geospatial_datasets_filenames: dataset_filename = default_geospatial_datasets_filenames[dataset] parent_path = Path(__file__).parents[1] - data_set_path = parent_path / "datasets" / dataset_filename + dataset_path = parent_path / "datasets" / dataset_filename else: # This can only happen with the performance-testing-geospatial-full-custom-dataset # experiment, otherwise the dataset name is always one of the allowed ones. # Here the assumption is that the dataset file is placed in the process working directory. 
ray_working_dir = Path.cwd() - data_set_path = ray_working_dir / dataset + dataset_path = ray_working_dir / dataset - if not data_set_path.is_file(): + if not dataset_path.is_file(): error_string = ( "The dataset filename provided does not exist or " - f"does not point to a valid file: {data_set_path}" + f"does not point to a valid file: {dataset_path}" ) logger.warning(error_string) raise ValueError(error_string) - logger.debug(f"Dataset path {data_set_path}") + logger.debug(f"Dataset path {dataset_path}") return execute_benchmark( base_url=base_url, backend="io-processor-plugin", model=model, - data_set="custom", + dataset="custom", interpreter=interpreter, num_prompts=num_prompts, request_rate=request_rate, @@ -213,7 +211,7 @@ def execute_geospatial_benchmark( retries_timeout=retries_timeout, burstiness=burstiness, custom_args={ - "--dataset-path": f"{data_set_path.resolve()}", + "--dataset-path": f"{dataset_path.resolve()}", "--endpoint": "/pooling", "--skip-tokenizer-init": True, }, From bad013389fbd453c271ca752778abc6fa4742d62 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 7 Nov 2025 11:37:47 +0000 Subject: [PATCH 31/46] chore(performance_testing_geospatial): removing target properties that are irrelevant to geospatial tests Signed-off-by: Christian Pinto --- .../performance_testing_geospatial.yaml | 69 ------------------- 1 file changed, 69 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 43f8e3e2..65ee2733 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -74,29 +74,6 @@ performance_testing-geospatial-endpoint: - identifier: "total_input_tokens" - identifier: "total_output_tokens" - identifier: "request_throughput" - - identifier: "output_throughput" - - identifier: "total_token_throughput" - - identifier: "mean_ttft_ms" - - identifier: "median_ttft_ms" - - identifier: "std_ttft_ms" - - identifier: "p25_ttft_ms" - - identifier: "p50_ttft_ms" - - identifier: "p75_ttft_ms" - - identifier: "p99_ttft_ms" - - identifier: "mean_tpot_ms" - - identifier: "median_tpot_ms" - - identifier: "std_tpot_ms" - - identifier: "p25_tpot_ms" - - identifier: "p50_tpot_ms" - - identifier: "p75_tpot_ms" - - identifier: "p99_tpot_ms" - - identifier: "mean_itl_ms" - - identifier: "median_itl_ms" - - identifier: "std_itl_ms" - - identifier: "p25_itl_ms" - - identifier: "p50_itl_ms" - - identifier: "p75_itl_ms" - - identifier: "p99_itl_ms" - identifier: "mean_e2el_ms" - identifier: "median_e2el_ms" - identifier: "std_e2el_ms" @@ -293,29 +270,6 @@ performance_testing-geospatial-full: - identifier: "total_input_tokens" - identifier: "total_output_tokens" - identifier: "request_throughput" - - identifier: "output_throughput" - - identifier: "total_token_throughput" - - identifier: "mean_ttft_ms" - - identifier: "median_ttft_ms" - - identifier: "std_ttft_ms" - - identifier: "p25_ttft_ms" - - identifier: "p50_ttft_ms" - - identifier: "p75_ttft_ms" - - identifier: "p99_ttft_ms" - - identifier: "mean_tpot_ms" - - identifier: "median_tpot_ms" - - identifier: "std_tpot_ms" - - identifier: "p25_tpot_ms" - - identifier: "p50_tpot_ms" - - identifier: "p75_tpot_ms" - - identifier: "p99_tpot_ms" - - 
identifier: "mean_itl_ms" - - identifier: "median_itl_ms" - - identifier: "std_itl_ms" - - identifier: "p25_itl_ms" - - identifier: "p50_itl_ms" - - identifier: "p75_itl_ms" - - identifier: "p99_itl_ms" - identifier: "mean_e2el_ms" - identifier: "median_e2el_ms" - identifier: "std_e2el_ms" @@ -509,29 +463,6 @@ performance_testing-geospatial-full-custom-dataset: - identifier: "total_input_tokens" - identifier: "total_output_tokens" - identifier: "request_throughput" - - identifier: "output_throughput" - - identifier: "total_token_throughput" - - identifier: "mean_ttft_ms" - - identifier: "median_ttft_ms" - - identifier: "std_ttft_ms" - - identifier: "p25_ttft_ms" - - identifier: "p50_ttft_ms" - - identifier: "p75_ttft_ms" - - identifier: "p99_ttft_ms" - - identifier: "mean_tpot_ms" - - identifier: "median_tpot_ms" - - identifier: "std_tpot_ms" - - identifier: "p25_tpot_ms" - - identifier: "p50_tpot_ms" - - identifier: "p75_tpot_ms" - - identifier: "p99_tpot_ms" - - identifier: "mean_itl_ms" - - identifier: "median_itl_ms" - - identifier: "std_itl_ms" - - identifier: "p25_itl_ms" - - identifier: "p50_itl_ms" - - identifier: "p75_itl_ms" - - identifier: "p99_itl_ms" - identifier: "mean_e2el_ms" - identifier: "median_e2el_ms" - identifier: "std_e2el_ms" From 164e13e35e7750777616f7210e08e34b991644b6 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 7 Nov 2025 11:39:20 +0000 Subject: [PATCH 32/46] chore(execute_benchmark): docstrings cleanup Signed-off-by: Christian Pinto --- .../execute_benchmark.py | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index 028abdd3..839aa528 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -41,16 +41,19 @@ def execute_benchmark( :param base_url: url for vllm endpoint :param model: model :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] - :param interpreter - name of Python interpreter + :param backend: name of the vLLM benchmark backend to be used ["vllm", "openai", "openai-chat", "openai-audio", "openai-embeddings"] + :param interpreter: name of Python interpreter :param num_prompts: number of prompts :param request_rate: request rate - :param max_concurrency: max concurrency + :param max_concurrency: maximum number of concurrent requests :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry :param dataset_path: path to the dataset :param custom_args: custom arguments to pass to the benchmark. + :param burstiness: burstiness factor of the request generation, 0 < burstiness < 1 keys are vllm benchmark arguments. 
values are the values to pass to the arguments + + :return: results dictionary """ @@ -68,7 +71,7 @@ def execute_benchmark( f_name = f"{uuid.uuid4().hex}.json" request += ( f"vllm bench serve --backend {backend} --base-url {base_url} --dataset-name {dataset} " f"--model {model} --seed 12345 --num-prompts {num_prompts!s} --save-result --metric-percentiles " f'"25,75,99" --percentile-metrics "ttft,tpot,itl,e2el" --result-dir . --result-filename {f_name} ' f"--burstiness {burstiness} " ) @@ -122,7 +125,7 @@ def execute_random_benchmark( Execute benchmark with random dataset :param base_url: url for vllm endpoint :param model: model :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] + :param num_prompts: number of prompts + :param request_rate: request rate + :param max_concurrency: maximum number of concurrent requests :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry - :param input_token_length: length of input tokens - :param output_token_length: length of output tokens + :param burstiness: burstiness factor of the request generation, 0 < burstiness < 1 + :param number_input_tokens: maximum number of input tokens for each request + :param max_output_tokens: maximum number of output tokens for each request + :param interpreter: name of Python interpreter + :return: results dictionary """ # Call execute_benchmark with the appropriate arguments @@ -167,12 +176,12 @@ def execute_geospatial_benchmark( Execute benchmark with random dataset :param base_url: url for vllm endpoint :param model: model :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] + :param num_prompts: number of prompts + :param request_rate: request rate + :param max_concurrency: maximum number of concurrent requests :param hf_token: huggingface token :param benchmark_retries: number of benchmark execution retries :param retries_timeout: timeout between initial retry :param burstiness: burstiness factor of the request generation, 0 < burstiness < 1 :param interpreter: python interpreter to use + :return: results dictionary """ from pathlib import Path From 7da44ddd98b31075d200163fe3f22ba7c676a570 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Wed, 19 Nov 2025 11:33:07 +0000 Subject: [PATCH 33/46] feat(group_samplers): Improved GroupSampler groups building process Moved to reading the entities from storage only when they are returned by the generator rather than fetching the whole space when initializing the sampler.
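A minimal sketch of the deferred-fetch pattern this commit moves to (illustrative only: `grouped_lazy_iterator` and its arguments are hypothetical names, `fetch_entity` stands in for the `entity_for_point` lookup used in the diff below, and the grouping mirrors `_build_groups_dict`):

    def grouped_lazy_iterator(points, group, fetch_entity):
        # points: list[dict] of constitutive property values, cheap to enumerate
        # fetch_entity: callable resolving a point dict to a full stored Entity
        groups = {}
        for point in points:
            # group key is the subset of properties named in `group`
            key = frozenset((k, v) for k, v in point.items() if k in group)
            groups.setdefault(key, []).append(point)
        for members in groups.values():
            # entities are read from storage only when their group is yielded
            yield [fetch_entity(p) for p in members]

With this shape, only the groups a sampler actually consumes trigger reads from the store, instead of every Entity being materialized when the sampler is initialized.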
Signed-off-by: Christian Pinto --- .../core/discoveryspace/group_samplers.py | 96 +++++++++++-------- 1 file changed, 54 insertions(+), 42 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index 9a20b3c3..4149eccf 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -21,24 +21,18 @@ def _build_entity_group_values( - entity: Entity, group: list[str] + entity: dict, group: list[str] ) -> frozenset[tuple[str, Any]]: """ - :return: A frozen set of (key,value) paris + :return: A frozen set of (key,value) pairs """ # build a dictionary of entity values given the group - return frozenset( - { - (v.property.identifier, v.value) - for v in entity.constitutive_property_values - if v.property.identifier in group - } - ) + return frozenset({(k, v) for k, v in entity.items() if k in group}) def _build_groups_dict( - entities: list[Entity], group: list[str] -) -> dict[frozenset[tuple[str, Any]], list[Entity]]: + entities: list[dict], group: list[str] +) -> dict[frozenset[tuple[str, Any]], list[dict]]: """ builds a dict of lists of entities, combining entities based on group definitions :param entities: list of entities @@ -55,7 +49,7 @@ def _build_groups_dict( return groups -def _build_groups_list(entities: list[Entity], group: list[str]) -> list[list[Entity]]: +def _build_groups_list(entities: list[dict], group: list[str]) -> list[list[dict]]: """ builds a list of lists of entities, combining entities based on group definitions :param entities: list of entities @@ -68,7 +62,7 @@ def _build_groups_list(entities: list[Entity], group: list[str]) -> list[list[En async def _get_grouped_sample_async( generator: AsyncGenerator[list[Entity], None], -) -> list[Entity] | None: +) -> list[dict] | None: try: return await anext(generator) except (StopAsyncIteration, StopIteration): @@ -77,7 +71,7 @@ async def _get_grouped_sample_async( def _get_grouped_sample( generator: Generator[list[Entity], None, None], -) -> list[Entity] | None: +) -> list[dict] | None: try: return next(generator) except (StopAsyncIteration, StopIteration): @@ -85,8 +79,8 @@ def _get_grouped_sample( async def _sequential_iterator_async( - entities: list[Entity], group: list[str] -) -> AsyncGenerator[list[Entity], None]: + entities: list[dict], group: list[str] +) -> AsyncGenerator[list[dict], None]: """ Sequential iterator through discovery space with grouping :param entities: list of entities @@ -100,8 +94,8 @@ async def _sequential_iterator_async( def _sequential_iterator( - entities: list[Entity], group: list[str] -) -> Generator[list[Entity], None, None]: + entities: list[dict], group: list[str] +) -> Generator[list[dict], None, None]: """ Sequential iterator through discovery space with grouping :param entities: list of entities @@ -115,8 +109,8 @@ def _sequential_iterator( async def _random_iterator_async( - entities: list[Entity], group: list[str] -) -> AsyncGenerator[list[Entity], None]: + entities: list[dict], group: list[str] +) -> AsyncGenerator[list[dict], None]: """ Random iterator through discovery space with grouping :param entities: list of entities @@ -133,8 +127,8 @@ async def _random_iterator_async( def _random_iterator( - entities: list[Entity], group: list[str] -) -> Generator[list[Entity], None, None]: + entities: list[dict], group: list[str] +) -> Generator[list[dict], None, None]: """ Random iterator through discovery space with grouping :param entities: list of entities 
@@ -151,7 +145,9 @@ def _random_iterator( def _sequential_group_iterator( - generator: Generator[list[Entity], None, None], batchsize: int + generator: Generator[list[dict], None, None], + discoverySpace: DiscoverySpace, + batchsize: int, ) -> Generator[list[Entity], None, None]: """ Sequential group iterator @@ -174,8 +170,10 @@ def _sequential_group_iterator( # mark that we are done and break done = True break + # Retrieve entity from the store + entity = discoverySpace.entity_for_point(sample[0]) # append a new entity to batch - batch.append(sample[0]) + batch.append(entity) # remove entity from samples sample = sample[1:] # submit a batch and clean it up @@ -186,7 +184,9 @@ def _sequential_group_iterator( async def _sequential_group_iterator_async( - generator: AsyncGenerator[list[Entity], None], batchsize: int + generator: AsyncGenerator[list[Entity], None], + discoverySpaceManager: DiscoverySpaceManager, + batchsize: int, ) -> AsyncGenerator[list[Entity], None]: """ Async sequential group iterator @@ -210,8 +210,10 @@ async def _sequential_group_iterator_async( # mark that we are done done = True break + # Retrieve entity from the store + entity = ray.get(discoverySpaceManager.entity_for_point(sample[0])) # append a new entity to batch - batch.append(sample[0]) + batch.append(entity) # remove entity from samples sample = sample[1:] # submit a batch and clean it up @@ -283,7 +285,7 @@ async def remoteEntityIterator( remoteDiscoverySpace=remoteDiscoverySpace ) return _sequential_group_iterator_async( - generator=grooped_iterator, batchsize=batchsize + generator=grooped_iterator, discbatchsize=batchsize ) @@ -388,29 +390,24 @@ def _get_remote_space_entities( # build list of entities return self._get_space_entities(discoverySpace=dspace) - def _get_space_entities(self, discoverySpace: DiscoverySpace) -> list[Entity]: + def _get_space_entities(self, discoverySpace: DiscoverySpace) -> list[dict]: """ Building list of entities for a discovery space :param discoverySpace: discovery space :return: list of entities """ - # get entity space entity_space = discoverySpace.entitySpace - # create sampler generator - self.samplerCompatibleWithEntitySpace(entity_space) - # create iterator - iterator = super().entityIterator(discoverySpace, batchsize=1) - # get entities - entity_list = [] - for e in iterator: - entity_list.append(e[0]) - return entity_list + property_names = [c.identifier for c in entity_space.constitutiveProperties] + return [ + dict(zip(property_names, point)) + for point in entity_space.sequential_point_iterator() + ] def entityGroupIterator( self, discoverySpace: DiscoverySpace, - ) -> Generator[list[Entity], None, None]: + ) -> Generator[list[dict], None, None]: """Returns an iterator that samples groups of entities from a discoveryspace Note: The number of entities returned on each call to the iterator can vary as it depends on @@ -420,13 +417,26 @@ def entityGroupIterator( discoverySpace: An orchestrator.model.space.DiscoverySpace instance """ + entitySpace = discoverySpace.entitySpace + + if not ExplicitEntitySpaceGroupedGridSampleGenerator.samplerCompatibleWithEntitySpace( + entitySpace=entitySpace + ): + raise ValueError( + f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entitySpace}" + ) + + entities = self._get_space_entities(discoverySpace=discoverySpace) + def iterator_closure() -> Generator[list[Entity], None, None]: def sequential_iterator() -> Generator[list[Entity], None, None]: - entities = self._get_space_entities(discoverySpace=discoverySpace) 
return _sequential_iterator(entities=entities, group=self.group) def random_iterator() -> Generator[list[Entity], None, None]: - entities = self._get_space_entities(discoverySpace=discoverySpace) return _random_iterator(entities=entities, group=self.group) if self.mode == WalkModeEnum.SEQUENTIAL: @@ -490,7 +500,9 @@ def entityIterator( """Returns an iterator over a sequence of entities ordered by group""" grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( - generator=grouped_iterator, batchsize=batchsize + generator=grouped_iterator, + batchsize=batchsize, + discoverySpace=discoverySpace, ) async def remoteEntityIterator( From 18a65c309b62bcd10516cd6ab95d437d22800e09 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Thu, 20 Nov 2025 11:06:56 +0000 Subject: [PATCH 34/46] fix(group_samplers): Last fixes to make the group samplers tests pass Signed-off-by: Christian Pinto --- .../core/discoveryspace/group_samplers.py | 154 ++++++++++-------- tests/core/test_group_samplers.py | 35 +++- 2 files changed, 118 insertions(+), 71 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index 4149eccf..dad9617d 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -20,19 +20,60 @@ moduleLog = logging.getLogger("groupsamplers") +def _get_space_entities(discoverySpace: DiscoverySpace) -> list[dict]: + """ + Building list of entities for a discovery space + + :param discoverySpace: discovery space + :return: list of entities + """ + entity_space = discoverySpace.entitySpace + property_names = [c.identifier for c in entity_space.constitutiveProperties] + return [ + dict(zip(property_names, point)) + for point in entity_space.sequential_point_iterator() + ] + + +def _get_remote_space_entities( + remoteDiscoverySpace: DiscoverySpaceManager, +) -> list[dict]: + """ + Building list of entities for a discovery space + + :param remoteDiscoverySpace: discovery space actor + :return: list of entities dict + """ + # get discovery space + # noinspection PyUnresolvedReferences + dspace = ray.get(remoteDiscoverySpace.discoverySpace.remote()) + # build list of entities + return _get_space_entities(discoverySpace=dspace) + + def _build_entity_group_values( - entity: dict, group: list[str] + entity: dict | Entity, group: list[str] ) -> frozenset[tuple[str, Any]]: """ :return: A frozen set of (key,value) pairs """ # build a dictionary of entity values given the group + if isinstance(entity, Entity): + return frozenset( + { + (v.property.identifier, v.value) + for v in entity.constitutive_property_values + if v.property.identifier in group + } + ) + # We get a dict in the case of generator-style samplers, where we defer fetching the + # full entity from the store until it is returned by the iterator.
return frozenset({(k, v) for k, v in entity.items() if k in group}) def _build_groups_dict( - entities: list[dict], group: list[str] -) -> dict[frozenset[tuple[str, Any]], list[dict]]: + entities: list[dict | Entity], group: list[str] +) -> dict[frozenset[tuple[str, Any]], list[dict | Entity]]: """ builds a dict of lists of entities, combining entities based on group definitions :param entities: list of entities @@ -49,19 +90,20 @@ def _build_groups_dict( return groups -def _build_groups_list(entities: list[dict], group: list[str]) -> list[list[dict]]: +def _build_groups_list( + entities: list[dict | Entity], group: list[str] +) -> list[list[dict | Entity]]: """ builds a list of lists of entities, combining entities based on group definitions :param entities: list of entities :param group: group definition :return: """ - return list(_build_groups_dict(entities=entities, group=group).values()) async def _get_grouped_sample_async( - generator: AsyncGenerator[list[Entity], None], + generator: AsyncGenerator[list[dict], None], ) -> list[dict] | None: try: return await anext(generator) @@ -79,8 +121,8 @@ def _get_grouped_sample( async def _sequential_iterator_async( - entities: list[dict], group: list[str] -) -> AsyncGenerator[list[dict], None]: + entities: list[dict | Entity], group: list[str] +) -> AsyncGenerator[list[dict | Entity], None]: """ Sequential iterator through discovery space with grouping :param entities: list of entities @@ -94,8 +136,8 @@ async def _sequential_iterator_async( def _sequential_iterator( - entities: list[dict], group: list[str] -) -> Generator[list[dict], None, None]: + entities: list[dict | Entity], group: list[str] +) -> Generator[list[dict | Entity], None, None]: """ Sequential iterator through discovery space with grouping :param entities: list of entities @@ -171,7 +213,11 @@ def _sequential_group_iterator( done = True break # Retrieve entity from the store - entity = discoverySpace.entity_for_point(sample[0]) + if type(sample[0]) is dict: + entity = discoverySpace.entity_for_point(sample[0]) + else: + # The sample is already an Entity + entity = sample[0] # append a new entity to batch batch.append(entity) # remove entity from samples @@ -184,8 +230,8 @@ def _sequential_group_iterator( async def _sequential_group_iterator_async( - generator: AsyncGenerator[list[Entity], None], - discoverySpaceManager: DiscoverySpaceManager, + generator: AsyncGenerator[list[dict], None], + remoteDiscoverySpace: DiscoverySpaceManager, batchsize: int, ) -> AsyncGenerator[list[Entity], None]: """ @@ -210,8 +256,13 @@ async def _sequential_group_iterator_async( # mark that we are done done = True break - # Retrieve entity from the store - entity = ray.get(discoverySpaceManager.entity_for_point(sample[0])) + if type(sample[0]) is dict: + # Retrieve entity from the store + entity = ray.get( + remoteDiscoverySpace.entity_for_point.remote(sample[0]) + ) + else: + entity = sample[0] # append a new entity to batch batch.append(entity) # remove entity from samples @@ -275,17 +326,21 @@ def entityIterator( ) -> Generator[list[Entity], None, None]: grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( - generator=grouped_iterator, batchsize=batchsize + generator=grouped_iterator, + discoverySpace=discoverySpace, + batchsize=batchsize, ) async def remoteEntityIterator( self, remoteDiscoverySpace: DiscoverySpaceManager, batchsize=1 ) -> AsyncGenerator[list[Entity], None]: - grooped_iterator = await self.remoteEntityGroupIterator( + 
grouped_iterator = await self.remoteEntityGroupIterator( remoteDiscoverySpace=remoteDiscoverySpace ) return _sequential_group_iterator_async( - generator=grooped_iterator, discbatchsize=batchsize + generator=grouped_iterator, + remoteDiscoverySpace=remoteDiscoverySpace, + batchsize=batchsize, ) @@ -340,7 +395,9 @@ def entityIterator( ) -> Generator[list[Entity], None, None]: grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( - generator=grouped_iterator, batchsize=batchsize + generator=grouped_iterator, + discoverySpace=discoverySpace, + batchsize=batchsize, ) async def remoteEntityIterator( @@ -350,7 +407,9 @@ async def remoteEntityIterator( remoteDiscoverySpace=remoteDiscoverySpace ) return _sequential_group_iterator_async( - generator=grouped_iterator, batchsize=batchsize + generator=grouped_iterator, + remoteDiscoverySpace=remoteDiscoverySpace, + batchsize=batchsize, ) @@ -375,35 +434,6 @@ def __init__(self, mode: WalkModeEnum, group: list[str]): f"Initializing ExplicitEntitySpaceGroupedGridSampleGenerator, group: {group}" ) - def _get_remote_space_entities( - self, discoverySpaceActor: DiscoverySpaceManager - ) -> list[Entity]: - """ - Building list of entities for a discovery space - - :param discoverySpaceActor: discovery space actor - :return: list of entities - """ - # get discovery space - # noinspection PyUnresolvedReferences - dspace = ray.get(discoverySpaceActor.discoverySpace.remote()) - # build list of entities - return self._get_space_entities(discoverySpace=dspace) - - def _get_space_entities(self, discoverySpace: DiscoverySpace) -> list[dict]: - """ - Building list of entities for a discovery space - - :param discoverySpace: discovery space - :return: list of entities - """ - entity_space = discoverySpace.entitySpace - property_names = [c.identifier for c in entity_space.constitutiveProperties] - return [ - dict(zip(property_names, point)) - for point in entity_space.sequential_point_iterator() - ] - def entityGroupIterator( self, discoverySpace: DiscoverySpace, @@ -426,7 +456,7 @@ def entityGroupIterator( f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entitySpace}" ) - entities = self._get_space_entities(discoverySpace=discoverySpace) + entities = _get_space_entities(discoverySpace=discoverySpace) def iterator_closure() -> Generator[list[Entity], None, None]: def sequential_iterator() -> Generator[list[Entity], None, None]: @@ -451,13 +481,11 @@ async def remoteEntityGroupIterator( """Returns an async iterator that returns groups of entities as defined by the instances group property""" async def iterator_closure( - spaceActor: DiscoverySpaceManager, + remoteDiscoverySpace: DiscoverySpaceManager, ) -> AsyncGenerator[list[Entity], None]: # noinspection PyUnresolvedReferences - entitySpace = await spaceActor.entitySpace.remote() - # noinspection PyUnresolvedReferences - measurementSpace = await spaceActor.measurementSpace.remote() + entitySpace = await remoteDiscoverySpace.entitySpace.remote() if not ExplicitEntitySpaceGroupedGridSampleGenerator.samplerCompatibleWithEntitySpace( entitySpace=entitySpace @@ -466,13 +494,9 @@ async def iterator_closure( f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entitySpace}" ) - observedProperties = [] - for experiment in measurementSpace.experiments: - observedProperties.extend(experiment.observedProperties) - def sequential_iterator() -> AsyncGenerator[list[Entity], None]: - entities = self._get_remote_space_entities( - 
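# --- Editorial aside, not part of the patch: the random walk used by these
# samplers permutes whole groups, not individual members, so a group's
# entities always come out contiguously. Standalone sketch with toy groups:
import numpy as np

group_list = [["a1", "a2"], ["b1"], ["c1", "c2", "c3"]]
order = np.random.choice(a=range(len(group_list)), size=len(group_list), replace=False)
shuffled = [group_list[int(i)] for i in order]
# Every group survives intact; only the group order is randomised.
assert sorted(map(tuple, shuffled)) == sorted(map(tuple, group_list))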
discoverySpaceActor=spaceActor + entities = _get_remote_space_entities( + remoteDiscoverySpace=remoteDiscoverySpace ) return _sequential_iterator_async( entities=entities, @@ -480,8 +504,8 @@ def sequential_iterator() -> AsyncGenerator[list[Entity], None]: ) def random_iterator() -> AsyncGenerator[list[Entity], None]: - entities = self._get_remote_space_entities( - discoverySpaceActor=spaceActor + entities = _get_remote_space_entities( + remoteDiscoverySpace=remoteDiscoverySpace ) return _random_iterator_async( entities=entities, @@ -501,8 +525,8 @@ def entityIterator( grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( generator=grouped_iterator, - batchsize=batchsize, discoverySpace=discoverySpace, + batchsize=batchsize, ) async def remoteEntityIterator( @@ -512,5 +536,7 @@ async def remoteEntityIterator( remoteDiscoverySpace=remoteDiscoverySpace ) return _sequential_group_iterator_async( - generator=grouped_iterator, batchsize=batchsize + generator=grouped_iterator, + remoteDiscoverySpace=remoteDiscoverySpace, + batchsize=batchsize, ) diff --git a/tests/core/test_group_samplers.py b/tests/core/test_group_samplers.py index 97de8203..f712c5f1 100644 --- a/tests/core/test_group_samplers.py +++ b/tests/core/test_group_samplers.py @@ -20,6 +20,7 @@ from orchestrator.modules.operators.discovery_space_manager import ( DiscoverySpaceManager, ) +from orchestrator.schema.entity import Entity from orchestrator.schema.entityspace import EntitySpaceRepresentation @@ -49,14 +50,14 @@ def check_group_order( if isinstance(sampler, ExplicitEntitySpaceGroupedGridSampleGenerator): ids = [cp.identifier for cp in space.entitySpace.constitutiveProperties] entities = [ - space.entity_for_point(dict(zip(ids, p))) - for p in space.entitySpace.sequential_point_iterator() + dict(zip(ids, p)) for p in space.entitySpace.sequential_point_iterator() ] groups = _build_groups_dict(entities=entities, group=group) expected_group_order = list(groups.keys()) if sampler.mode == WalkModeEnum.RANDOM: assert group_order != expected_group_order else: + assert group_order == expected_group_order else: entities = space.matchingEntities() @@ -114,13 +115,23 @@ def test_group_sampler_local( for i, group in enumerate(sampler.entityGroupIterator(space)): count += len(group) for entity in group: - print(i, count, entity.identifier) + print(i, count, entity.identifier if isinstance(entity, Entity) else entity) node_value = { - e.valueForConstitutivePropertyIdentifier("nodes").value for e in group + ( + e["nodes"] + if type(e) is dict + else e.valueForConstitutivePropertyIdentifier("nodes").value + ) + for e in group } cpu_value = { - e.valueForConstitutivePropertyIdentifier("cpu_family").value for e in group + ( + e["cpu_family"] + if type(e) is dict + else e.valueForConstitutivePropertyIdentifier("cpu_family").value + ) + for e in group } assert ( @@ -215,10 +226,20 @@ async def test_group_sampler_remote( count += len(group) group_count += 1 node_value = { - e.valueForConstitutivePropertyIdentifier("nodes").value for e in group + ( + e["nodes"] + if type(e) is dict + else e.valueForConstitutivePropertyIdentifier("nodes").value + ) + for e in group } cpu_value = { - e.valueForConstitutivePropertyIdentifier("cpu_family").value for e in group + ( + e["cpu_family"] + if type(e) is dict + else e.valueForConstitutivePropertyIdentifier("cpu_family").value + ) + for e in group } assert ( From 8fa538ac6347d65fed6aec14d44db6b9d5cb462d Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: 
Thu, 20 Nov 2025 13:57:28 +0000 Subject: [PATCH 35/46] chore(group_sampler): Generic cleanup Signed-off-by: Christian Pinto --- .../core/discoveryspace/group_samplers.py | 40 +++++-------------- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index dad9617d..295f34f8 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -20,7 +20,7 @@ moduleLog = logging.getLogger("groupsamplers") -def _get_space_entities(discoverySpace: DiscoverySpace) -> list[dict]: +def _get_space_points(discoverySpace: DiscoverySpace) -> list[dict]: """ Building list of entities for a discovery space @@ -35,22 +35,6 @@ def _get_space_entities(discoverySpace: DiscoverySpace) -> list[dict]: ] -def _get_remote_space_entities( - remoteDiscoverySpace: DiscoverySpaceManager, -) -> list[dict]: - """ - Building list of entities for a discovery space - - :param remoteDiscoverySpace: discovery space actor - :return: list of entities dict - """ - # get discovery space - # noinspection PyUnresolvedReferences - dspace = ray.get(remoteDiscoverySpace.discoverySpace.remote()) - # build list of entities - return _get_space_entities(discoverySpace=dspace) - - def _build_entity_group_values( entity: dict | Entity, group: list[str] ) -> frozenset[tuple[str, Any]]: @@ -169,7 +153,8 @@ async def _random_iterator_async( def _random_iterator( - entities: list[dict], group: list[str] + entities: list[dict], + group: list[str], ) -> Generator[list[dict], None, None]: """ Random iterator through discovery space with grouping @@ -178,6 +163,7 @@ def _random_iterator( :return: """ group_list = _build_groups_list(entities=entities, group=group) + randomized = np.random.choice( a=range(len(group_list)), size=len(group_list), replace=False ) @@ -456,18 +442,18 @@ def entityGroupIterator( f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entitySpace}" ) - entities = _get_space_entities(discoverySpace=discoverySpace) + points = _get_space_points(discoverySpace=discoverySpace) def iterator_closure() -> Generator[list[Entity], None, None]: def sequential_iterator() -> Generator[list[Entity], None, None]: - return _sequential_iterator(entities=entities, group=self.group) + return _sequential_iterator(entities=points, group=self.group) def random_iterator() -> Generator[list[Entity], None, None]: import time now = time.perf_counter() print(f"Getting all entities took {time.perf_counter() - now}") - return _random_iterator(entities=entities, group=self.group) + return _random_iterator(entities=points, group=self.group) if self.mode == WalkModeEnum.SEQUENTIAL: return sequential_iterator() @@ -486,6 +472,8 @@ async def iterator_closure( # noinspection PyUnresolvedReferences entitySpace = await remoteDiscoverySpace.entitySpace.remote() + discoverySpace = await remoteDiscoverySpace.discoverySpace.remote() + points = _get_space_points(discoverySpace=discoverySpace) if not ExplicitEntitySpaceGroupedGridSampleGenerator.samplerCompatibleWithEntitySpace( entitySpace=entitySpace @@ -495,20 +483,14 @@ async def iterator_closure( ) def sequential_iterator() -> AsyncGenerator[list[Entity], None]: - entities = _get_remote_space_entities( - remoteDiscoverySpace=remoteDiscoverySpace - ) return _sequential_iterator_async( - entities=entities, + entities=points, group=self.group, ) def random_iterator() -> AsyncGenerator[list[Entity], None]: - entities = 
_get_remote_space_entities( - remoteDiscoverySpace=remoteDiscoverySpace - ) return _random_iterator_async( - entities=entities, + entities=points, group=self.group, ) From 3ed7ec603d88ab12a663d610c62713cad5aad803 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 21 Nov 2025 09:34:37 +0000 Subject: [PATCH 36/46] chore(group_sampler): Applied snake casing wherever possible Signed-off-by: Christian Pinto --- .../core/discoveryspace/group_samplers.py | 98 +++++++++---------- tests/core/test_group_samplers.py | 3 +- 2 files changed, 51 insertions(+), 50 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index 295f34f8..e142bb20 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -20,14 +20,14 @@ moduleLog = logging.getLogger("groupsamplers") -def _get_space_points(discoverySpace: DiscoverySpace) -> list[dict]: +def _get_space_points(discovery_space: DiscoverySpace) -> list[dict]: """ Building list of entities for a discovery space - :param discoverySpace: discovery space + :param discovery_space: discovery space :return: list of entities """ - entity_space = discoverySpace.entitySpace + entity_space = discovery_space.entitySpace property_names = [c.identifier for c in entity_space.constitutiveProperties] return [ dict(zip(property_names, point)) @@ -174,8 +174,8 @@ def _random_iterator( def _sequential_group_iterator( generator: Generator[list[dict], None, None], - discoverySpace: DiscoverySpace, - batchsize: int, + discovery_space: DiscoverySpace, + batch_size: int, ) -> Generator[list[Entity], None, None]: """ Sequential group iterator @@ -189,7 +189,7 @@ def _sequential_group_iterator( # loop while not done while not done: # loop through the batch size - for _ in range(batchsize): + for _ in range(batch_size): if len(sample) == 0: # get the new group sample = _get_grouped_sample(generator=generator) @@ -200,7 +200,7 @@ def _sequential_group_iterator( break # Retrieve entity from the store if type(sample[0]) is dict: - entity = discoverySpace.entity_for_point(sample[0]) + entity = discovery_space.entity_for_point(sample[0]) else: # The sample is already an Entity entity = sample[0] @@ -217,8 +217,8 @@ def _sequential_group_iterator( async def _sequential_group_iterator_async( generator: AsyncGenerator[list[dict], None], - remoteDiscoverySpace: DiscoverySpaceManager, - batchsize: int, + remote_discovery_space: DiscoverySpaceManager, + batch_size: int, ) -> AsyncGenerator[list[Entity], None]: """ Async sequential group iterator @@ -232,7 +232,7 @@ async def _sequential_group_iterator_async( # loop while not done while not done: # loop through the batch size - for _ in range(batchsize): + for _ in range(batch_size): if len(sample) == 0: # get the new group sample = await _get_grouped_sample_async(generator=generator) @@ -245,7 +245,7 @@ async def _sequential_group_iterator_async( if type(sample[0]) is dict: # Retrieve entity from the store entity = ray.get( - remoteDiscoverySpace.entity_for_point.remote(sample[0]) + remote_discovery_space.entity_for_point.remote(sample[0]) ) else: entity = sample[0] @@ -267,7 +267,7 @@ class SequentialGroupSampleSelector(GroupSampler): @classmethod def samplerCompatibleWithDiscoverySpaceRemote( - cls, remoteDiscoverySpace: DiscoverySpaceManager + cls, remote_discovery_space: DiscoverySpaceManager ): return True @@ -280,9 +280,9 @@ def __init__(self, group: list[str]): def entityGroupIterator( self, - 
discoverySpace: DiscoverySpace, + discovery_space: DiscoverySpace, ) -> Generator[list[Entity], None, None]: - """Returns an iterator that samples groups of entities from a discoveryspace + """Returns an iterator that samples groups of entities from a discovery space The group definition should be specified on initializing an instance of a subclass of this class @@ -290,9 +290,9 @@ def entityGroupIterator( the number of members of the associated group Parameters: - discoverySpace: An orchestrator.model.space.DiscoverySpace instance + discovery_space: An orchestrator.model.space.DiscoverySpace instance """ - entities = discoverySpace.matchingEntities() + entities = discovery_space.matchingEntities() return _sequential_iterator(entities=entities, group=self.group) async def remoteEntityGroupIterator( @@ -310,11 +310,11 @@ async def iterator_closure(): def entityIterator( self, discoverySpace: DiscoverySpace, batchsize=1 ) -> Generator[list[Entity], None, None]: - grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) + grouped_iterator = self.entityGroupIterator(discovery_space=discoverySpace) return _sequential_group_iterator( generator=grouped_iterator, - discoverySpace=discoverySpace, - batchsize=batchsize, + discovery_space=discoverySpace, + batch_size=batchsize, ) async def remoteEntityIterator( @@ -325,8 +325,8 @@ async def remoteEntityIterator( ) return _sequential_group_iterator_async( generator=grouped_iterator, - remoteDiscoverySpace=remoteDiscoverySpace, - batchsize=batchsize, + remote_discovery_space=remoteDiscoverySpace, + batch_size=batchsize, ) @@ -352,7 +352,7 @@ def entityGroupIterator( self, discoverySpace: DiscoverySpace, ) -> Generator[list[Entity], None, None]: - """Returns an iterator that samples groups of entities from a discoveryspace + """Returns an iterator that samples groups of entities from a discovery space The group definition should be specified on initializing an instance of a subclass of this class @@ -360,7 +360,7 @@ def entityGroupIterator( the number of members of the associated group Parameters: - discoverySpace: An orchestrator.model.space.DiscoverySpace instance + discovery_space: An orchestrator.model.space.DiscoverySpace instance """ entities = discoverySpace.matchingEntities() return _random_iterator(entities=entities, group=self.group) @@ -369,12 +369,12 @@ async def remoteEntityGroupIterator( self, remoteDiscoverySpace: DiscoverySpaceManager ) -> AsyncGenerator[list[Entity], None]: async def iterator_closure( - stateHandle: DiscoverySpaceManager, + state_handle: DiscoverySpaceManager, ): - entities = await stateHandle.matchingEntitiesInSource.remote() + entities = await state_handle.matchingEntitiesInSource.remote() return _random_iterator_async(entities=entities, group=self.group) - return await iterator_closure(stateHandle=remoteDiscoverySpace) + return await iterator_closure(state_handle=remoteDiscoverySpace) def entityIterator( self, discoverySpace: DiscoverySpace, batchsize=1 @@ -382,8 +382,8 @@ def entityIterator( grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( generator=grouped_iterator, - discoverySpace=discoverySpace, - batchsize=batchsize, + discovery_space=discoverySpace, + batch_size=batchsize, ) async def remoteEntityIterator( @@ -394,8 +394,8 @@ async def remoteEntityIterator( ) return _sequential_group_iterator_async( generator=grouped_iterator, - remoteDiscoverySpace=remoteDiscoverySpace, - batchsize=batchsize, + 
remote_discovery_space=remoteDiscoverySpace, + batch_size=batchsize, ) @@ -422,27 +422,27 @@ def __init__(self, mode: WalkModeEnum, group: list[str]): def entityGroupIterator( self, - discoverySpace: DiscoverySpace, + discovery_space: DiscoverySpace, ) -> Generator[list[dict], None, None]: - """Returns an iterator that samples groups of entities from a discoveryspace + """Returns an iterator that samples groups of entities from a discovery space Note: The number of entities returned on each call to the iterator can vary as it depends on the number of members of the associated group Parameters: - discoverySpace: An orchestrator.model.space.DiscoverySpace instance + discovery_space: An orchestrator.model.space.DiscoverySpace instance """ - entitySpace = discoverySpace.entitySpace + entity_space = discovery_space.entitySpace if not ExplicitEntitySpaceGroupedGridSampleGenerator.samplerCompatibleWithEntitySpace( - entitySpace=entitySpace + entity_space ): raise ValueError( - f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entitySpace}" + f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entity_space}" ) - points = _get_space_points(discoverySpace=discoverySpace) + points = _get_space_points(discovery_space=discovery_space) def iterator_closure() -> Generator[list[Entity], None, None]: def sequential_iterator() -> Generator[list[Entity], None, None]: @@ -467,19 +467,19 @@ async def remoteEntityGroupIterator( """Returns an async iterator that returns groups of entities as defined by the instances group property""" async def iterator_closure( - remoteDiscoverySpace: DiscoverySpaceManager, + remote_discovery_space: DiscoverySpaceManager, ) -> AsyncGenerator[list[Entity], None]: # noinspection PyUnresolvedReferences - entitySpace = await remoteDiscoverySpace.entitySpace.remote() - discoverySpace = await remoteDiscoverySpace.discoverySpace.remote() - points = _get_space_points(discoverySpace=discoverySpace) + entity_space = await remote_discovery_space.entitySpace.remote() + discovery_space = await remote_discovery_space.discoverySpace.remote() + points = _get_space_points(discovery_space=discovery_space) if not ExplicitEntitySpaceGroupedGridSampleGenerator.samplerCompatibleWithEntitySpace( - entitySpace=entitySpace + entity_space ): raise ValueError( - f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entitySpace}" + f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entity_space}" ) def sequential_iterator() -> AsyncGenerator[list[Entity], None]: @@ -501,14 +501,14 @@ def random_iterator() -> AsyncGenerator[list[Entity], None]: return await iterator_closure(remoteDiscoverySpace) def entityIterator( - self, discoverySpace: DiscoverySpace, batchsize=1 + self, discovery_space: DiscoverySpace, batchsize=1 ) -> Generator[list[Entity], None, None]: """Returns an iterator over a sequence of entities ordered by group""" - grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) + grouped_iterator = self.entityGroupIterator(discovery_space=discovery_space) return _sequential_group_iterator( generator=grouped_iterator, - discoverySpace=discoverySpace, - batchsize=batchsize, + discovery_space=discovery_space, + batch_size=batchsize, ) async def remoteEntityIterator( @@ -519,6 +519,6 @@ async def remoteEntityIterator( ) return _sequential_group_iterator_async( generator=grouped_iterator, - remoteDiscoverySpace=remoteDiscoverySpace, - batchsize=batchsize, + remote_discovery_space=remoteDiscoverySpace, + batch_size=batchsize, ) diff 
--git a/tests/core/test_group_samplers.py b/tests/core/test_group_samplers.py index f712c5f1..3e153da4 100644 --- a/tests/core/test_group_samplers.py +++ b/tests/core/test_group_samplers.py @@ -291,7 +291,8 @@ async def test_group_sampler_sequential_remote( assert RandomGroupSampleSelector.samplerCompatibleWithDiscoverySpaceRemote(manager) iterator = await sampler.remoteEntityIterator( - remoteDiscoverySpace=manager, batchsize=5 + remoteDiscoverySpace=manager, + batchsize=5, ) count = 0 From 21859059c5276e4a86e67345ab0324fe8f2351ba Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 21 Nov 2025 09:49:08 +0000 Subject: [PATCH 37/46] chore(group_sampler): Addressed review comments Signed-off-by: Christian Pinto --- .../core/discoveryspace/group_samplers.py | 26 ++++++++----------- tests/core/test_group_samplers.py | 8 +++--- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index e142bb20..4b411d95 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -22,10 +22,10 @@ def _get_space_points(discovery_space: DiscoverySpace) -> list[dict]: """ - Building list of entities for a discovery space + Building list of points for a discovery space :param discovery_space: discovery space - :return: list of entities + :return: list of points """ entity_space = discovery_space.entitySpace property_names = [c.identifier for c in entity_space.constitutiveProperties] @@ -199,7 +199,7 @@ def _sequential_group_iterator( done = True break # Retrieve entity from the store - if type(sample[0]) is dict: + if isinstance(sample[0], dict): entity = discovery_space.entity_for_point(sample[0]) else: # The sample is already an Entity @@ -242,7 +242,7 @@ async def _sequential_group_iterator_async( # mark that we are done done = True break - if type(sample[0]) is dict: + if isinstance(sample[0], dict): # Retrieve entity from the store entity = ray.get( remote_discovery_space.entity_for_point.remote(sample[0]) @@ -280,7 +280,7 @@ def __init__(self, group: list[str]): def entityGroupIterator( self, - discovery_space: DiscoverySpace, + discoverySpace: DiscoverySpace, ) -> Generator[list[Entity], None, None]: """Returns an iterator that samples groups of entities from a discovery space @@ -292,7 +292,7 @@ def entityGroupIterator( Parameters: discovery_space: An orchestrator.model.space.DiscoverySpace instance """ - entities = discovery_space.matchingEntities() + entities = discoverySpace.matchingEntities() return _sequential_iterator(entities=entities, group=self.group) async def remoteEntityGroupIterator( @@ -310,7 +310,7 @@ async def iterator_closure(): def entityIterator( self, discoverySpace: DiscoverySpace, batchsize=1 ) -> Generator[list[Entity], None, None]: - grouped_iterator = self.entityGroupIterator(discovery_space=discoverySpace) + grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( generator=grouped_iterator, discovery_space=discoverySpace, @@ -422,7 +422,7 @@ def __init__(self, mode: WalkModeEnum, group: list[str]): def entityGroupIterator( self, - discovery_space: DiscoverySpace, + discoverySpace: DiscoverySpace, ) -> Generator[list[dict], None, None]: """Returns an iterator that samples groups of entities from a discovery space @@ -433,7 +433,7 @@ def entityGroupIterator( discovery_space: An orchestrator.model.space.DiscoverySpace instance """ - entity_space = 
discovery_space.entitySpace + entity_space = discoverySpace.entitySpace if not ExplicitEntitySpaceGroupedGridSampleGenerator.samplerCompatibleWithEntitySpace( entity_space @@ -442,17 +442,13 @@ def entityGroupIterator( f"Cannot use ExplicitEntitySpaceGroupedGridSampleGenerator with {entity_space}" ) - points = _get_space_points(discovery_space=discovery_space) + points = _get_space_points(discovery_space=discoverySpace) def iterator_closure() -> Generator[list[Entity], None, None]: def sequential_iterator() -> Generator[list[Entity], None, None]: return _sequential_iterator(entities=points, group=self.group) def random_iterator() -> Generator[list[Entity], None, None]: - import time - - now = time.perf_counter() - print(f"Getting all entities took {time.perf_counter() - now}") return _random_iterator(entities=points, group=self.group) if self.mode == WalkModeEnum.SEQUENTIAL: @@ -504,7 +500,7 @@ def entityIterator( self, discovery_space: DiscoverySpace, batchsize=1 ) -> Generator[list[Entity], None, None]: """Returns an iterator over a sequence of entities ordered by group""" - grouped_iterator = self.entityGroupIterator(discovery_space=discovery_space) + grouped_iterator = self.entityGroupIterator(discoverySpace=discovery_space) return _sequential_group_iterator( generator=grouped_iterator, discovery_space=discovery_space, diff --git a/tests/core/test_group_samplers.py b/tests/core/test_group_samplers.py index 3e153da4..5b91bb27 100644 --- a/tests/core/test_group_samplers.py +++ b/tests/core/test_group_samplers.py @@ -120,7 +120,7 @@ def test_group_sampler_local( node_value = { ( e["nodes"] - if type(e) is dict + if isinstance(e, dict) else e.valueForConstitutivePropertyIdentifier("nodes").value ) for e in group @@ -128,7 +128,7 @@ def test_group_sampler_local( cpu_value = { ( e["cpu_family"] - if type(e) is dict + if isinstance(e, dict) else e.valueForConstitutivePropertyIdentifier("cpu_family").value ) for e in group @@ -228,7 +228,7 @@ async def test_group_sampler_remote( node_value = { ( e["nodes"] - if type(e) is dict + if isinstance(e, dict) else e.valueForConstitutivePropertyIdentifier("nodes").value ) for e in group @@ -236,7 +236,7 @@ async def test_group_sampler_remote( cpu_value = { ( e["cpu_family"] - if type(e) is dict + if isinstance(e, dict) else e.valueForConstitutivePropertyIdentifier("cpu_family").value ) for e in group From 665bde0c87e6ad5bbe0eb5e8d452509da077171e Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 21 Nov 2025 10:29:06 +0000 Subject: [PATCH 38/46] chore(group_sampler): Reverted wrong conversion to snake casing Signed-off-by: Christian Pinto --- orchestrator/core/discoveryspace/group_samplers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index 4b411d95..15569e56 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -497,13 +497,13 @@ def random_iterator() -> AsyncGenerator[list[Entity], None]: return await iterator_closure(remoteDiscoverySpace) def entityIterator( - self, discovery_space: DiscoverySpace, batchsize=1 + self, discoverySpace: DiscoverySpace, batchsize=1 ) -> Generator[list[Entity], None, None]: """Returns an iterator over a sequence of entities ordered by group""" - grouped_iterator = self.entityGroupIterator(discoverySpace=discovery_space) + grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return 
_sequential_group_iterator( generator=grouped_iterator, - discovery_space=discovery_space, + discovery_space=discoverySpace, batch_size=batchsize, ) From 0a9730ca0485bacf0aab06418ad16467772054f8 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 21 Nov 2025 14:45:05 +0000 Subject: [PATCH 39/46] chore(group_sampler): Simplified handling of points for creating groups Signed-off-by: Christian Pinto --- .../core/discoveryspace/group_samplers.py | 247 +++++++++++------- tests/core/test_group_samplers.py | 12 +- 2 files changed, 155 insertions(+), 104 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index 15569e56..da201f16 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -6,7 +6,6 @@ from typing import Any import numpy as np -import ray from orchestrator.core.discoveryspace.samplers import ( ExplicitEntitySpaceGridSampleGenerator, @@ -35,29 +34,64 @@ def _get_space_points(discovery_space: DiscoverySpace) -> list[dict]: ] -def _build_entity_group_values( - entity: dict | Entity, group: list[str] +def _get_space_matching_points(discovery_space: DiscoverySpace) -> list[dict]: + """ + Building list of points from the matching entities for a discovery space + + :param discovery_space: discovery space + :return: list of points + """ + matching_entities = discovery_space.matchingEntities() + points = [] + for entity in matching_entities: + point = { + v.property.identifier: v.value for v in entity.constitutive_property_values + } + # we save the entity identifier to retrieve the entity from the store at a later stage, + # assuming they are cached locally + point["entity_identifier"] = entity.identifier + points.append(point) + + return points + + +def _entity_for_point(point: dict, discovery_space: DiscoverySpace) -> list[Entity]: + if "entity_identifier" in point: + entity = discovery_space.sample_store.entityWithIdentifier( + entityIdentifier=point["entity_identifier"] + ) + else: + entity = discovery_space.entity_for_point(point) + + return entity + + +async def _entity_for_point_async( + point: dict, remote_discovery_space: DiscoverySpaceManager +) -> list[Entity]: + if "entity_identifier" in point: + entity = await remote_discovery_space.storedEntityWithIdentifier.remote( + entityIdentifier=point["entity_identifier"] + ) + else: + entity = await remote_discovery_space.entity_for_point.remote(point) + + return entity + + +def _build_point_group_values( + point: dict, group: list[str] ) -> frozenset[tuple[str, Any]]: """ :return: A frozen set of (key,value) pairs """ - # build a dictionary of entity values given the group - if isinstance(entity, Entity): - return frozenset( - { - (v.property.identifier, v.value) - for v in entity.constitutive_property_values - if v.property.identifier in group - } - ) - # We get a dict in case of generator type of samplers where we defer fetching the - # full entity from store until it is returned by the iterator. 
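# --- Editorial aside, not part of the patch: the core idea of this refactor
# is to group cheap dict points first and materialise Entity objects lazily,
# one group at a time. Standalone sketch with a hypothetical stand-in for
# entity_for_point:
def _fetch_entity(point: dict) -> str:  # hypothetical store lookup
    return f"entity::{sorted(point.items())}"

points = [{"nodes": 1, "id": "a"}, {"nodes": 1, "id": "b"}, {"nodes": 2, "id": "c"}]
groups: dict[frozenset, list[dict]] = {}
for p in points:
    groups.setdefault(frozenset({("nodes", p["nodes"])}), []).append(p)

for members in groups.values():
    # the expensive lookup is deferred until the group is actually yielded
    entities = [_fetch_entity(p) for p in members]
assert len(groups) == 2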
- return frozenset({(k, v) for k, v in entity.items() if k in group}) + + return frozenset({(k, v) for k, v in point.items() if k in group}) def _build_groups_dict( - entities: list[dict | Entity], group: list[str] -) -> dict[frozenset[tuple[str, Any]], list[dict | Entity]]: + points: list[dict], group: list[str] +) -> dict[frozenset[tuple[str, Any]], list[Entity]]: """ builds a dict of lists of entities, combining entities based on group definitions :param entities: list of entities @@ -65,30 +99,28 @@ def _build_groups_dict( :return: A dictionary whose keys are groups and whose values are list of entities """ groups = {} - for element in entities: - grp = _build_entity_group_values(entity=element, group=group) + for point in points: + grp = _build_point_group_values(point=point, group=group) lst = groups.get(grp, []) - lst.append(element) + lst.append(point) groups[grp] = lst return groups -def _build_groups_list( - entities: list[dict | Entity], group: list[str] -) -> list[list[dict | Entity]]: +def _build_groups_list(points: list[dict], group: list[str]) -> list[list[dict]]: """ - builds a list of lists of entities, combining entities based on group definitions - :param entities: list of entities + builds a list of lists of points, combining entities based on group definitions + :param points: list of points :param group: group definition :return: """ - return list(_build_groups_dict(entities=entities, group=group).values()) + return list(_build_groups_dict(points=points, group=group).values()) async def _get_grouped_sample_async( - generator: AsyncGenerator[list[dict], None], -) -> list[dict] | None: + generator: AsyncGenerator[list[Entity], None], +) -> list[Entity] | None: try: return await anext(generator) except (StopAsyncIteration, StopIteration): @@ -97,7 +129,7 @@ async def _get_grouped_sample_async( def _get_grouped_sample( generator: Generator[list[Entity], None, None], -) -> list[dict] | None: +) -> list[Entity] | None: try: return next(generator) except (StopAsyncIteration, StopIteration): @@ -105,82 +137,104 @@ def _get_grouped_sample( async def _sequential_iterator_async( - entities: list[dict | Entity], group: list[str] -) -> AsyncGenerator[list[dict | Entity], None]: + points: list[dict], + group: list[str], + remote_discovery_space=DiscoverySpaceManager, +) -> AsyncGenerator[list[Entity], None]: """ Sequential iterator through discovery space with grouping - :param entities: list of entities + :param points: list of points :param group: group definition :return: """ - group_list = _build_groups_list(entities=entities, group=group) + group_list = _build_groups_list(points=points, group=group) for i in range(len(group_list)): - lst = group_list[i] + entity_list = [ + await _entity_for_point_async( + point=point, remote_discovery_space=remote_discovery_space + ) + for point in group_list[i] + ] + lst = entity_list yield lst def _sequential_iterator( - entities: list[dict | Entity], group: list[str] -) -> Generator[list[dict | Entity], None, None]: + points: list[dict], + group: list[str], + discovery_space: DiscoverySpace, +) -> Generator[list[Entity], None, None]: """ Sequential iterator through discovery space with grouping - :param entities: list of entities + :param points: list of points :param group: group definition :return: """ - group_list = _build_groups_list(entities=entities, group=group) + group_list = _build_groups_list(points=points, group=group) for i in range(len(group_list)): - lst = group_list[i] - yield lst + entity_list = [ + 
_entity_for_point(point=point, discovery_space=discovery_space) + for point in group_list[i] + ] + yield entity_list async def _random_iterator_async( - entities: list[dict], group: list[str] -) -> AsyncGenerator[list[dict], None]: + points: list[dict], + group: list[str], + remote_discovery_space: DiscoverySpaceManager, +) -> AsyncGenerator[list[Entity], None]: """ Random iterator through discovery space with grouping - :param entities: list of entities + :param points: list of points :param group: group definition :return: """ - group_list = _build_groups_list(entities=entities, group=group) + group_list = _build_groups_list(points=points, group=group) randomized = np.random.choice( a=range(len(group_list)), size=len(group_list), replace=False ) for i in range(len(randomized)): - lst = group_list[randomized[i]] - yield lst + entity_list = [ + await _entity_for_point_async( + remote_discovery_space=remote_discovery_space, point=point + ) + for point in group_list[randomized[i]] + ] + yield entity_list def _random_iterator( - entities: list[dict], + points: list[dict], group: list[str], -) -> Generator[list[dict], None, None]: + discovery_space: DiscoverySpace, +) -> Generator[list[Entity], None, None]: """ Random iterator through discovery space with grouping - :param entities: list of entities + :param points: list of points :param group: group definition :return: """ - group_list = _build_groups_list(entities=entities, group=group) - + group_list = _build_groups_list(points=points, group=group) randomized = np.random.choice( a=range(len(group_list)), size=len(group_list), replace=False ) for i in range(len(randomized)): - lst = group_list[randomized[i]] - yield lst + entity_list = [ + _entity_for_point(point=point, discovery_space=discovery_space) + for point in group_list[randomized[i]] + ] + yield entity_list def _sequential_group_iterator( generator: Generator[list[dict], None, None], - discovery_space: DiscoverySpace, batch_size: int, ) -> Generator[list[Entity], None, None]: """ Sequential group iterator :param generator: grouped iterator - :param batchsize: batch size + :param batch_size: batch size :return: """ sample = [] @@ -198,16 +252,10 @@ def _sequential_group_iterator( # mark that we are done and break done = True break - # Retrieve entity from the store - if isinstance(sample[0], dict): - entity = discovery_space.entity_for_point(sample[0]) - else: - # The sample is already an Entity - entity = sample[0] - # append a new entity to batch + + # append a new entity to the batch + entity = sample.pop(0) batch.append(entity) - # remove entity from samples - sample = sample[1:] # submit a batch and clean it up # The last batch may be empty - if so don't return it if batch: @@ -217,13 +265,12 @@ def _sequential_group_iterator( async def _sequential_group_iterator_async( generator: AsyncGenerator[list[dict], None], - remote_discovery_space: DiscoverySpaceManager, batch_size: int, ) -> AsyncGenerator[list[Entity], None]: """ Async sequential group iterator :param generator: grouped iterator - :param batchsize: batch size + :param batch_size: batch size :return: """ sample = [] @@ -242,17 +289,10 @@ async def _sequential_group_iterator_async( # mark that we are done done = True break - if isinstance(sample[0], dict): - # Retrieve entity from the store - entity = ray.get( - remote_discovery_space.entity_for_point.remote(sample[0]) - ) - else: - entity = sample[0] - # append a new entity to batch + + # append a new entity to the batch + entity = sample.pop(0) batch.append(entity) - # 
remove entity from samples - sample = sample[1:] # submit a batch and clean it up # The last batch may be empty - if so don't return it if batch: @@ -282,7 +322,7 @@ def entityGroupIterator( self, discoverySpace: DiscoverySpace, ) -> Generator[list[Entity], None, None]: - """Returns an iterator that samples groups of entities from a discovery space + """Returns an iterator that samples groups of entities from a discovery space The group definition should be specified on initializing an instance of a subclass of this class @@ -292,17 +332,21 @@ def entityGroupIterator( Parameters: discovery_space: An orchestrator.model.space.DiscoverySpace instance """ - entities = discoverySpace.matchingEntities() - return _sequential_iterator(entities=entities, group=self.group) + points = _get_space_matching_points(discovery_space=discoverySpace) + return _sequential_iterator( + points=points, group=self.group, discovery_space=discoverySpace + ) async def remoteEntityGroupIterator( self, remoteDiscoverySpace: DiscoverySpaceManager ) -> AsyncGenerator[list[Entity], None]: async def iterator_closure(): - entities = await remoteDiscoverySpace.matchingEntitiesInSource.remote() + discovery_space = await remoteDiscoverySpace.discoverySpace.remote() + points = _get_space_matching_points(discovery_space=discovery_space) return _sequential_iterator_async( - entities=entities, + points=points, group=self.group, + remote_discovery_space=remoteDiscoverySpace, ) return await iterator_closure() @@ -313,7 +357,6 @@ def entityIterator( grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( generator=grouped_iterator, - discovery_space=discoverySpace, batch_size=batchsize, ) @@ -325,7 +368,6 @@ async def remoteEntityIterator( ) return _sequential_group_iterator_async( generator=grouped_iterator, - remote_discovery_space=remoteDiscoverySpace, batch_size=batchsize, ) @@ -352,7 +394,7 @@ def entityGroupIterator( self, discoverySpace: DiscoverySpace, ) -> Generator[list[Entity], None, None]: - """Returns an iterator that samples groups of entities from a discovery space + """Returns an iterator that samples groups of entities from a discovery space The group definition should be specified on initializing an instance of a subclass of this class @@ -362,8 +404,10 @@ def entityGroupIterator( Parameters: discovery_space: An orchestrator.model.space.DiscoverySpace instance """ - entities = discoverySpace.matchingEntities() - return _random_iterator(entities=entities, group=self.group) + points = _get_space_matching_points(discovery_space=discoverySpace) + return _random_iterator( + points=points, group=self.group, discovery_space=discoverySpace + ) async def remoteEntityGroupIterator( self, remoteDiscoverySpace: DiscoverySpaceManager @@ -371,8 +415,13 @@ async def remoteEntityGroupIterator( async def iterator_closure( state_handle: DiscoverySpaceManager, ): - entities = await state_handle.matchingEntitiesInSource.remote() - return _random_iterator_async(entities=entities, group=self.group) + discovery_space = await state_handle.discoverySpace.remote() + points = _get_space_matching_points(discovery_space=discovery_space) + return _random_iterator_async( + points=points, + group=self.group, + remote_discovery_space=remoteDiscoverySpace, + ) return await iterator_closure(state_handle=remoteDiscoverySpace) @@ -382,7 +431,6 @@ def entityIterator( grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( 
generator=grouped_iterator, - discovery_space=discoverySpace, batch_size=batchsize, ) @@ -394,7 +442,6 @@ async def remoteEntityIterator( ) return _sequential_group_iterator_async( generator=grouped_iterator, - remote_discovery_space=remoteDiscoverySpace, batch_size=batchsize, ) @@ -423,8 +470,8 @@ def __init__(self, mode: WalkModeEnum, group: list[str]): def entityGroupIterator( self, discoverySpace: DiscoverySpace, - ) -> Generator[list[dict], None, None]: - """Returns an iterator that samples groups of entities from a discovery space + ) -> Generator[list[Entity], None, None]: + """Returns an iterator that samples groups of entities from a discovery space Note: The number of entities returned on each call to the iterator can vary as it depends on the number of members of the associated group @@ -446,10 +493,14 @@ def entityGroupIterator( def iterator_closure() -> Generator[list[Entity], None, None]: def sequential_iterator() -> Generator[list[Entity], None, None]: - return _sequential_iterator(entities=points, group=self.group) + return _sequential_iterator( + points=points, group=self.group, discovery_space=discoverySpace + ) def random_iterator() -> Generator[list[Entity], None, None]: - return _random_iterator(entities=points, group=self.group) + return _random_iterator( + points=points, group=self.group, discovery_space=discoverySpace + ) if self.mode == WalkModeEnum.SEQUENTIAL: return sequential_iterator() @@ -480,14 +531,16 @@ async def iterator_closure( def sequential_iterator() -> AsyncGenerator[list[Entity], None]: return _sequential_iterator_async( - entities=points, + points=points, group=self.group, + remote_discovery_space=remoteDiscoverySpace, ) def random_iterator() -> AsyncGenerator[list[Entity], None]: return _random_iterator_async( - entities=points, + points=points, group=self.group, + remote_discovery_space=remoteDiscoverySpace, ) if self.mode == WalkModeEnum.SEQUENTIAL: @@ -503,7 +556,6 @@ def entityIterator( grouped_iterator = self.entityGroupIterator(discoverySpace=discoverySpace) return _sequential_group_iterator( generator=grouped_iterator, - discovery_space=discoverySpace, batch_size=batchsize, ) @@ -515,6 +567,5 @@ async def remoteEntityIterator( ) return _sequential_group_iterator_async( generator=grouped_iterator, - remote_discovery_space=remoteDiscoverySpace, batch_size=batchsize, ) diff --git a/tests/core/test_group_samplers.py b/tests/core/test_group_samplers.py index 5b91bb27..fccf6325 100644 --- a/tests/core/test_group_samplers.py +++ b/tests/core/test_group_samplers.py @@ -10,6 +10,7 @@ RandomGroupSampleSelector, SequentialGroupSampleSelector, _build_groups_dict, + _get_space_matching_points, ) from orchestrator.core.discoveryspace.samplers import ( GroupSampler, @@ -20,7 +21,6 @@ from orchestrator.modules.operators.discovery_space_manager import ( DiscoverySpaceManager, ) -from orchestrator.schema.entity import Entity from orchestrator.schema.entityspace import EntitySpaceRepresentation @@ -49,10 +49,10 @@ def check_group_order( if isinstance(sampler, ExplicitEntitySpaceGroupedGridSampleGenerator): ids = [cp.identifier for cp in space.entitySpace.constitutiveProperties] - entities = [ + points = [ dict(zip(ids, p)) for p in space.entitySpace.sequential_point_iterator() ] - groups = _build_groups_dict(entities=entities, group=group) + groups = _build_groups_dict(points=points, group=group) expected_group_order = list(groups.keys()) if sampler.mode == WalkModeEnum.RANDOM: assert group_order != expected_group_order @@ -60,8 +60,8 @@ def 
check_group_order( assert group_order == expected_group_order else: - entities = space.matchingEntities() - groups = _build_groups_dict(entities=entities, group=group) + points = _get_space_matching_points(discovery_space=space) + groups = _build_groups_dict(points=points, group=group) expected_group_order = list(groups.keys()) if isinstance(sampler, SequentialGroupSampleSelector): assert group_order == expected_group_order @@ -115,7 +115,7 @@ def test_group_sampler_local( for i, group in enumerate(sampler.entityGroupIterator(space)): count += len(group) for entity in group: - print(i, count, entity.identifier if isinstance(entity, Entity) else entity) + print(i, count, entity) node_value = { ( From 95f39dacdbc1452af7ff590de05e449dda766a17 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Fri, 21 Nov 2025 15:00:03 +0000 Subject: [PATCH 40/46] chore(group_sampler): Some leftover types and snake_cases Signed-off-by: Christian Pinto --- orchestrator/core/discoveryspace/group_samplers.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index da201f16..40397ac1 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -307,7 +307,7 @@ class SequentialGroupSampleSelector(GroupSampler): @classmethod def samplerCompatibleWithDiscoverySpaceRemote( - cls, remote_discovery_space: DiscoverySpaceManager + cls, remoteDiscoverySpace: DiscoverySpaceManager ): return True @@ -330,7 +330,7 @@ def entityGroupIterator( the number of members of the associated group Parameters: - discovery_space: An orchestrator.model.space.DiscoverySpace instance + discoverySpace: An orchestrator.model.space.DiscoverySpace instance """ points = _get_space_matching_points(discovery_space=discoverySpace) return _sequential_iterator( @@ -402,7 +402,7 @@ def entityGroupIterator( the number of members of the associated group Parameters: - discovery_space: An orchestrator.model.space.DiscoverySpace instance + discoverySpace: An orchestrator.model.space.DiscoverySpace instance """ points = _get_space_matching_points(discovery_space=discoverySpace) return _random_iterator( @@ -413,9 +413,9 @@ async def remoteEntityGroupIterator( self, remoteDiscoverySpace: DiscoverySpaceManager ) -> AsyncGenerator[list[Entity], None]: async def iterator_closure( - state_handle: DiscoverySpaceManager, + remote_discovery_space: DiscoverySpaceManager, ): - discovery_space = await state_handle.discoverySpace.remote() + discovery_space = await remote_discovery_space.discoverySpace.remote() points = _get_space_matching_points(discovery_space=discovery_space) return _random_iterator_async( points=points, @@ -423,7 +423,7 @@ async def iterator_closure( remote_discovery_space=remoteDiscoverySpace, ) - return await iterator_closure(state_handle=remoteDiscoverySpace) + return await iterator_closure(remote_discovery_space=remoteDiscoverySpace) def entityIterator( self, discoverySpace: DiscoverySpace, batchsize=1 @@ -477,7 +477,7 @@ def entityGroupIterator( the number of members of the associated group Parameters: - discovery_space: An orchestrator.model.space.DiscoverySpace instance + discoverySpace: An orchestrator.model.space.DiscoverySpace instance """ entity_space = discoverySpace.entitySpace From 682fdbbd44df96b254dbb9fb225912a63474137a Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Mon, 24 Nov 2025 08:15:29 +0000 Subject: [PATCH 41/46] 
chore(group_sampler): Tests cleanup

Signed-off-by: Christian Pinto
---
 tests/core/test_group_samplers.py | 26 ++++----------------------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/tests/core/test_group_samplers.py b/tests/core/test_group_samplers.py
index fccf6325..7e708ec1 100644
--- a/tests/core/test_group_samplers.py
+++ b/tests/core/test_group_samplers.py
@@ -118,19 +118,10 @@ def test_group_sampler_local(
             print(i, count, entity)
 
         node_value = {
-            (
-                e["nodes"]
-                if isinstance(e, dict)
-                else e.valueForConstitutivePropertyIdentifier("nodes").value
-            )
-            for e in group
+            (e.valueForConstitutivePropertyIdentifier("nodes").value) for e in group
         }
         cpu_value = {
-            (
-                e["cpu_family"]
-                if isinstance(e, dict)
-                else e.valueForConstitutivePropertyIdentifier("cpu_family").value
-            )
+            (e.valueForConstitutivePropertyIdentifier("cpu_family").value)
             for e in group
         }
 
@@ -226,19 +217,10 @@ async def test_group_sampler_remote(
         count += len(group)
         group_count += 1
         node_value = {
-            (
-                e["nodes"]
-                if isinstance(e, dict)
-                else e.valueForConstitutivePropertyIdentifier("nodes").value
-            )
-            for e in group
+            (e.valueForConstitutivePropertyIdentifier("nodes").value) for e in group
         }
         cpu_value = {
-            (
-                e["cpu_family"]
-                if isinstance(e, dict)
-                else e.valueForConstitutivePropertyIdentifier("cpu_family").value
-            )
+            (e.valueForConstitutivePropertyIdentifier("cpu_family").value)
             for e in group
         }

From 89872857a2cae4e94b64ea5e464f10413ef7221b Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 24 Nov 2025 18:09:25 +0000
Subject: [PATCH 42/46] chore(vllm_performance): removed custom entity_for_point in favor of the new one from the discovery space

Signed-off-by: Christian Pinto
---
 .../core/discoveryspace/group_samplers.py | 47 ++++----------------
 1 file changed, 10 insertions(+), 37 deletions(-)

diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py
index 40397ac1..070a17e2 100644
--- a/orchestrator/core/discoveryspace/group_samplers.py
+++ b/orchestrator/core/discoveryspace/group_samplers.py
@@ -1,6 +1,7 @@
 # Copyright (c) IBM Corporation
 # SPDX-License-Identifier: MIT
 
+import asyncio
 import logging
 from collections.abc import AsyncGenerator, Generator
 from typing import Any
 
 import numpy as np
-import ray
 
 from orchestrator.core.discoveryspace.samplers import (
     ExplicitEntitySpaceGridSampleGenerator,
@@ -55,30 +56,6 @@ def _get_space_matching_points(discovery_space: DiscoverySpace) -> list[dict]:
     return points
 
 
-def _entity_for_point(point: dict, discovery_space: DiscoverySpace) -> list[Entity]:
-    if "entity_identifier" in point:
-        entity = discovery_space.sample_store.entityWithIdentifier(
-            entityIdentifier=point["entity_identifier"]
-        )
-    else:
-        entity = discovery_space.entity_for_point(point)
-
-    return entity
-
-
-async def _entity_for_point_async(
-    point: dict, remote_discovery_space: DiscoverySpaceManager
-) -> list[Entity]:
-    if "entity_identifier" in point:
-        entity = await remote_discovery_space.storedEntityWithIdentifier.remote(
-            entityIdentifier=point["entity_identifier"]
-        )
-    else:
-        entity = await remote_discovery_space.entity_for_point.remote(point)
-
-    return entity
-
-
 def _build_point_group_values(
     point: dict, group: list[str]
 ) -> frozenset[tuple[str, Any]]:
@@ -149,14 +126,12 @@ async def _sequential_iterator_async(
     """
     group_list = _build_groups_list(points=points, group=group)
     for i in range(len(group_list)):
-        entity_list = [
-            await _entity_for_point_async(
-                point=point, remote_discovery_space=remote_discovery_space
-            )
+        entity_list_refs = [
+            remote_discovery_space.entity_for_point.remote(point)
             for point in 
group_list[i] ] - lst = entity_list - yield lst + entity_list = await asyncio.gather(*entity_list_refs) + yield entity_list def _sequential_iterator( @@ -173,8 +148,7 @@ def _sequential_iterator( group_list = _build_groups_list(points=points, group=group) for i in range(len(group_list)): entity_list = [ - _entity_for_point(point=point, discovery_space=discovery_space) - for point in group_list[i] + discovery_space.entity_for_point(point) for point in group_list[i] ] yield entity_list @@ -195,12 +169,11 @@ async def _random_iterator_async( a=range(len(group_list)), size=len(group_list), replace=False ) for i in range(len(randomized)): - entity_list = [ - await _entity_for_point_async( - remote_discovery_space=remote_discovery_space, point=point - ) + entity_list_refs = [ + remote_discovery_space.entity_for_point.remote(point) for point in group_list[randomized[i]] ] + entity_list = await asyncio.gather(*entity_list_refs) yield entity_list @@ -221,7 +194,7 @@ def _random_iterator( ) for i in range(len(randomized)): entity_list = [ - _entity_for_point(point=point, discovery_space=discovery_space) + discovery_space.entity_for_point(point) for point in group_list[randomized[i]] ] yield entity_list From 6d5c6e0bbd512e25e0b9e1595b7d4636fc714d73 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Tue, 25 Nov 2025 08:28:04 +0000 Subject: [PATCH 43/46] chore(vllm_performance): removed unnecessary entity_identifier field from point dict Signed-off-by: Christian Pinto --- orchestrator/core/discoveryspace/group_samplers.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/orchestrator/core/discoveryspace/group_samplers.py b/orchestrator/core/discoveryspace/group_samplers.py index 070a17e2..36efa8df 100644 --- a/orchestrator/core/discoveryspace/group_samplers.py +++ b/orchestrator/core/discoveryspace/group_samplers.py @@ -48,9 +48,6 @@ def _get_space_matching_points(discovery_space: DiscoverySpace) -> list[dict]: point = { v.property.identifier: v.value for v in entity.constitutive_property_values } - # we save the entity identifier to retrieve the entity from the store at a later stage, - # assuming they are cached locally - point["entity_identifier"] = entity.identifier points.append(point) return points From 8b2d6f6691e5c7d11ff67d32551c6d0f43274508 Mon Sep 17 00:00:00 2001 From: Christian Pinto Date: Mon, 8 Dec 2025 13:21:51 +0000 Subject: [PATCH 44/46] fix(vllm_performance): Fixed geospatial experiments after merge with master Signed-off-by: Christian Pinto --- .../vllm_performance/experiment_executor.py | 11 +++++++++-- .../experiments/performance_testing.yaml | 9 --------- .../experiments/performance_testing_geospatial.yaml | 6 ------ .../vllm_performance/k8s/manage_components.py | 5 ++++- 4 files changed, 13 insertions(+), 18 deletions(-) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index ff6b7c0f..c60085d0 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -210,8 +210,8 @@ def _create_environment( reuse_deployment=False, namespace=actuator.namespace, pvc_name=pvc_name, - skip_tokenizer_init=values.get("skip_tokenizer_init"), - enforce_eager=values.get("enforce_eager"), + skip_tokenizer_init=values.get("skip_tokenizer_init", 0) == 1, + enforce_eager=values.get("enforce_eager", 0) == 1, 
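# --- Editorial aside, not part of the patch: the two lines above normalise
# discrete 0/1 entity properties to booleans; values.get(key, 0) == 1 lets a
# missing key default safely to False. Standalone sketch (hypothetical values):
values = {"skip_tokenizer_init": 1}
assert (values.get("skip_tokenizer_init", 0) == 1) is True
assert (values.get("enforce_eager", 0) == 1) is False  # absent key -> False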
io_processor_plugin=values.get("io_processor_plugin"), check_interval=check_interval, timeout=timeout, @@ -415,6 +415,13 @@ def run_resource_and_workload_experiment( if max_concurrency < 0: max_concurrency = None started_benchmarking = True + console.put.remote( + message=RichConsoleSpinnerMessage( + id=request.requestid, + label=f"({request.requestid}) Executing vllm bench serve", + state="start", + ) + ) if experiment.identifier in [ "test-geospatial-deployment-v1", "test-geospatial-deployment-custom-dataset-v1", diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml index 14191aee..65f00181 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml @@ -126,12 +126,6 @@ test-deployment-v1: propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] - - identifier: 'dataset' - metadata: - description: "(benchmark) The dataset to use for the benchmark" - propertyDomain: - variableType: 'CATEGORICAL_VARIABLE_TYPE' - values: [ 'random' ] defaultParameterization: - property: identifier: 'image' @@ -182,9 +176,6 @@ test-deployment-v1: - property: identifier: 'gpu_type' value: 'NVIDIA-A100-80GB-PCIe' - - property: - identifier: 'dataset' - value: 'random' # measurements targetProperties: - identifier: "duration" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml index 3df8b5b9..bee5660b 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml @@ -71,8 +71,6 @@ performance_testing-geospatial-endpoint: targetProperties: - identifier: "duration" - identifier: "completed" - - identifier: "total_input_tokens" - - identifier: "total_output_tokens" - identifier: "request_throughput" - identifier: "mean_e2el_ms" - identifier: "median_e2el_ms" @@ -267,8 +265,6 @@ performance_testing-geospatial-full: targetProperties: - identifier: "duration" - identifier: "completed" - - identifier: "total_input_tokens" - - identifier: "total_output_tokens" - identifier: "request_throughput" - identifier: "mean_e2el_ms" - identifier: "median_e2el_ms" @@ -460,8 +456,6 @@ performance_testing-geospatial-full-custom-dataset: targetProperties: - identifier: "duration" - identifier: "completed" - - identifier: "total_input_tokens" - - identifier: "total_output_tokens" - identifier: "request_throughput" - identifier: "mean_e2el_ms" - identifier: "median_e2el_ms" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/manage_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/manage_components.py index 5296b068..3fc633d4 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/manage_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8s/manage_components.py @@ -86,7 +86,7 @@ def __init__( self.pvc_name = f"vllm-support-{uuid.uuid4().hex!s}" 
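# --- Editorial aside, not part of the patch: the logging fix just below
# matters because on this branch the pvc_name argument is None and only
# self.pvc_name holds the generated name. Standalone sketch:
import uuid

pvc_name = None  # caller did not supply a PVC name
generated = f"vllm-support-{uuid.uuid4().hex!s}"
assert generated.startswith("vllm-support-") and pvc_name is None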
self.create_pvc(pvc_name=self.pvc_name, template=pvc_template)
             self.pvc_created = True
-            logger.debug(f"Created pvc {pvc_name} in namespace {namespace}")
+            logger.debug(f"Created pvc {self.pvc_name} in namespace {namespace}")
         else:
             if not self.check_pvc_exists(pvc_name=pvc_name):
                 error_message = (
@@ -345,6 +345,9 @@ def create_deployment(
             template=template,
             claim_name=claim_name,
             hf_token=hf_token,
+            skip_tokenizer_init=skip_tokenizer_init,
+            io_processor_plugin=io_processor_plugin,
+            enforce_eager=enforce_eager,
         )
 
         logger.debug(json.dumps(deployment_yaml, indent=2))

From 0fa9817143633564439255a1a636b8b9dd1925de Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 8 Dec 2025 13:30:21 +0000
Subject: [PATCH 45/46] fix(vllm_performance): remove leftover dataset
 property from performance_testing.yaml after merge with main

Signed-off-by: Christian Pinto
---
 .../vllm_performance/experiments/performance_testing.yaml | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
index a5dc1c2a..d02b2212 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml
@@ -55,12 +55,6 @@ test-deployment-v1:
         variableType: 'DISCRETE_VARIABLE_TYPE'
         domainRange: [1, 10000]
         interval: 1
-    - identifier: 'dataset'
-      metadata:
-        description: "(benchmark) The dataset to be used for the experiment"
-        propertyDomain:
-          variableType: "CATEGORICAL_VARIABLE_TYPE"
-          values: ['random']
     - identifier: image
       metadata:
         description: "(deployment) Docker image to use to create vllm deployments"

From 343197160d99ad4f0a1ac7bb0ee66c744aa1f97f Mon Sep 17 00:00:00 2001
From: Christian Pinto
Date: Mon, 8 Dec 2025 13:56:16 +0000
Subject: [PATCH 46/46] chore(vllm_performance): overall cleanup of geospatial
 experiment YAML and benchmark docstrings

Signed-off-by: Christian Pinto
---
 .../performance_testing_geospatial.yaml       | 107 +++++++++---------
 .../execute_benchmark.py                      |   8 +-
 2 files changed, 56 insertions(+), 59 deletions(-)

diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
index bee5660b..b7a48110 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
@@ -1,6 +1,5 @@
 # Copyright (c) IBM Corporation
 # SPDX-License-Identifier: MIT
-
 # The input to an experiment is an Entity. For the Entity to be a valid input
 # its properties must match what is defined here
 performance_testing-geospatial-endpoint:
@@ -24,36 +23,36 @@ performance_testing-geospatial-endpoint:
       description: "The number of requests to send per second"
       propertyDomain:
         variableType: 'DISCRETE_VARIABLE_TYPE'
-        domainRange: [-1,1000]
-        interval: 1 # -1 means send all requests at time 0
+        domainRange: [-1, 1000]
+        interval: 1  # -1 means send all requests at time 0
   optionalProperties:
     - identifier: 'num_prompts'
      metadata:
        description: "The number of prompts to send (total number of requests)"
        propertyDomain:
          variableType: 'DISCRETE_VARIABLE_TYPE'
-         domainRange: [1,10001]
+         domainRange: [1, 10001]
          interval: 1
    - identifier: 'burstiness'
      metadata:
        description: "The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness."
        propertyDomain:
          variableType: 'DISCRETE_VARIABLE_TYPE'
-         domainRange: [ 0, 10 ]
+         domainRange: [0, 10]
          interval: 1
    - identifier: 'max_concurrency'
      metadata:
        description: "The maximum number of concurrent requests to send"
        propertyDomain:
          variableType: 'DISCRETE_VARIABLE_TYPE'
-         domainRange: [ -1, 500 ] # -1 means no concurrency control
+         domainRange: [-1, 500]  # -1 means no concurrency control
          interval: 1
    - identifier: 'dataset'
      metadata:
        description: "The dataset to be used for the experiment"
        propertyDomain:
          variableType: "CATEGORICAL_VARIABLE_TYPE"
-         values: [ 'india_url_in_b64_out', 'valencia_url_in_b64_out' ]
+         values: ['india_url_in_b64_out', 'valencia_url_in_b64_out']
   defaultParameterization:
     - value: 100
       property:
@@ -90,135 +89,134 @@ performance_testing-geospatial-full:
       description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation'
      propertyDomain:
        variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
-        values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ]
+        values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"]
    - identifier: 'request_rate'
      metadata:
        description: "(benchmark) The number of requests to send per second"
        propertyDomain:
          variableType: 'DISCRETE_VARIABLE_TYPE'
-         domainRange: [-1,1000]
-         interval: 1 # -1 means send all requests at time 0
+         domainRange: [-1, 1000]
+         interval: 1  # -1 means send all requests at time 0
   optionalProperties:
     - identifier: 'num_prompts'
      metadata:
        description: "(benchmark) The number of prompts to send (total number of requests)"
        propertyDomain:
          variableType: 'DISCRETE_VARIABLE_TYPE'
-         domainRange: [1,10001]
+         domainRange: [1, 10001]
          interval: 1
    - identifier: 'max_concurrency'
      metadata:
        description: "(benchmark) The maximum number of concurrent requests to send"
        propertyDomain:
          variableType: 'DISCRETE_VARIABLE_TYPE'
-         domainRange: [ -1, 500 ] # -1 means no concurrency control
+         domainRange: [-1, 500]  # -1 means no concurrency control
          interval: 1
    - identifier: 'burstiness'
      metadata:
        description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness."
propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 0, 10 ] + domainRange: [0, 10] interval: 1 - identifier: 'dataset' metadata: description: "(benchmark) The dataset to be used for the experiment" propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ 'india_url_in_b64_out', 'valencia_url_in_b64_out' ] + values: ['india_url_in_b64_out', 'valencia_url_in_b64_out'] - identifier: image metadata: description: "(deployment) Docker image to use to create vllm deployments" propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] + values: ["your/image/with/vllm/and/terratorch:0.1"] - identifier: n_cpus metadata: description: "(deployment) the number of CPUs to use" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 1,17 ] + domainRange: [1, 17] interval: 1 - identifier: memory metadata: description: "(deployment) the amount of memory to allocate to vLLM pod" propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ "64Gi", "128Gi", "256Gi" ] + values: ["64Gi", "128Gi", "256Gi"] - identifier: dtype metadata: description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] + values: ["auto", "half", "float16", "bfloat16", "float", "float32"] - identifier: 'gpu_memory_utilization' metadata: description: "(deployment) The fraction of GPU memory to be used for the model executor," propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - values: [ .5, .75, .9 ] + values: [.5, .75, .9] - identifier: 'cpu_offload' metadata: description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 
0 means all weights are on GPU," propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - values: [ 0, 8, 16, 24, 32 ] + values: [0, 8, 16, 24, 32] - identifier: 'max_num_seq' metadata: description: "(deployment) Maximum number of sequences per iteration" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [32,2049] + domainRange: [32, 2049] interval: 32 - identifier: 'max_batch_tokens' metadata: description: "(deployment) maximum number of batched tokens per iteration" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 8192, 32769] + domainRange: [8192, 32769] interval: 1024 - identifier: 'n_gpus' metadata: description: "(deployment) Number of GPUs to use" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [1,9] + domainRange: [1, 9] interval: 1 - identifier: 'gpu_type' metadata: description: "(deployment) The GPU type to use" propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + values: ['NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB'] - identifier: 'skip_tokenizer_init' metadata: description: "(deployment) skip tokenizer initialization" propertyDomain: - variableType: BINARY_VARIABLE_TYPE + variableType: BINARY_VARIABLE_TYPE values: [True, False] - identifier: 'enforce_eager' metadata: description: "(deployment) enforce pytorch eager mode" propertyDomain: - variableType: BINARY_VARIABLE_TYPE + variableType: BINARY_VARIABLE_TYPE values: [True, False] - identifier: 'io_processor_plugin' metadata: description: 'IO Processor plugin to load for the model' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ None, "terratorch_segmentation" ] + values: [None, "terratorch_segmentation"] defaultParameterization: - property: identifier: 'image' - value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" + value: "your/image/with/vllm/and/terratorch:0.1" - property: identifier: n_cpus value: 8 - property: - identifier: - memory - value: "128Gi" + identifier: memory + value: "128Gi" - property: identifier: dtype value: "auto" @@ -284,14 +282,14 @@ performance_testing-geospatial-full-custom-dataset: description: 'model to use for testing. Assumed to be served by all endpoints tested. 
Required to obtain correct tokenizer for benchmarking metrics calculation' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] + values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"] - identifier: 'request_rate' metadata: description: "(benchmark) The number of requests to send per second" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [-1,1000] - interval: 1 # -1 means send all requests at time 0 + domainRange: [-1, 1000] + interval: 1 # -1 means send all requests at time 0 - identifier: 'dataset' metadata: description: "(benchmark) The dataset to be used for the experiment" @@ -304,115 +302,114 @@ performance_testing-geospatial-full-custom-dataset: description: "(benchmark) The number of prompts to send (total number of requests)" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [1,10001] + domainRange: [1, 10001] interval: 1 - identifier: 'max_concurrency' metadata: description: "(benchmark) The maximum number of concurrent requests to send" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ -1, 500 ] # -1 means no concurrency control + domainRange: [-1, 500] # -1 means no concurrency control interval: 1 - identifier: 'burstiness' metadata: description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 0, 10 ] + domainRange: [0, 10] interval: 1 - identifier: image metadata: description: "(deployment) Docker image to use to create vllm deployments" propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] + values: ["your/image/with/vllm/and/terratorch:0.1"] - identifier: n_cpus metadata: description: "(deployment) the number of CPUs to use" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 1,17 ] + domainRange: [1, 17] interval: 1 - identifier: memory metadata: description: "(deployment) the amount of memory to allocate to vLLM pod" propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ "64Gi", "128Gi", "256Gi" ] + values: ["64Gi", "128Gi", "256Gi"] - identifier: dtype metadata: description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] + values: ["auto", "half", "float16", "bfloat16", "float", "float32"] - identifier: 'gpu_memory_utilization' metadata: description: "(deployment) The fraction of GPU memory to be used for the model executor," propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - values: [ .5, .75, .9 ] + values: [.5, .75, .9] - identifier: 'cpu_offload' metadata: description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 
0 means all weights are on GPU," propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - values: [ 0, 8, 16, 24, 32 ] + values: [0, 8, 16, 24, 32] - identifier: 'max_num_seq' metadata: description: "(deployment) Maximum number of sequences per iteration" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [32,2049] + domainRange: [32, 2049] interval: 32 - identifier: 'max_batch_tokens' metadata: description: "(deployment) maximum number of batched tokens per iteration" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [ 8192, 32769] + domainRange: [8192, 32769] interval: 1024 - identifier: 'n_gpus' metadata: description: "(deployment) Number of GPUs to use" propertyDomain: variableType: 'DISCRETE_VARIABLE_TYPE' - domainRange: [1,9] + domainRange: [1, 9] interval: 1 - identifier: 'gpu_type' metadata: description: "(deployment) The GPU type to use" propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" - values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + values: ['NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB'] - identifier: 'skip_tokenizer_init' metadata: description: "(deployment) skip tokenizer initialization" propertyDomain: - variableType: BINARY_VARIABLE_TYPE + variableType: BINARY_VARIABLE_TYPE values: [True, False] - identifier: 'enforce_eager' metadata: description: "(deployment) enforce PyTorch eager mode" propertyDomain: - variableType: BINARY_VARIABLE_TYPE + variableType: BINARY_VARIABLE_TYPE values: [True, False] - identifier: 'io_processor_plugin' metadata: description: 'IO Processor plugin to load for the model' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" - values: [ "terratorch_segmentation" ] + values: ["terratorch_segmentation"] defaultParameterization: - property: identifier: 'image' - value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" + value: "your/image/with/vllm/and/terratorch:0.1" - property: identifier: n_cpus value: 8 - property: - identifier: - memory - value: "128Gi" + identifier: memory + value: "128Gi" - property: identifier: dtype value: "auto" @@ -465,4 +462,4 @@ performance_testing-geospatial-full-custom-dataset: - identifier: "p75_e2el_ms" - identifier: "p99_e2el_ms" metadata: - description: 'VLLM performance testing across compute resource and workload configuration' \ No newline at end of file + description: 'VLLM performance testing across compute resource and workload configuration' diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index 9a477c82..05bd8fb2 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py @@ -45,7 +45,7 @@ def execute_benchmark( Execute benchmark :param base_url: url for vllm endpoint :param model: model - :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"] + :param dataset: data set name ["random"] :param backend: name of the vLLM benchmark backend to be used ["vllm", "openai", "openai-chat", "openai-audio", "openai-embeddings"] :param interpreter: name of Python interpreter :param num_prompts: number of prompts @@ -83,7 +83,7 @@ def execute_benchmark( request = f"export VLLM_BENCH_LOGLEVEL={log_level} && " + request request += ( f"vllm bench serve --backend {backend} --base-url 
{base_url} --dataset-name {dataset} "
-        f"--model {model} --seed 12345 --num-prompts 10 --save-result --metric-percentiles "
+        f"--model {model} --seed 12345 --num-prompts {num_prompts!s} --save-result --metric-percentiles "
         f'"25,75,99" --percentile-metrics "ttft,tpot,itl,e2el" --result-dir . --result-filename {f_name} '
         f"--burstiness {burstiness} "
     )
@@ -146,7 +146,7 @@ def execute_random_benchmark(
     Execute benchmark with random dataset
     :param base_url: url for vllm endpoint
     :param model: model
-    :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"]
+    :param dataset: data set name ["random"]
     :param num_prompts: number of prompts
     :param request_rate: request rate
     :param max_concurrency: maximum number of concurrent requests
@@ -197,7 +197,7 @@ def execute_geospatial_benchmark(
     Execute benchmark with the geospatial dataset
     :param base_url: url for vllm endpoint
     :param model: model
-    :param dataset: data set name ["sharegpt", "sonnet", "random", "hf"]
+    :param dataset: geospatial data set name ["india_url_in_b64_out", "valencia_url_in_b64_out"]
     :param num_prompts: number of prompts
     :param request_rate: request rate
     :param max_concurrency: maximum number of concurrent requests
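
A note on the sampler change in the group_samplers patches above: the new
iterators launch every entity_for_point.remote() call for a group first and
then resolve the resulting Ray ObjectRefs together with asyncio.gather,
instead of awaiting each point in turn. Below is a minimal, self-contained
sketch of that pattern; it is illustrative only — RemoteSpace and its
entity_for_point method are stand-ins, not the orchestrator's actual
DiscoverySpace classes.

import asyncio

import ray


@ray.remote
class RemoteSpace:
    """Illustrative stand-in for the remote discovery-space actor."""

    def entity_for_point(self, point: dict) -> dict:
        # Placeholder for the real entity-construction logic.
        return {"entity_for": point}


async def resolve_group(space, group: list[dict]) -> list[dict]:
    # Launch all remote calls up front so they execute concurrently ...
    refs = [space.entity_for_point.remote(point) for point in group]
    # ... then await them together; Ray ObjectRefs are awaitable.
    return await asyncio.gather(*refs)


if __name__ == "__main__":
    ray.init()
    space = RemoteSpace.remote()
    print(asyncio.run(resolve_group(space, [{"x": 1}, {"x": 2}])))
    ray.shutdown()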