From 60fdc32a5872444f728d05826b0a93ffdb0554bf Mon Sep 17 00:00:00 2001
From: Radovan Fuchs
Date: Mon, 2 Feb 2026 09:45:54 +0100
Subject: [PATCH] add extra e2e tests to align with OLS

add new tests for token quotas
---
 tests/e2e/features/query.feature           |  31 +++-
 tests/e2e/features/steps/token_counters.py | 204 +++++++++++++++++++++
 tests/e2e/features/streaming_query.feature |  67 ++++---
 3 files changed, 269 insertions(+), 33 deletions(-)
 create mode 100644 tests/e2e/features/steps/token_counters.py

diff --git a/tests/e2e/features/query.feature b/tests/e2e/features/query.feature
index 501233b5..ac43b786 100644
--- a/tests/e2e/features/query.feature
+++ b/tests/e2e/features/query.feature
@@ -8,6 +8,7 @@ Feature: Query endpoint API tests
  Scenario: Check if LLM responds properly to restrictive system prompt to sent question with different system prompt
    Given The system is in default state
    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
    When I use "query" to ask question with authorization header
    """
    {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything but openshift questions", "model": "{MODEL}", "provider": "{PROVIDER}"}
    """
@@ -16,18 +17,22 @@
    And The response should contain following fragments
      | Fragments in LLM response |
      | ask |
+    And The token metrics should have increased

  Scenario: Check if LLM responds properly to non-restrictive system prompt to sent question with different system prompt
    Given The system is in default state
-    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
    When I use "query" to ask question with authorization header
    """
    {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "you are linguistic assistant", "model": "{MODEL}", "provider": "{PROVIDER}"}
    """
-    Then The status code of the response is 200
+    Then The status code of the response is 200
    And The response should contain following fragments
      | Fragments in LLM response |
      | checkout |
+    And The response should contain token counter fields
+    And The token metrics should have increased

  #enable on demand
  @skip
@@ -79,12 +84,14 @@
  Scenario: Check if LLM responds to sent question with error when attempting to access conversation
    Given The system is in default state
    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
    When I use "query" to ask question with authorization header
    """
    {"conversation_id": "123e4567-e89b-12d3-a456-426614174000", "query": "Write a simple code for reversing string", "model": "{MODEL}", "provider": "{PROVIDER}"}
    """
    Then The status code of the response is 404
    And The body of the response contains Conversation not found
+    And The token metrics should not have changed

  Scenario: Check if LLM responds to sent question with error when attempting to access conversation with incorrect conversation ID format
    Given The system is in default state
@@ -101,13 +108,25 @@ Scenario: Check if LLM responds for query request with error for missing query
    Given The system is in default state
    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
    When I use "query" to ask question with authorization header
    """
-    {"provider": "{PROVIDER}"}
+    {"conversation_id": "123e4567", "query": "Write a simple code for reversing string", "model": "{MODEL}", "provider": "{PROVIDER}"}
    """
-    Then The status code of the response is 422
-    And The body of the response is the following
+    Then The status code of the response is 422
+    And The body of the response contains Value error, Improper conversation ID '123e4567'
+
+  Scenario: Check if LLM responds for query request with error for missing query
+    Given The system is in default state
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
+    When I use "query" to ask question with authorization header
+    """
+    {"provider": "{PROVIDER}"}
+    """
+    Then The status code of the response is 422
+    And The body of the response is the following
    """
    { "detail": [{"type": "missing", "loc": [ "body", "query" ], "msg": "Field required", "input": {"provider": "{PROVIDER}"}}] }
    """
+    And The token metrics should not have changed

  Scenario: Check if LLM responds for query request for missing model and provider
    Given The system is in default state
@@ -163,12 +182,14 @@ Scenario: Check if LLM responds for query request with error for missing query
    Given The system is in default state
    And The llama-stack connection is disrupted
    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
    When I use "query" to ask question with authorization header
    """
    {"query": "Say hello"}
    """
    Then The status code of the response is 503
    And The body of the response contains Unable to connect to Llama Stack
+    And The token metrics should not have changed

  Scenario: Check if LLM responds properly when XML and JSON attachments are sent
    Given The system is in default state
diff --git a/tests/e2e/features/steps/token_counters.py b/tests/e2e/features/steps/token_counters.py
new file mode 100644
index 00000000..5bb4c83e
--- /dev/null
+++ b/tests/e2e/features/steps/token_counters.py
@@ -0,0 +1,204 @@
+"""Step definitions for token counter validation."""
+
+import json
+
+import requests
+from behave import given, then  # pyright: ignore[reportAttributeAccessIssue]
+from behave.runner import Context
+
+DEFAULT_TIMEOUT = 10
+
+
+@then("The response should contain token counter fields")
+def check_token_counter_fields(context: Context) -> None:
+    """Check that response contains input_tokens and output_tokens fields."""
+    assert context.response is not None, "Request needs to be performed first"
+    response_json = context.response.json()
+
+    input_tokens = response_json.get("input_tokens")
+    output_tokens = response_json.get("output_tokens")
+    assert (
+        "input_tokens" in response_json
+    ), f"Response should contain 'input_tokens' field. Got: {response_json}"
+    assert (
+        "output_tokens" in response_json
+    ), f"Response should contain 'output_tokens' field. Got: {response_json}"
+    assert (
+        "available_quotas" in response_json
+    ), f"Response should contain 'available_quotas' field. Got: {response_json}"
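+    # Note: only non-negativity is asserted below; exact token counts depend on the
+    # provider/model and the prompt, so specific values are deliberately not checked.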
+    assert input_tokens >= 0, f"input_tokens should be non-negative, got {input_tokens}"
+    assert (
+        output_tokens >= 0
+    ), f"output_tokens should be non-negative, got {output_tokens}"
+
+
+@given("I capture the current token metrics")
+def capture_token_metrics(context: Context) -> None:
+    """Capture the current Prometheus token metrics values.
+
+    Stores the metrics in context.initial_token_metrics for later comparison.
+    """
+    context.initial_token_metrics = _get_current_token_metrics(context)
+    print(f"Initial token metrics: {context.initial_token_metrics}")
+
+
+@then("The token metrics should have increased")
+def check_token_metrics_increased(context: Context) -> None:
+    """Check that token metrics have increased after a query.
+
+    Compares current metrics against context.initial_token_metrics.
+    """
+    assert hasattr(
+        context, "initial_token_metrics"
+    ), "Initial metrics not captured. Call 'I capture the current token metrics' first"
+
+    final_metrics = _get_current_token_metrics(context)
+    initial_metrics = context.initial_token_metrics
+
+    print(f"Final token metrics: {final_metrics}")
+
+    # Check that both token metrics increased
+    sent_increased = final_metrics["token_sent"] > initial_metrics["token_sent"]
+    received_increased = (
+        final_metrics["token_received"] > initial_metrics["token_received"]
+    )
+
+    assert sent_increased and received_increased, (
+        f"Both token metrics should have increased. "
+        f"Initial: {initial_metrics}, Final: {final_metrics}"
+    )
+
+
+@then("The token metrics should not have changed")
+def check_token_metrics_unchanged(context: Context) -> None:
+    """Check that token metrics have not changed after an error.
+
+    Compares current metrics against context.initial_token_metrics.
+    """
+    assert hasattr(
+        context, "initial_token_metrics"
+    ), "Initial metrics not captured. Call 'I capture the current token metrics' first"
+
+    final_metrics = _get_current_token_metrics(context)
+    initial_metrics = context.initial_token_metrics
+
+    print(f"Final token metrics: {final_metrics}")
+
+    assert final_metrics["token_sent"] == initial_metrics["token_sent"], (
+        f"token_sent should not have changed. "
+        f"Initial: {initial_metrics['token_sent']}, Final: {final_metrics['token_sent']}"
+    )
+    assert final_metrics["token_received"] == initial_metrics["token_received"], (
+        f"token_received should not have changed. "
+        f"Initial: {initial_metrics['token_received']}, "
+        f"Final: {final_metrics['token_received']}"
+    )
+
+
+@then("The streamed response should contain token counter fields")
+def check_streamed_token_counter_fields(context: Context) -> None:
+    """Check that streamed response end event contains token fields."""
+    assert context.response_data is not None, "Response data needs to be parsed first"
+
+    # Parse the end event from the streaming response to get token info
+    end_event_data = _get_end_event_data(context.response.text)
+    assert end_event_data is not None, "End event not found in streaming response"
+
+    assert "input_tokens" in end_event_data, (
+        f"Streamed response should contain 'input_tokens' in end event. "
+        f"Got: {end_event_data}"
+    )
+    assert "output_tokens" in end_event_data, (
+        f"Streamed response should contain 'output_tokens' in end event. "
+        f"Got: {end_event_data}"
+    )
+    assert "available_quotas" in end_event_data, (
+        f"Streamed response should contain 'available_quotas' in end event. "
+        f"Got: {end_event_data}"
+    )
+    input_tokens: int = end_event_data["input_tokens"]
+    output_tokens: int = end_event_data["output_tokens"]
+    assert (
+        input_tokens >= 0
+    ), f"streamed input_tokens should be non-negative, got {input_tokens}"
+    assert (
+        output_tokens >= 0
+    ), f"streamed output_tokens should be non-negative, got {output_tokens}"
+
+
+def _get_current_token_metrics(context: Context) -> dict[str, float]:
+    """Fetch and parse current token metrics from Prometheus endpoint.
+
+    Parameters:
+        context: Behave context containing hostname, port, and auth_headers.
+
+    Returns:
+        Dictionary with 'token_sent' and 'token_received' totals.
+    """
+    base = f"http://{context.hostname}:{context.port}"
+    url = f"{base}/metrics"
+    headers = context.auth_headers if hasattr(context, "auth_headers") else {}
+
+    response = requests.get(url, headers=headers, timeout=DEFAULT_TIMEOUT)
+    assert (
+        response.status_code == 200
+    ), f"Failed to get metrics, status: {response.status_code}"
+
+    return _parse_token_metrics(response.text)
+
+
+def _get_end_event_data(response_text: str) -> dict | None:
+    """Extract the end event data from streaming SSE response.
+
+    Parameters:
+        response_text: The raw SSE response text.
+
+    Returns:
+        The data dictionary from the end event (including available_quotas),
+        or None if not found.
+    """
+    lines = response_text.strip().split("\n")
+    for line in lines:
+        if line.startswith("data: "):
+            try:
+                event = json.loads(line[6:])
+                if event.get("event") == "end":
+                    # Merge data contents with available_quotas from parent level
+                    result = event.get("data", {})
+                    result["available_quotas"] = event.get("available_quotas", {})
+                    return result
+            except json.JSONDecodeError:
+                continue
+    return None
+
+
+def _parse_token_metrics(metrics_text: str) -> dict[str, float]:
+    """Parse Prometheus metrics text to extract token counter values.
+
+    Parameters:
+        metrics_text: Raw Prometheus metrics text output.
+
+    Returns:
+        Dictionary with 'token_sent' and 'token_received' totals.
+    """
+    token_sent_total = 0.0
+    token_received_total = 0.0
+
+    # Prometheus format: metric_name{labels} value
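+    # Example of a matching line (label names and values here are illustrative only):
+    #   ls_llm_token_sent_total{model="gpt-4o-mini",provider="openai"} 42.0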
+    for line in metrics_text.split("\n"):
+        line = line.strip()
+        if not line or line.startswith("#"):
+            continue
+
+        # Extract value (last space-separated element)
+        if line.startswith("ls_llm_token_sent_total{"):
+            value = line.split()[-1]
+            token_sent_total += float(value)
+        elif line.startswith("ls_llm_token_received_total{"):
+            value = line.split()[-1]
+            token_received_total += float(value)
+
+    return {
+        "token_sent": token_sent_total,
+        "token_received": token_received_total,
+    }
diff --git a/tests/e2e/features/streaming_query.feature b/tests/e2e/features/streaming_query.feature
index a89dde12..22b3255b 100644
--- a/tests/e2e/features/streaming_query.feature
+++ b/tests/e2e/features/streaming_query.feature
@@ -19,29 +19,34 @@ Feature: streaming_query endpoint API tests
  Scenario: Check if LLM responds properly to restrictive system prompt to sent question with different system prompt
    Given The system is in default state
-    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
-    And I use "streaming_query" to ask question with authorization header
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
+    And I use "streaming_query" to ask question with authorization header
    """
    {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything but openshift questions", "model": "{MODEL}", "provider": "{PROVIDER}"}
    """
-    When I wait for the response to be completed
-    Then The status code of the response is 200
+    When I wait for the response to be completed
+    Then The status code of the response is 200
    And The streamed response should contain following fragments
      | Fragments in LLM response |
      | questions |
+    And The token metrics should have increased

  Scenario: Check if LLM responds properly to non-restrictive system prompt to sent question with different system prompt
    Given The system is in default state
-    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
-    And I use "streaming_query" to ask question with authorization header
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
+    And I use "streaming_query" to ask question with authorization header
    """
    {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "you are linguistic assistant", "model": "{MODEL}", "provider": "{PROVIDER}"}
    """
-    When I wait for the response to be completed
-    Then The status code of the response is 200
+    When I wait for the response to be completed
+    Then The status code of the response is 200
    And The streamed response should contain following fragments
      | Fragments in LLM response |
      | checkout |
+    And The streamed response should contain token counter fields
+    And The token metrics should have increased

  #enable on demand
  @skip
@@ -65,16 +70,18 @@
  Scenario: Check if LLM responds for streaming_query request with error for missing query
    Given The system is in default state
-    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
    When I use "streaming_query" to ask question with authorization header
    """
    {"provider": "{PROVIDER}"}
    """
-    Then The status code of the response is 422
+    Then The status code of the response is 422
    And The body of the response is the following
    """
    { "detail": [{"type": "missing", "loc": [ "body", "query" ], "msg": "Field required", "input": {"provider": "{PROVIDER}"}}] }
    """
+    And The token metrics should not have changed

  Scenario: Check if LLM responds for streaming_query request for missing model and provider
    Given The system is in default state
@@ -87,13 +94,15 @@
  Scenario: Check if LLM responds for streaming_query request with error for missing model
    Given The system is in default state
-    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
    When I use "streaming_query" to ask question with authorization header
    """
    {"query": "Say hello", "provider": "{PROVIDER}"}
    """
-    Then The status code of the response is 422
+    Then The status code of the response is 422
    And The body of the response contains Value error, Model must be specified if provider is specified
+    And The token metrics should not have changed

  Scenario: Check if LLM responds for streaming_query request with error for missing provider
    Given The system is in default state
@@ -102,28 +111,30 @@
    When I use "streaming_query" to ask question with authorization header
    """
    {"query": "Say hello", "model": "{MODEL}"}
    """
-    Then The status code of the response is 422
+    Then The status code of the response is 422
    And The body of the response contains Value error, Provider must be specified if model is specified

-  Scenario: Check if LLM responds for query request with error for unknown model
+  Scenario: Check if LLM responds for streaming_query request with error for unknown model
    Given The system is in default state
-    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
-    When I use "streaming_query" to ask question with authorization header
-    """
-    {"query": "Say hello", "provider": "{PROVIDER}", "model":"unknown"}
-    """
-    Then The status code of the response is 404
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    When I use "streaming_query" to ask question with authorization header
+    """
+    {"query": "Say hello", "provider": "{PROVIDER}", "model":"unknown"}
+    """
+    Then The status code of the response is 404
    And The body of the response contains Model with ID unknown does not exist

-  Scenario: Check if LLM responds for query request with error for unknown provider
+  Scenario: Check if LLM responds for streaming_query request with error for unknown provider
    Given The system is in default state
-    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
    When I use "streaming_query" to ask question with authorization header
    """
    {"query": "Say hello", "model": "{MODEL}", "provider":"unknown"}
    """
-    Then The status code of the response is 404
+    Then The status code of the response is 404
    And The body of the response contains Model with ID gpt-4o-mini does not exist
+    And The token metrics should not have changed

  Scenario: Check if LLM responds properly when XML and JSON attachments are sent
    Given The system is in default state
@@ -153,11 +164,11 @@
  Scenario: Check if LLM responds to sent question with error when not authenticated
    Given The system is in default state
-    When I use "streaming_query" to ask question
-    """
-    {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"}
-    """
-    Then The status code of the response is 401
+    When I use "streaming_query" to ask question
+    """
+    {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"}
+    """
+    Then The status code of the response is 401
    And The body of the response is the following
    """
    {