From 60fdc32a5872444f728d05826b0a93ffdb0554bf Mon Sep 17 00:00:00 2001
From: Radovan Fuchs
Date: Mon, 2 Feb 2026 09:45:54 +0100
Subject: [PATCH] add extra e2e tests to align with OLS

add new tests for token quotas
---
 tests/e2e/features/query.feature           |  31 +++-
 tests/e2e/features/steps/token_counters.py | 204 +++++++++++++++++++++
 tests/e2e/features/streaming_query.feature |  67 ++++---
 3 files changed, 269 insertions(+), 33 deletions(-)
 create mode 100644 tests/e2e/features/steps/token_counters.py

diff --git a/tests/e2e/features/query.feature b/tests/e2e/features/query.feature
index 501233b5..ac43b786 100644
--- a/tests/e2e/features/query.feature
+++ b/tests/e2e/features/query.feature
@@ -8,6 +8,7 @@ Feature: Query endpoint API tests
  Scenario: Check if LLM responds properly to restrictive system prompt to sent question with different system prompt
    Given The system is in default state
    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
    When I use "query" to ask question with authorization header
    """
    {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything but openshift questions", "model": "{MODEL}", "provider": "{PROVIDER}"}
    """
@@ -16,18 +17,22 @@
    And The response should contain following fragments
      | Fragments in LLM response |
      | ask |
+    And The token metrics should have increased

  Scenario: Check if LLM responds properly to non-restrictive system prompt to sent question with different system prompt
    Given The system is in default state
-    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
    When I use "query" to ask question with authorization header
    """
    {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "you are linguistic assistant", "model": "{MODEL}", "provider": "{PROVIDER}"}
    """
-    Then The status code of the response is 200
+    Then The status code of the response is 200
    And The response should contain following fragments
      | Fragments in LLM response |
      | checkout |
+    And The response should contain token counter fields
+    And The token metrics should have increased

  #enable on demand
  @skip
@@ -79,12 +84,14 @@
  Scenario: Check if LLM responds to sent question with error when attempting to access conversation
    Given The system is in default state
    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
    When I use "query" to ask question with authorization header
    """
    {"conversation_id": "123e4567-e89b-12d3-a456-426614174000", "query": "Write a simple code for reversing string", "model": "{MODEL}", "provider": "{PROVIDER}"}
    """
    Then The status code of the response is 404
    And The body of the response contains Conversation not found
+    And The token metrics should not have changed

  Scenario: Check if LLM responds to sent question with error when attempting to access conversation with incorrect conversation ID format
    Given The system is in default state
@@ -101,13 +108,25 @@ Scenario: Check if LLM responds for query request with error for missing query
    Given The system is in default state
    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
    When I use "query" to ask question with authorization header
    """
-    {"provider": "{PROVIDER}"}
+    {"conversation_id": "123e4567", "query": "Write a simple code for reversing string", "model": "{MODEL}", "provider": "{PROVIDER}"}
    """
-    Then The status code of the response is 422
-    And The body of the response is the following
+    Then The status code of the response is 422
+    And The body of the response contains Value error, Improper conversation ID '123e4567'
+
+  Scenario: Check if LLM responds for query request with error for missing query
+    Given The system is in default state
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
+    When I use "query" to ask question with authorization header
+    """
+    {"provider": "{PROVIDER}"}
+    """
+    Then The status code of the response is 422
+    And The body of the response is the following
    """
    { "detail": [{"type": "missing", "loc": [ "body", "query" ], "msg": "Field required", "input": {"provider": "{PROVIDER}"}}] }
    """
+    And The token metrics should not have changed

  Scenario: Check if LLM responds for query request for missing model and provider
    Given The system is in default state
@@ -163,12 +182,14 @@ Scenario: Check if LLM responds for query request with error for missing query
    Given The system is in default state
    And The llama-stack connection is disrupted
    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
    When I use "query" to ask question with authorization header
    """
    {"query": "Say hello"}
    """
    Then The status code of the response is 503
    And The body of the response contains Unable to connect to Llama Stack
+    And The token metrics should not have changed

  Scenario: Check if LLM responds properly when XML and JSON attachments are sent
    Given The system is in default state
diff --git a/tests/e2e/features/steps/token_counters.py b/tests/e2e/features/steps/token_counters.py
new file mode 100644
index 00000000..5bb4c83e
--- /dev/null
+++ b/tests/e2e/features/steps/token_counters.py
@@ -0,0 +1,204 @@
+"""Step definitions for token counter validation."""
+
+import json
+
+import requests
+from behave import given, then  # pyright: ignore[reportAttributeAccessIssue]
+from behave.runner import Context
+
+DEFAULT_TIMEOUT = 10
+
+
+@then("The response should contain token counter fields")
+def check_token_counter_fields(context: Context) -> None:
+    """Check that response contains input_tokens and output_tokens fields."""
+    assert context.response is not None, "Request needs to be performed first"
+    response_json = context.response.json()
+
+    input_tokens = response_json.get("input_tokens")
+    output_tokens = response_json.get("output_tokens")
+    assert (
+        "input_tokens" in response_json
+    ), f"Response should contain 'input_tokens' field. Got: {response_json}"
+    assert (
+        "output_tokens" in response_json
+    ), f"Response should contain 'output_tokens' field. Got: {response_json}"
+    assert (
+        "available_quotas" in response_json
+    ), f"Response should contain 'available_quotas' field. Got: {response_json}"
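+    # Note: only non-negativity is asserted below; exact token counts depend on the
+    # provider/model and the prompt, so specific values are deliberately not checked.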
+    assert input_tokens >= 0, f"input_tokens should be non-negative, got {input_tokens}"
+    assert (
+        output_tokens >= 0
+    ), f"output_tokens should be non-negative, got {output_tokens}"
+
+
+@given("I capture the current token metrics")
+def capture_token_metrics(context: Context) -> None:
+    """Capture the current Prometheus token metrics values.
+
+    Stores the metrics in context.initial_token_metrics for later comparison.
+    """
+    context.initial_token_metrics = _get_current_token_metrics(context)
+    print(f"Initial token metrics: {context.initial_token_metrics}")
+
+
+@then("The token metrics should have increased")
+def check_token_metrics_increased(context: Context) -> None:
+    """Check that token metrics have increased after a query.
+
+    Compares current metrics against context.initial_token_metrics.
+    """
+    assert hasattr(
+        context, "initial_token_metrics"
+    ), "Initial metrics not captured. Call 'I capture the current token metrics' first"
+
+    final_metrics = _get_current_token_metrics(context)
+    initial_metrics = context.initial_token_metrics
+
+    print(f"Final token metrics: {final_metrics}")
+
+    # Check that both token metrics increased
+    sent_increased = final_metrics["token_sent"] > initial_metrics["token_sent"]
+    received_increased = (
+        final_metrics["token_received"] > initial_metrics["token_received"]
+    )
+
+    assert sent_increased and received_increased, (
+        f"Both token metrics should have increased. "
+        f"Initial: {initial_metrics}, Final: {final_metrics}"
+    )
+
+
+@then("The token metrics should not have changed")
+def check_token_metrics_unchanged(context: Context) -> None:
+    """Check that token metrics have not changed after an error.
+
+    Compares current metrics against context.initial_token_metrics.
+    """
+    assert hasattr(
+        context, "initial_token_metrics"
+    ), "Initial metrics not captured. Call 'I capture the current token metrics' first"
+
+    final_metrics = _get_current_token_metrics(context)
+    initial_metrics = context.initial_token_metrics
+
+    print(f"Final token metrics: {final_metrics}")
+
+    assert final_metrics["token_sent"] == initial_metrics["token_sent"], (
+        f"token_sent should not have changed. "
+        f"Initial: {initial_metrics['token_sent']}, Final: {final_metrics['token_sent']}"
+    )
+    assert final_metrics["token_received"] == initial_metrics["token_received"], (
+        f"token_received should not have changed. "
+        f"Initial: {initial_metrics['token_received']}, "
+        f"Final: {final_metrics['token_received']}"
+    )
+
+
+@then("The streamed response should contain token counter fields")
+def check_streamed_token_counter_fields(context: Context) -> None:
+    """Check that streamed response end event contains token fields."""
+    assert context.response_data is not None, "Response data needs to be parsed first"
+
+    # Parse the end event from the streaming response to get token info
+    end_event_data = _get_end_event_data(context.response.text)
+    assert end_event_data is not None, "End event not found in streaming response"
+
+    assert "input_tokens" in end_event_data, (
+        f"Streamed response should contain 'input_tokens' in end event. "
+        f"Got: {end_event_data}"
+    )
+    assert "output_tokens" in end_event_data, (
+        f"Streamed response should contain 'output_tokens' in end event. "
+        f"Got: {end_event_data}"
+    )
+    assert "available_quotas" in end_event_data, (
+        f"Streamed response should contain 'available_quotas' in end event. "
+        f"Got: {end_event_data}"
+    )
+    input_tokens: int = end_event_data["input_tokens"]
+    output_tokens: int = end_event_data["output_tokens"]
+    assert (
+        input_tokens >= 0
+    ), f"streamed input_tokens should be non-negative, got {input_tokens}"
+    assert (
+        output_tokens >= 0
+    ), f"streamed output_tokens should be non-negative, got {output_tokens}"
+
+
+def _get_current_token_metrics(context: Context) -> dict[str, float]:
+    """Fetch and parse current token metrics from Prometheus endpoint.
+
+    Parameters:
+        context: Behave context containing hostname, port, and auth_headers.
+
+    Returns:
+        Dictionary with 'token_sent' and 'token_received' totals.
+    """
+    base = f"http://{context.hostname}:{context.port}"
+    url = f"{base}/metrics"
+    headers = context.auth_headers if hasattr(context, "auth_headers") else {}
+
+    response = requests.get(url, headers=headers, timeout=DEFAULT_TIMEOUT)
+    assert (
+        response.status_code == 200
+    ), f"Failed to get metrics, status: {response.status_code}"
+
+    return _parse_token_metrics(response.text)
+
+
+def _get_end_event_data(response_text: str) -> dict | None:
+    """Extract the end event data from streaming SSE response.
+
+    Parameters:
+        response_text: The raw SSE response text.
+
+    Returns:
+        The data dictionary from the end event (including available_quotas),
+        or None if not found.
+    """
+    lines = response_text.strip().split("\n")
+    for line in lines:
+        if line.startswith("data: "):
+            try:
+                event = json.loads(line[6:])
+                if event.get("event") == "end":
+                    # Merge data contents with available_quotas from parent level
+                    result = event.get("data", {})
+                    result["available_quotas"] = event.get("available_quotas", {})
+                    return result
+            except json.JSONDecodeError:
+                continue
+    return None
+
+
+def _parse_token_metrics(metrics_text: str) -> dict[str, float]:
+    """Parse Prometheus metrics text to extract token counter values.
+
+    Parameters:
+        metrics_text: Raw Prometheus metrics text output.
+
+    Returns:
+        Dictionary with 'token_sent' and 'token_received' totals.
+    """
+    token_sent_total = 0.0
+    token_received_total = 0.0
+
+    # Prometheus format: metric_name{labels} value
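+    # Example of a matching line (label names and values here are illustrative only):
+    #   ls_llm_token_sent_total{model="gpt-4o-mini",provider="openai"} 42.0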
+    for line in metrics_text.split("\n"):
+        line = line.strip()
+        if not line or line.startswith("#"):
+            continue
+
+        # Extract value (last space-separated element)
+        if line.startswith("ls_llm_token_sent_total{"):
+            value = line.split()[-1]
+            token_sent_total += float(value)
+        elif line.startswith("ls_llm_token_received_total{"):
+            value = line.split()[-1]
+            token_received_total += float(value)
+
+    return {
+        "token_sent": token_sent_total,
+        "token_received": token_received_total,
+    }
diff --git a/tests/e2e/features/streaming_query.feature b/tests/e2e/features/streaming_query.feature
index a89dde12..22b3255b 100644
--- a/tests/e2e/features/streaming_query.feature
+++ b/tests/e2e/features/streaming_query.feature
@@ -19,29 +19,34 @@ Feature: streaming_query endpoint API tests
  Scenario: Check if LLM responds properly to restrictive system prompt to sent question with different system prompt
    Given The system is in default state
-    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
-    And I use "streaming_query" to ask question with authorization header
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
+    And I use "streaming_query" to ask question with authorization header
    """
    {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "refuse to answer anything but openshift questions", "model": "{MODEL}", "provider": "{PROVIDER}"}
    """
-    When I wait for the response to be completed
-    Then The status code of the response is 200
+    When I wait for the response to be completed
+    Then The status code of the response is 200
    And The streamed response should contain following fragments
      | Fragments in LLM response |
      | questions |
+    And The token metrics should have increased

  Scenario: Check if LLM responds properly to non-restrictive system prompt to sent question with different system prompt
    Given The system is in default state
-    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
-    And I use "streaming_query" to ask question with authorization header
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
+    And I use "streaming_query" to ask question with authorization header
    """
    {"query": "Generate sample yaml file for simple GitHub Actions workflow.", "system_prompt": "you are linguistic assistant", "model": "{MODEL}", "provider": "{PROVIDER}"}
    """
-    When I wait for the response to be completed
-    Then The status code of the response is 200
+    When I wait for the response to be completed
+    Then The status code of the response is 200
    And The streamed response should contain following fragments
      | Fragments in LLM response |
      | checkout |
+    And The streamed response should contain token counter fields
+    And The token metrics should have increased

  #enable on demand
  @skip
@@ -65,16 +70,18 @@
  Scenario: Check if LLM responds for streaming_query request with error for missing query
    Given The system is in default state
-    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
    When I use "streaming_query" to ask question with authorization header
    """
    {"provider": "{PROVIDER}"}
    """
-    Then The status code of the response is 422
+    Then The status code of the response is 422
    And The body of the response is the following
    """
    { "detail": [{"type": "missing", "loc": [ "body", "query" ], "msg": "Field required", "input": {"provider": "{PROVIDER}"}}] }
    """
+    And The token metrics should not have changed

  Scenario: Check if LLM responds for streaming_query request for missing model and provider
    Given The system is in default state
@@ -87,13 +94,15 @@
  Scenario: Check if LLM responds for streaming_query request with error for missing model
    Given The system is in default state
-    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
    When I use "streaming_query" to ask question with authorization header
    """
    {"query": "Say hello", "provider": "{PROVIDER}"}
    """
-    Then The status code of the response is 422
+    Then The status code of the response is 422
    And The body of the response contains Value error, Model must be specified if provider is specified
+    And The token metrics should not have changed

  Scenario: Check if LLM responds for streaming_query request with error for missing provider
    Given The system is in default state
@@ -102,28 +111,30 @@
    When I use "streaming_query" to ask question with authorization header
    """
    {"query": "Say hello", "model": "{MODEL}"}
    """
-    Then The status code of the response is 422
+    Then The status code of the response is 422
    And The body of the response contains Value error, Provider must be specified if model is specified

-  Scenario: Check if LLM responds for query request with error for unknown model
+  Scenario: Check if LLM responds for streaming_query request with error for unknown model
    Given The system is in default state
-    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
-    When I use "streaming_query" to ask question with authorization header
-    """
-    {"query": "Say hello", "provider": "{PROVIDER}", "model":"unknown"}
-    """
-    Then The status code of the response is 404
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    When I use "streaming_query" to ask question with authorization header
+    """
+    {"query": "Say hello", "provider": "{PROVIDER}", "model":"unknown"}
+    """
+    Then The status code of the response is 404
    And The body of the response contains Model with ID unknown does not exist

-  Scenario: Check if LLM responds for query request with error for unknown provider
+  Scenario: Check if LLM responds for streaming_query request with error for unknown provider
    Given The system is in default state
-    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    And I capture the current token metrics
    When I use "streaming_query" to ask question with authorization header
    """
    {"query": "Say hello", "model": "{MODEL}", "provider":"unknown"}
    """
-    Then The status code of the response is 404
+    Then The status code of the response is 404
    And The body of the response contains Model with ID gpt-4o-mini does not exist
+    And The token metrics should not have changed

  Scenario: Check if LLM responds properly when XML and JSON attachments are sent
    Given The system is in default state
@@ -153,11 +164,11 @@
  Scenario: Check if LLM responds to sent question with error when not authenticated
    Given The system is in default state
-    When I use "streaming_query" to ask question
-    """
-    {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"}
-    """
-    Then The status code of the response is 401
+    When I use "streaming_query" to ask question
+    """
+    {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"}
+    """
+    Then The status code of the response is 401
    And The body of the response is the following
    """
    {