@@ -8,6 +8,7 @@
from fastapi import Request
from fastapi import status
from fastapi.responses import JSONResponse
from fastapi.responses import StreamingResponse
from fastapi.routing import APIRouter

from common_module.common_container import CommonContainer
@@ -38,6 +39,8 @@
from plugins_module.services.dynamic_query_service import DynamicQueryService
from db_repo_module.cache.cache_manager import CacheManager
from ..utils.helper import generate_cache_key, validate_yaml_query
import csv
import io
import yaml
from ..utils.helper import DynamicQueryRequest
from ..utils.helper import DynamicQueryExecuteRequest
@@ -47,6 +50,29 @@
datasource_router = APIRouter()


def _serialized_rows_to_csv(rows: list) -> bytes:
    """Convert a list of serialized dicts (e.g. from execute_dynamic_query) to CSV bytes."""
    if not rows:
        return b''
    out = io.StringIO()
    # Use the first row's keys as the base header, then append any keys
    # that appear only in later rows
    fieldnames = list(rows[0].keys())
    for row in rows[1:]:
        for k in row:
            if k not in fieldnames:
                fieldnames.append(k)
    writer = csv.DictWriter(out, fieldnames=fieldnames, extrasaction='ignore')

    def _cell_value(v):
        # Nested structures become JSON; non-strings are stringified; None passes through
        if isinstance(v, (dict, list)):
            return json.dumps(v)
        return v if v is None or isinstance(v, str) else str(v)

    writer.writeheader()
    for row in rows:
        writer.writerow({k: _cell_value(row.get(k)) for k in fieldnames})
Comment on lines +65 to +72
⚠️ Potential issue | 🟠 Major

Escape formula-like values to prevent CSV injection.
Untrusted values starting with =, +, -, @, or tab can be interpreted as formulas by spreadsheet apps. Sanitize before writing to CSV.

🔒 Proposed fix
     def _cell_value(v):
-        if isinstance(v, (dict, list)):
-            return json.dumps(v)
-        return v if v is None or isinstance(v, str) else str(v)
+        if isinstance(v, (dict, list)):
+            v = json.dumps(v)
+        elif not (v is None or isinstance(v, str)):
+            v = str(v)
+        if isinstance(v, str) and v[:1] in ('=', '+', '-', '@', '\t'):
+            return f"'{v}"
+        return v
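For illustration, a minimal standalone sketch of the sanitized helper; the formula payload below is hypothetical, not taken from the PR:

import json

# Sketch of the sanitized _cell_value from the proposed fix above
def _cell_value(v):
    if isinstance(v, (dict, list)):
        v = json.dumps(v)
    elif not (v is None or isinstance(v, str)):
        v = str(v)
    if isinstance(v, str) and v[:1] in ('=', '+', '-', '@', '\t'):
        return f"'{v}"  # leading apostrophe stops spreadsheet formula evaluation
    return v

assert _cell_value('=HYPERLINK("http://evil.example","x")').startswith("'=")
assert _cell_value(-5) == "'-5"  # negative numbers are also prefixed -- a known tradeoff of this escaping
assert _cell_value({'a': 1}) == '{"a": 1}'
assert _cell_value(None) is None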
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In
`@wavefront/server/modules/plugins_module/plugins_module/controllers/datasource_controller.py`
around lines 65-72, the CSV writer currently writes untrusted values directly
(via _cell_value and writer.writerow), which can lead to CSV injection. Update
_cell_value so that, after converting dict/list values to JSON and non-string
types to str, it detects string values starting with '=', '+', '-', '@', or a
tab and sanitizes them by prefixing a single quote ('), unless they already
start with a single quote or are None. Keep the existing behavior for None and
other values, and ensure the sanitized value is what writer.writerow receives
for each fieldname when iterating rows.

    return out.getvalue().encode('utf-8-sig')  # utf-8-sig prepends a BOM so Excel detects UTF-8
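A quick sanity check of the helper (row values hypothetical): the header is the union of keys across rows, and missing cells are left empty.

rows = [{'a': 1, 'b': 'x'}, {'a': 2, 'c': {'k': 'v'}}]
data = _serialized_rows_to_csv(rows)
print(data.decode('utf-8-sig'))
# a,b,c
# 1,x,
# 2,,"{""k"": ""v""}"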


@datasource_router.post('/v1/datasources')
@inject
async def add_datasource(
@@ -718,6 +744,108 @@ async def execute_dynamic_query(
)


@datasource_router.post('/v1/{datasource_id}/dynamic-queries/{query_id}/export')
@inject
async def export_dynamic_query_csv(
    request: Request,
    datasource_id: str,
    query_id: str,
    filter: str | None = Query(None, alias='$filter'),
    offset: int | None = 0,
    limit: int | None = 100,
    dynamic_query_params: DynamicQueryExecuteRequest = None,
    response_formatter: ResponseFormatter = Depends(
        Provide[CommonContainer.response_formatter]
    ),
    dynamic_query_yaml_service: DynamicQueryService = Depends(
        Provide[PluginsContainer.dynamic_query_service]
    ),
    user_service: UserService = Depends(Provide[UserContainer.user_service]),
    cache_manager: CacheManager = Depends(Provide[PluginsContainer.cache_manager]),
    force_fetch: int = Query(0),
):
    """Execute the dynamic query and return results as a downloadable CSV file."""
    role_id, user_id, _ = get_current_user(request)
    datasource_type, datasource_config = await get_datasource_config(datasource_id)
    if not datasource_config:
        return JSONResponse(
            status_code=status.HTTP_404_NOT_FOUND,
            content=response_formatter.buildErrorResponse(
                f'Datasource not found: {datasource_id}'
            ),
        )
    yaml_query, _ = await dynamic_query_yaml_service.get_dynamic_yaml_query(query_id)
    if not yaml_query:
        return JSONResponse(
            status_code=status.HTTP_404_NOT_FOUND,
            content=response_formatter.buildErrorResponse(
                f'Dynamic query not found: {query_id}'
            ),
        )

    # Non-admin users must have row-level-security data filters configured
    rls_filter_str = None
    is_admin = await check_admin(role_id)
    if not is_admin:
        rls_filters = await user_service.get_user_resources(
            user_id=user_id, scope=ResourceScope.DATA
        )
        if len(rls_filters) == 0:
            return JSONResponse(
                status_code=status.HTTP_403_FORBIDDEN,
                content=response_formatter.buildErrorResponse(
                    'Data access not set for non-admin user'
                ),
            )
        rls_filters = fetch_data_filters(rls_filters)
        rls_filter_str = ' $and '.join(rls_filters)

    datasource_plugin = DatasourcePlugin(datasource_type, datasource_config)
    cache_key = generate_cache_key(
        query_id,
        filter,
        rls_filter_str,
        limit,
        offset,
        dynamic_query_params.params if dynamic_query_params else None,
    )
    # Results are cached for two minutes; force_fetch=1 bypasses the cache
    if not force_fetch:
        cached_result = cache_manager.get_str(cache_key)
        if cached_result:
            serialized_res = json.loads(cached_result)
        else:
            res = await datasource_plugin.execute_dynamic_query(
                yaml_query,
                rls_filter_str,
                filter,
                offset,
                limit,
                dynamic_query_params.params if dynamic_query_params else None,
            )
            serialized_res = serialize_values(res)
            cache_manager.add(cache_key, json.dumps(serialized_res), expiry=60 * 2)
    else:
        res = await datasource_plugin.execute_dynamic_query(
            yaml_query,
            rls_filter_str,
            filter,
            offset,
            limit,
            dynamic_query_params.params if dynamic_query_params else None,
        )
        serialized_res = serialize_values(res)
        cache_manager.add(cache_key, json.dumps(serialized_res), expiry=60 * 2)

    csv_bytes = _serialized_rows_to_csv(serialized_res)
    filename = f'export_{query_id}.csv'
Comment on lines +838 to +839
⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Verification (condensed from the executed scripts): repo-wide rg searches for the definitions of execute_dynamic_query, QueryResult, _serialized_rows_to_csv, serialize_values, DatasourcePlugin, and get_dynamic_yaml_query; inspection of datasource_controller.py around lines 664-860; and review of the BigQuery, Redshift, and generic datasource plugin implementations. No QueryResult class, tests, or example YAML files for this flow were found. The plugin implementations return results as a dict keyed by query ID, and nothing between execute_dynamic_query and _serialized_rows_to_csv unwraps that dict.


Normalize dynamic query result structure before CSV conversion.

execute_dynamic_query returns a dict keyed by query ID with structure {query_id: {status, error, description, result: [rows]}}, but _serialized_rows_to_csv expects a list of dicts. The current code passes the dict directly, which will fail at rows[0] (line 58). Extract the rows from the result structure or return an error for unexpected formats.
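For concreteness, a hypothetical instance of the shape serialize_values would hand back (field values illustrative only):

serialized_res = {
    'q_123': {
        'status': 'success',
        'error': None,
        'description': 'Monthly totals',
        'result': [
            {'month': '2024-01', 'total': 42},
            {'month': '2024-02', 'total': 57},
        ],
    }
}
# _serialized_rows_to_csv needs serialized_res['q_123']['result'], not serialized_res itself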

🧩 Suggested fix
-    csv_bytes = _serialized_rows_to_csv(serialized_res)
+    # Extract rows from the query result dict; use a local loop name so the
+    # query_id path parameter (used for the filename below) is not shadowed
+    rows = []
+    if isinstance(serialized_res, dict):
+        # Handle dict keyed by query id with a 'result' subkey
+        for qid, query_result in serialized_res.items():
+            if isinstance(query_result, dict) and 'result' in query_result:
+                rows.extend(query_result['result'] or [])
+            elif isinstance(query_result, list):
+                rows.extend(query_result)
+    elif isinstance(serialized_res, list):
+        rows = serialized_res
+    else:
+        return JSONResponse(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            content=response_formatter.buildErrorResponse(
+                'Unexpected dynamic query result format'
+            ),
+        )
+    csv_bytes = _serialized_rows_to_csv(rows)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In
`@wavefront/server/modules/plugins_module/plugins_module/controllers/datasource_controller.py`
around lines 838-839, the code passes the whole dynamic query dict into
_serialized_rows_to_csv, but execute_dynamic_query returns {query_id: {status,
error, description, result: [...]}}, while _serialized_rows_to_csv expects a
list of row dicts. Update the logic around serialized_res and csv_bytes to
extract rows = serialized_res.get(query_id, {}).get('result') and pass that
list to _serialized_rows_to_csv, and add a guard that returns a clear error or
an empty CSV if the expected keys or list are missing or invalid. Reference
serialized_res, query_id, execute_dynamic_query, and _serialized_rows_to_csv
when making the change.

    return StreamingResponse(
        iter([csv_bytes]),
        media_type='text/csv',
        headers={
            'Content-Disposition': f'attachment; filename="{filename}"',
        },
    )
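As a usage sketch (host, IDs, and auth are assumptions for illustration, not part of the PR):

import httpx

resp = httpx.post(
    'http://localhost:8000/v1/ds_1/dynamic-queries/q_123/export',
    params={'limit': 500, 'force_fetch': 1},
    json={'params': {}},  # DynamicQueryExecuteRequest body
)
resp.raise_for_status()
with open('export_q_123.csv', 'wb') as f:
    f.write(resp.content)  # bytes start with a UTF-8 BOM (utf-8-sig)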


@datasource_router.delete('/v1/{datasource_id}/dynamic-queries/{query_id}')
@inject
async def delete_dynamic_query(