-
Notifications
You must be signed in to change notification settings - Fork 27
feature to export dynamic queries #227
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ | |
| from fastapi import Request | ||
| from fastapi import status | ||
| from fastapi.responses import JSONResponse | ||
| from fastapi.responses import StreamingResponse | ||
| from fastapi.routing import APIRouter | ||
|
|
||
| from common_module.common_container import CommonContainer | ||
|
|
@@ -38,6 +39,8 @@ | |
| from plugins_module.services.dynamic_query_service import DynamicQueryService | ||
| from db_repo_module.cache.cache_manager import CacheManager | ||
| from ..utils.helper import generate_cache_key, validate_yaml_query | ||
| import csv | ||
| import io | ||
| import yaml | ||
| from ..utils.helper import DynamicQueryRequest | ||
| from ..utils.helper import DynamicQueryExecuteRequest | ||
|
|
@@ -47,6 +50,29 @@ | |
| datasource_router = APIRouter() | ||
|
|
||
|
|
||
| def _serialized_rows_to_csv(rows: list) -> bytes: | ||
| """Convert a list of serialized dicts (e.g. from execute_dynamic_query) to CSV bytes.""" | ||
| if not rows: | ||
| return b'' | ||
| out = io.StringIO() | ||
| fieldnames = list(rows[0].keys()) | ||
| for row in rows[1:]: | ||
| for k in row: | ||
| if k not in fieldnames: | ||
| fieldnames.append(k) | ||
| writer = csv.DictWriter(out, fieldnames=fieldnames, extrasaction='ignore') | ||
|
|
||
| def _cell_value(v): | ||
| if isinstance(v, (dict, list)): | ||
| return json.dumps(v) | ||
| return v if v is None or isinstance(v, str) else str(v) | ||
|
|
||
| writer.writeheader() | ||
| for row in rows: | ||
| writer.writerow({k: _cell_value(row.get(k)) for k in fieldnames}) | ||
| return out.getvalue().encode('utf-8-sig') | ||
|
|
||
|
|
||
| @datasource_router.post('/v1/datasources') | ||
| @inject | ||
| async def add_datasource( | ||
|
|
@@ -718,6 +744,108 @@ async def execute_dynamic_query( | |
| ) | ||
|
|
||
|
|
||
@datasource_router.post('/v1/{datasource_id}/dynamic-queries/{query_id}/export')
@inject
async def export_dynamic_query_csv(
    request: Request,
    datasource_id: str,
    query_id: str,
    filter: str | None = Query(None, alias='$filter'),
    offset: int | None = 0,
    limit: int | None = 100,
    dynamic_query_params: DynamicQueryExecuteRequest = None,
    response_formatter: ResponseFormatter = Depends(
        Provide[CommonContainer.response_formatter]
    ),
    dynamic_query_yaml_service: DynamicQueryService = Depends(
        Provide[PluginsContainer.dynamic_query_service]
    ),
    user_service: UserService = Depends(Provide[UserContainer.user_service]),
    cache_manager: CacheManager = Depends(Provide[PluginsContainer.cache_manager]),
    force_fetch: int = Query(0),
):
    """Execute the dynamic query and return the results as a downloadable CSV file.

    Mirrors the execute endpoint's flow (RLS filters for non-admins, result
    caching keyed on query + filters + pagination) and then converts the
    serialized rows to CSV via ``_serialized_rows_to_csv``.

    Returns:
        StreamingResponse with ``text/csv`` and an attachment filename of
        ``export_<query_id>.csv``, or a JSONResponse error (404 for missing
        datasource/query, 403 for a non-admin user without data access,
        500 for an unexpected result shape).
    """
    role_id, user_id, _ = get_current_user(request)

    datasource_type, datasource_config = await get_datasource_config(datasource_id)
    if not datasource_config:
        return JSONResponse(
            status_code=status.HTTP_404_NOT_FOUND,
            content=response_formatter.buildErrorResponse(
                f'Datasource not found: {datasource_id}'
            ),
        )

    yaml_query, _ = await dynamic_query_yaml_service.get_dynamic_yaml_query(query_id)
    if not yaml_query:
        return JSONResponse(
            status_code=status.HTTP_404_NOT_FOUND,
            content=response_formatter.buildErrorResponse(
                f'Dynamic query not found: {query_id}'
            ),
        )

    # Row-level-security filter string for non-admin users.
    rls_filter_str = None
    is_admin = await check_admin(role_id)
    if not is_admin:
        rls_filters = await user_service.get_user_resources(
            user_id=user_id, scope=ResourceScope.DATA
        )
        if len(rls_filters) == 0:
            return JSONResponse(
                status_code=status.HTTP_403_FORBIDDEN,
                content=response_formatter.buildErrorResponse(
                    'Data access not set for non-admin user'
                ),
            )
        rls_filters = fetch_data_filters(rls_filters)
        rls_filter_str = f"{ ' $and '.join(rls_filters)}"

    datasource_plugin = DatasourcePlugin(datasource_type, datasource_config)
    query_params = dynamic_query_params.params if dynamic_query_params else None
    cache_key = generate_cache_key(
        query_id,
        filter,
        rls_filter_str,
        limit,
        offset,
        query_params,
    )

    # Serve from cache unless force_fetch is set; otherwise execute and
    # cache the serialized result (single code path — no duplication).
    serialized_res = None
    if not force_fetch:
        cached_result = cache_manager.get_str(cache_key)
        if cached_result:
            serialized_res = json.loads(cached_result)
    if serialized_res is None:
        res = await datasource_plugin.execute_dynamic_query(
            yaml_query,
            rls_filter_str,
            filter,
            offset,
            limit,
            query_params,
        )
        serialized_res = serialize_values(res)
        cache_manager.add(cache_key, json.dumps(serialized_res), expiry=60 * 2)

    # Normalize the result into a flat list of row dicts: plugins may return
    # either a plain list or a dict keyed by query name with 'result' lists.
    rows: list = []
    if isinstance(serialized_res, dict):
        for result in serialized_res.values():
            if isinstance(result, dict) and 'result' in result:
                rows.extend(result['result'] or [])
            elif isinstance(result, list):
                rows.extend(result)
    elif isinstance(serialized_res, list):
        rows = serialized_res
    else:
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content=response_formatter.buildErrorResponse(
                'Unexpected dynamic query result format'
            ),
        )

    csv_bytes = _serialized_rows_to_csv(rows)
    filename = f'export_{query_id}.csv'

    # Use the computed filename in the header (was a hard-coded placeholder).
    return StreamingResponse(
        iter([csv_bytes]),
        media_type='text/csv',
        headers={
            'Content-Disposition': f'attachment; filename="{filename}"',
        },
    )
|
|
||
|
|
||
| @datasource_router.delete('/v1/{datasource_id}/dynamic-queries/{query_id}') | ||
| @inject | ||
| async def delete_dynamic_query( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Escape formula-like values to prevent CSV injection.
Untrusted values starting with `=`, `+`, `-`, `@`, or a tab character can be interpreted as formulas by spreadsheet applications. Sanitize such values before writing them to the CSV output.
🤖 Prompt for AI Agents