Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
362 changes: 362 additions & 0 deletions hack/tide-query-manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,362 @@
#!/usr/bin/env python3
"""
Tide Query Manager - A query-based tool to modify Prow Tide configurations.

Usage examples:
# Add staff-eng-approved to release-4.21 queries that have backport-risk-assessed
./tide-query-manager.py --prow-config-dir /path/to/prow/config \\
--query "is:release-4.21 has:backport-risk-assessed add:staff-eng-approved remove:backport-risk-assessed"

# Add verified label to all main/master branches for specific repos
./tide-query-manager.py --prow-config-dir /path/to/prow/config \\
--query "is:main repo:openshift/ci-tools add:verified"

# Multiple conditions with wildcards
./tide-query-manager.py --prow-config-dir /path/to/prow/config \\
--query "is:release-4.* has:staff-eng-approved add-branch:release-4.22"
"""

import argparse
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Set
import yaml


class QueryCondition:
"""Represents a set of conditions and actions for matching and modifying Tide queries."""

def __init__(self):
self.branches: List[str] = [] # branch patterns to match (supports wildcards)
self.repos: List[str] = [] # repos to match (org/repo format)
self.has_labels: List[str] = [] # queries must have these labels
self.missing_labels: List[str] = [] # queries must NOT have these labels
self.add_labels: List[str] = [] # labels to add
self.remove_labels: List[str] = [] # labels to remove
self.add_branches: List[str] = [] # branches to add
self.remove_branches: List[str] = [] # branches to remove

@classmethod
def from_query_string(cls, query: str) -> 'QueryCondition':
"""Parse a query string into a QueryCondition.

Query format: "key:value key:value ..."

Supported keys:
- is, branch: branch pattern to match (supports *)
- repo: repository to match (org/repo)
- has, has-label: label that must be present
- missing, missing-label: label that must be absent
- add, add-label: label to add
- remove, remove-label: label to remove
- add-branch: branch to add
- remove-branch: branch to remove
"""
cond = cls()

# Split by spaces while respecting quotes
parts = query.split()
Comment on lines +59 to +60
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Comment claims quote handling that isn't implemented.

The comment states "Split by spaces while respecting quotes" but query.split() does not handle quoted values. If this is intentional (labels don't contain spaces), remove the misleading comment. If quote support is needed, consider using shlex.split().

🤖 Prompt for AI Agents
In `@hack/tide-query-manager.py` around lines 59 - 60, The comment "Split by
spaces while respecting quotes" is inaccurate for the parts = query.split() call
in this code; either remove/adjust the comment to reflect that query.split()
does not handle quotes, or replace the splitting logic with a quote-aware
splitter by importing and using shlex.split(query) wherever parts is produced.
Locate the parts = query.split() usage and update the comment or change to
shlex.split, ensuring any necessary import of the shlex module and that
downstream code still expects a list of tokens.


for part in parts:
if ':' not in part:
raise ValueError(f"Invalid query part '{part}', expected format 'key:value'")

key, value = part.split(':', 1)

if key in ('is', 'branch'):
cond.branches.append(value)
elif key == 'repo':
cond.repos.append(value)
elif key in ('has', 'has-label'):
cond.has_labels.append(value)
elif key in ('missing', 'missing-label'):
cond.missing_labels.append(value)
elif key in ('add', 'add-label'):
cond.add_labels.append(value)
elif key in ('remove', 'remove-label'):
cond.remove_labels.append(value)
elif key == 'add-branch':
cond.add_branches.append(value)
elif key == 'remove-branch':
cond.remove_branches.append(value)
else:
raise ValueError(f"Unknown query key '{key}'")

return cond


def branch_matches(branch: str, pattern: str) -> bool:
"""Check if a branch matches a pattern (supports * wildcard)."""
regex_pattern = '^' + re.escape(pattern).replace(r'\*', '.*') + '$'
return bool(re.match(regex_pattern, branch))


def query_matches_condition(query: dict, repo: str, condition: QueryCondition, debug: bool = False) -> bool:
"""Check if a Tide query matches the filter conditions."""
# Check repo filter
if condition.repos:
if repo not in condition.repos:
if debug:
print(f" DEBUG: Repo {repo} not in {condition.repos}")
return False

# Check branch filter
if condition.branches:
included_branches = query.get('includedBranches', [])
has_matching_branch = any(
branch_matches(branch, pattern)
for branch in included_branches
for pattern in condition.branches
)
if not has_matching_branch:
if debug:
print(f" DEBUG: No matching branch in {included_branches} for patterns {condition.branches}")
return False

# Check has-label filter
query_labels = set(query.get('labels', []))
for label in condition.has_labels:
if label not in query_labels:
if debug:
print(f" DEBUG: Missing required label '{label}' in {query_labels}")
return False

# Check missing-label filter
for label in condition.missing_labels:
if label in query_labels:
if debug:
print(f" DEBUG: Found excluded label '{label}' in {query_labels}")
return False

return True


class TideQueryManager:
"""Manages modifications to Prow Tide queries."""

def __init__(self, prow_config_dir: str, sharded_config_dir: str, dry_run: bool = False, verbose: bool = False):
self.prow_config_dir = Path(prow_config_dir)
self.sharded_config_dir = Path(sharded_config_dir)
self.dry_run = dry_run
self.verbose = verbose
self.changed_count = 0

def load_config(self, file_path: Path) -> Optional[dict]:
"""Load a YAML configuration file."""
if not file_path.exists():
return None

with open(file_path, 'r') as f:
return yaml.safe_load(f)

def save_config(self, file_path: Path, config: dict):
"""Save a YAML configuration file."""
file_path.parent.mkdir(parents=True, exist_ok=True)
with open(file_path, 'w') as f:
yaml.dump(config, f, default_flow_style=False, sort_keys=False)
Comment on lines +146 to +158
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Specify explicit UTF-8 encoding for file operations.

Missing explicit encoding can cause issues on systems with non-UTF-8 default locale, potentially corrupting config files with non-ASCII characters.

Proposed fix
     def load_config(self, file_path: Path) -> Optional[dict]:
         """Load a YAML configuration file."""
         if not file_path.exists():
             return None

-        with open(file_path, 'r') as f:
+        with open(file_path, 'r', encoding='utf-8') as f:
             return yaml.safe_load(f)

     def save_config(self, file_path: Path, config: dict):
         """Save a YAML configuration file."""
         file_path.parent.mkdir(parents=True, exist_ok=True)
-        with open(file_path, 'w') as f:
+        with open(file_path, 'w', encoding='utf-8') as f:
             yaml.dump(config, f, default_flow_style=False, sort_keys=False)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def load_config(self, file_path: Path) -> Optional[dict]:
"""Load a YAML configuration file."""
if not file_path.exists():
return None
with open(file_path, 'r') as f:
return yaml.safe_load(f)
def save_config(self, file_path: Path, config: dict):
"""Save a YAML configuration file."""
file_path.parent.mkdir(parents=True, exist_ok=True)
with open(file_path, 'w') as f:
yaml.dump(config, f, default_flow_style=False, sort_keys=False)
def load_config(self, file_path: Path) -> Optional[dict]:
"""Load a YAML configuration file."""
if not file_path.exists():
return None
with open(file_path, 'r', encoding='utf-8') as f:
return yaml.safe_load(f)
def save_config(self, file_path: Path, config: dict):
"""Save a YAML configuration file."""
file_path.parent.mkdir(parents=True, exist_ok=True)
with open(file_path, 'w', encoding='utf-8') as f:
yaml.dump(config, f, default_flow_style=False, sort_keys=False)
🤖 Prompt for AI Agents
In `@hack/tide-query-manager.py` around lines 146 - 158, Update the file I/O in
load_config and save_config to always open files with explicit UTF-8 encoding:
in load_config (which uses open(...) and yaml.safe_load) open the file with
encoding='utf-8', and in save_config (which calls open(...) and yaml.dump) open
the file with encoding='utf-8' as well; keep the existing Path parent mkdir
logic and yaml.dump options (default_flow_style=False, sort_keys=False).


def process_query(self, query: dict, repo: str, condition: QueryCondition, debug: bool = False) -> bool:
"""Process a single Tide query. Returns True if modified."""
if debug:
print(f"DEBUG: Checking query for repo {repo}")
print(f" Branches: {query.get('includedBranches', [])}")
print(f" Labels: {query.get('labels', [])}")

if not query_matches_condition(query, repo, condition, debug=debug):
return False

labels = set(query.get('labels', []))
branches = set(query.get('includedBranches', []))

original_labels = labels.copy()
original_branches = branches.copy()

# Add labels
labels.update(condition.add_labels)

# Remove labels
labels.difference_update(condition.remove_labels)

# Add branches
branches.update(condition.add_branches)

# Remove branches
branches.difference_update(condition.remove_branches)

labels_changed = labels != original_labels
branches_changed = branches != original_branches

if labels_changed or branches_changed:
self.changed_count += 1

action = "Would modify" if self.dry_run else "Modified"
print(f"{action} query for repo {repo}:")

if labels_changed:
print(f" Labels: {sorted(original_labels)} -> {sorted(labels)}")
if branches_changed:
print(f" Branches: {sorted(original_branches)} -> {sorted(branches)}")

if not self.dry_run:
query['labels'] = sorted(labels)
if branches:
query['includedBranches'] = sorted(branches)
elif 'includedBranches' in query:
del query['includedBranches']

return True

return False

def process_sharded_configs(self, condition: QueryCondition):
"""Process all sharded Prow config files."""
# Find all _prowconfig.yaml files in the sharded config directory
config_files = list(self.sharded_config_dir.glob('**/*_prowconfig.yaml'))

print(f"DEBUG: Searching in: {self.sharded_config_dir}")
print(f"DEBUG: Found {len(config_files)} config files")
Comment on lines +218 to +219
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

DEBUG prints are always shown regardless of verbose flag.

These debug statements should be gated by self.verbose to avoid cluttering output in normal operation.

Proposed fix
-        print(f"DEBUG: Searching in: {self.sharded_config_dir}")
-        print(f"DEBUG: Found {len(config_files)} config files")
+        if self.verbose:
+            print(f"DEBUG: Searching in: {self.sharded_config_dir}")
+            print(f"DEBUG: Found {len(config_files)} config files")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
print(f"DEBUG: Searching in: {self.sharded_config_dir}")
print(f"DEBUG: Found {len(config_files)} config files")
if self.verbose:
print(f"DEBUG: Searching in: {self.sharded_config_dir}")
print(f"DEBUG: Found {len(config_files)} config files")
🤖 Prompt for AI Agents
In `@hack/tide-query-manager.py` around lines 218 - 219, The two unconditional
debug prints referencing self.sharded_config_dir and len(config_files) should be
gated by the object's verbosity flag; update the code around those prints (the
DEBUG lines) to only execute when self.verbose is truthy so they don't always
appear — i.e., wrap or guard the print statements with an if self.verbose (or
equivalent) check in the method where self.sharded_config_dir and config_files
are used.


if not config_files:
print(f"Warning: No sharded config files found in {self.sharded_config_dir}")
return

for config_file in config_files:
# Extract org/repo from path
# Path structure: org/repo/_prowconfig.yaml or org/_prowconfig.yaml
parts = config_file.relative_to(self.sharded_config_dir).parts
if len(parts) == 2:
# org/repo/_prowconfig.yaml
org, repo = parts[0], parts[1].replace('_prowconfig.yaml', '')
if repo == '_prowconfig.yaml': # org-level config
repo = parts[0]
full_repo = org
else:
full_repo = f"{org}/{repo}"
elif len(parts) == 3:
# org/repo/_prowconfig.yaml
org, repo = parts[0], parts[1]
full_repo = f"{org}/{repo}"
else:
continue
Comment on lines +229 to +242
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Logic bug in org-level config path parsing.

The check at line 232 always evaluates to False because repo has already been transformed by replace() at line 231. For a path like org/_prowconfig.yaml, this produces full_repo = "org/" instead of just "org".

Proposed fix
             if len(parts) == 2:
                 # org/repo/_prowconfig.yaml
-                org, repo = parts[0], parts[1].replace('_prowconfig.yaml', '')
-                if repo == '_prowconfig.yaml':  # org-level config
-                    repo = parts[0]
-                    full_repo = org
+                org = parts[0]
+                filename = parts[1]
+                if filename == '_prowconfig.yaml':  # org-level config
+                    full_repo = org
                 else:
+                    repo = filename.replace('_prowconfig.yaml', '')
                     full_repo = f"{org}/{repo}"
             elif len(parts) == 3:
🤖 Prompt for AI Agents
In `@hack/tide-query-manager.py` around lines 229 - 242, The org-level config
detection is wrong because repo is modified before testing; change the logic in
the block handling len(parts) == 2 so you first check if parts[1] ==
'_prowconfig.yaml' (org-level) and set repo = org and full_repo = org in that
branch, otherwise set repo = parts[1].replace('_prowconfig.yaml', '') and
full_repo = f"{org}/{repo}"; update the len(parts) == 2 branch in
hack/tide-query-manager.py accordingly to avoid producing "org/".


config = self.load_config(config_file)
if not config:
continue

# Process Tide queries
tide_config = config.get('tide', {})
queries = tide_config.get('queries', [])

modified = False
for query in queries:
# Get the repo from the query (should be a single repo in sharded configs)
query_repos = query.get('repos', [])
if query_repos:
for qr in query_repos:
if self.process_query(query, qr, condition, debug=self.verbose):
modified = True
else:
# Org-level query
if self.process_query(query, full_repo, condition, debug=self.verbose):
modified = True

# Save modified config
if modified and not self.dry_run:
self.save_config(config_file, config)

def run(self, query_string: str):
"""Run the tide query manager with the given query."""
try:
condition = QueryCondition.from_query_string(query_string)
except ValueError as e:
print(f"Error parsing query: {e}", file=sys.stderr)
sys.exit(1)

print(f"Processing with query: {query_string}")
print(f"Dry run: {self.dry_run}\n")

self.process_sharded_configs(condition)

if self.changed_count == 0:
print("\nNo queries matched the specified conditions")
else:
if self.dry_run:
print(f"\nDry run: would have modified {self.changed_count} queries")
else:
print(f"\nSuccessfully modified {self.changed_count} queries")


def main():
parser = argparse.ArgumentParser(
description='Modify Prow Tide configurations using a query-based syntax',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Query syntax:
is:BRANCH - Match queries with this branch pattern (supports *)
repo:ORG/REPO - Match queries for this repository
has:LABEL - Match queries that have this label
missing:LABEL - Match queries that don't have this label
add:LABEL - Add this label to matched queries
remove:LABEL - Remove this label from matched queries
add-branch:BRANCH - Add this branch to matched queries
remove-branch:BRANCH - Remove this branch from matched queries

Examples:
# Replace backport-risk-assessed with staff-eng-approved for release-4.21
--query "is:release-4.21 has:backport-risk-assessed add:staff-eng-approved remove:backport-risk-assessed"

# Add verified to all main/master branches
--query "is:main add:verified"

# Add acknowledge-critical-fixes-only to specific repos on main
--query "is:main repo:openshift/ci-tools add:acknowledge-critical-fixes-only"

# Modify all release-4.x branches
--query "is:release-4.* has:staff-eng-approved remove:staff-eng-approved"
"""
)

parser.add_argument(
'--prow-config-dir',
required=True,
help='Path to the Prow configuration directory'
)
parser.add_argument(
'--sharded-prow-config-base-dir',
help='Path to the sharded Prow config base directory (defaults to prow-config-dir)'
)
parser.add_argument(
'--query',
required=True,
help='Query string specifying conditions and actions'
)
parser.add_argument(
'--dry-run',
action='store_true',
help='Print what would be changed without modifying files'
)
parser.add_argument(
'--verbose', '-v',
action='store_true',
help='Enable verbose debug output'
)

args = parser.parse_args()

# Default sharded config dir to prow config dir if not specified
sharded_dir = args.sharded_prow_config_base_dir or args.prow_config_dir

manager = TideQueryManager(
prow_config_dir=args.prow_config_dir,
sharded_config_dir=sharded_dir,
dry_run=args.dry_run,
verbose=args.verbose
)

manager.run(args.query)


if __name__ == '__main__':
main()