diff --git a/src/codespy/agents/reviewer/modules/code_reviewer.py b/src/codespy/agents/reviewer/modules/code_reviewer.py index 337ccce..5018614 100644 --- a/src/codespy/agents/reviewer/modules/code_reviewer.py +++ b/src/codespy/agents/reviewer/modules/code_reviewer.py @@ -14,6 +14,7 @@ make_scope_relative, resolve_scope_root, restore_repo_paths, + validate_issue_lines, ) from codespy.config import get_settings from codespy.tools.mcp_utils import cleanup_mcp_contexts, connect_mcp_server @@ -108,6 +109,19 @@ class CodeReviewSignature(dspy.Signature): - Idiomatic short names (i, j, k, err, ctx, db, tx) - Test file naming conventions + ═══════════════════════════════════════════════════════════════════════════════ + LINE NUMBER ANNOTATIONS + ═══════════════════════════════════════════════════════════════════════════════ + + Each patch line is prefixed with its NEW-file line number: + L{n}: context or added line (n = line number in new file) + -removed line (no number — line does not exist in new file) + + Use EXACTLY the L{n} numbers for line_start and line_end in reported issues. + Do NOT guess or compute line numbers from hunk headers — read them from the + prefix. For multi-line issues, set line_start to the first L{n} and line_end + to the last L{n} that covers the issue. + ═══════════════════════════════════════════════════════════════════════════════ OUTPUT RULES ═══════════════════════════════════════════════════════════════════════════════ @@ -239,6 +253,7 @@ async def aforward( if issue.confidence >= MIN_CONFIDENCE ] restore_repo_paths(issues, scope.subroot) + validate_issue_lines(issues, scope.changed_files) all_issues.extend(issues) logger.debug( f" Scope {scope.subroot}: {len(issues)} code review issues" diff --git a/src/codespy/agents/reviewer/modules/doc_reviewer.py b/src/codespy/agents/reviewer/modules/doc_reviewer.py index b9530ff..7f0e75c 100644 --- a/src/codespy/agents/reviewer/modules/doc_reviewer.py +++ b/src/codespy/agents/reviewer/modules/doc_reviewer.py @@ -15,6 +15,7 @@ make_scope_relative, resolve_scope_root, restore_repo_paths, + validate_issue_lines, ) from codespy.config import get_settings @@ -62,6 +63,12 @@ class DocReviewSignature(dspy.Signature): - Documentation that is correct but could be "better" - Issues unrelated to the code changes in the patches + LINE NUMBER ANNOTATIONS: + Each patch line is prefixed with its NEW-file line number: + L{n}: context or added line (n = line number in new file) + -removed line (no number — line does not exist in new file) + Use EXACTLY the L{n} numbers for line_start and line_end in reported issues. + OUTPUT RULES: - Set category to "documentation" - description: ≤25 words, imperative tone ("Update X section", "Add Y to README") @@ -172,6 +179,7 @@ async def aforward( if issue.confidence >= MIN_CONFIDENCE ] restore_repo_paths(issues, scope.subroot) + validate_issue_lines(issues, scope.changed_files) all_issues.extend(issues) logger.debug( f" Scope {scope.subroot}: {len(issues)} doc issues" diff --git a/src/codespy/agents/reviewer/modules/helpers.py b/src/codespy/agents/reviewer/modules/helpers.py index 5e71b3a..8ddc6ed 100644 --- a/src/codespy/agents/reviewer/modules/helpers.py +++ b/src/codespy/agents/reviewer/modules/helpers.py @@ -120,21 +120,24 @@ def make_scope_relative(scope: ScopeResult) -> ScopeResult: """ from codespy.agents.reviewer.models import PackageManifest, ScopeResult as SR + def _annotate(f: ChangedFile) -> ChangedFile: + """Replace patch with annotated version (line-numbered).""" + return f.model_copy(update={"patch": f.annotated_patch or f.patch}) + if scope.subroot == ".": - return scope # Already at repo root, no transformation needed + # Still need to annotate patches even at repo root + return scope.model_copy( + update={"changed_files": [_annotate(f) for f in scope.changed_files]} + ) relative_files = [ - ChangedFile( - filename=strip_prefix(f.filename, scope.subroot), - status=f.status, - additions=f.additions, - deletions=f.deletions, - patch=f.patch, - previous_filename=( + _annotate(f).model_copy(update={ + "filename": strip_prefix(f.filename, scope.subroot), + "previous_filename": ( strip_prefix(f.previous_filename, scope.subroot) if f.previous_filename else None ), - ) + }) for f in scope.changed_files ] # Adjust manifest paths too @@ -163,6 +166,99 @@ def make_scope_relative(scope: ScopeResult) -> ScopeResult: ) +# Maximum distance (in lines) to snap an invalid line to a valid diff line. +# Beyond this threshold the line reference is dropped entirely. +_MAX_SNAP_DISTANCE = 5 + + +def _nearest_valid_line(line: int, valid_lines: set[int]) -> int | None: + """Return the nearest valid diff line within ``_MAX_SNAP_DISTANCE``, or None.""" + if not valid_lines: + return None + sorted_lines = sorted(valid_lines) + best: int | None = None + best_dist = _MAX_SNAP_DISTANCE + 1 + for vl in sorted_lines: + dist = abs(vl - line) + if dist < best_dist: + best = vl + best_dist = dist + # Once we've passed the target, remaining lines are farther away + if vl > line and dist > best_dist: + break + return best if best_dist <= _MAX_SNAP_DISTANCE else None + + +def validate_issue_lines( + issues: list[Issue], + changed_files: list[ChangedFile], +) -> None: + """Validate and fix ``line_start`` / ``line_end`` on each issue (in-place). + + For every issue that carries line numbers, this function checks whether those + lines actually appear in the diff of the corresponding ``ChangedFile``. If a + line is outside the diff it is *snapped* to the nearest valid diff line (within + ``_MAX_SNAP_DISTANCE``). If no close match exists the line reference is cleared + to ``None`` so the comment falls back to the review body instead of landing on + an unrelated line. + + Args: + issues: Issues to validate (modified in-place). + changed_files: The original (non-annotated) changed files whose + ``valid_new_line_numbers`` represent the diff. + """ + file_map: dict[str, ChangedFile] = {f.filename: f for f in changed_files} + + for issue in issues: + cf = file_map.get(issue.filename) + if cf is None: + # File not in diff — clear lines + issue.line_start = None + issue.line_end = None + continue + + valid = cf.valid_new_line_numbers + if not valid: + issue.line_start = None + issue.line_end = None + continue + + # --- line_start --- + if issue.line_start is not None: + if issue.line_start not in valid: + snapped = _nearest_valid_line(issue.line_start, valid) + if snapped is None: + logger.debug( + f"Clearing line_start={issue.line_start} for " + f"{issue.filename}: no valid diff line nearby" + ) + issue.line_start = None + issue.line_end = None + continue + else: + logger.debug( + f"Snapping line_start {issue.line_start}→{snapped} " + f"for {issue.filename}" + ) + issue.line_start = snapped + + # --- line_end --- + if issue.line_end is not None and issue.line_start is not None: + if issue.line_end not in valid: + snapped = _nearest_valid_line(issue.line_end, valid) + if snapped is None or snapped < issue.line_start: + # Collapse to single-line + issue.line_end = issue.line_start + else: + logger.debug( + f"Snapping line_end {issue.line_end}→{snapped} " + f"for {issue.filename}" + ) + issue.line_end = snapped + elif issue.line_end is not None and issue.line_start is None: + issue.line_end = None + + def restore_repo_paths(issues: list[Issue], subroot: str) -> None: """Restore repo-root-relative paths in issue filenames (in-place). diff --git a/src/codespy/agents/reviewer/modules/supply_chain_auditor.py b/src/codespy/agents/reviewer/modules/supply_chain_auditor.py index e86caee..a224554 100644 --- a/src/codespy/agents/reviewer/modules/supply_chain_auditor.py +++ b/src/codespy/agents/reviewer/modules/supply_chain_auditor.py @@ -9,7 +9,7 @@ from codespy.agents import SignatureContext, get_cost_tracker from codespy.agents.reviewer.models import Issue, IssueCategory, ScopeResult -from codespy.agents.reviewer.modules.helpers import MIN_CONFIDENCE, resolve_scope_root, strip_prefix, restore_repo_paths +from codespy.agents.reviewer.modules.helpers import MIN_CONFIDENCE, resolve_scope_root, strip_prefix, restore_repo_paths, validate_issue_lines from codespy.config import get_settings from codespy.tools.mcp_utils import cleanup_mcp_contexts, connect_mcp_server @@ -318,6 +318,7 @@ async def aforward(self, scopes: Sequence[ScopeResult], repo_path: Path) -> list ] # Restore repo-root-relative paths in reported issues restore_repo_paths(issues, scope.subroot) + validate_issue_lines(issues, scope.changed_files) all_issues.extend(issues) logger.debug(f" Supply chain security in scope {scope.subroot}: {len(issues)} issues") except Exception as e: diff --git a/src/codespy/tools/git/models.py b/src/codespy/tools/git/models.py index 6b397fe..23350a1 100644 --- a/src/codespy/tools/git/models.py +++ b/src/codespy/tools/git/models.py @@ -188,6 +188,64 @@ def valid_new_line_numbers(self) -> set[int]: return valid_lines + @property + def annotated_patch(self) -> str | None: + """Return the unified diff patch with new-file line numbers prefixed. + + Each context line and addition line is prefixed with ``L{n}:`` where *n* + is the corresponding line number in the **new** version of the file. + Deletion lines keep their original ``-`` prefix without a line number + (they don't exist in the new file). Hunk headers are passed through + unchanged. + + Example output:: + + @@ -10,5 +10,6 @@ + L10: context line + -removed line + L11: +added line + L12: +another added line + L13: context line + + Returns: + Annotated patch string, or ``None`` if the file has no patch. + """ + if not self.patch: + return None + + annotated_lines: list[str] = [] + current_new_line = 0 + + for line in self.patch.split("\n"): + # Hunk header — pass through as-is + if line.startswith("@@"): + match = re.search(r"\+(\d+)", line) + if match: + current_new_line = int(match.group(1)) + annotated_lines.append(line) + continue + + if not current_new_line: + annotated_lines.append(line) + continue + + # Context line (unchanged) — prefix with line number + if line.startswith(" "): + annotated_lines.append(f"L{current_new_line}:{line}") + current_new_line += 1 + # Addition line — prefix with line number + elif line.startswith("+"): + annotated_lines.append(f"L{current_new_line}:{line}") + current_new_line += 1 + # Deletion line — no line number (not in new file), indent to align + elif line.startswith("-"): + annotated_lines.append(f" {line}") + # Other lines (e.g. "\ No newline at end of file") + else: + annotated_lines.append(line) + + return "\n".join(annotated_lines) + def is_line_in_diff(self, line_number: int) -> bool: """Check if a line number is valid for inline comments.