Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions src/codespy/agents/reviewer/modules/code_reviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
make_scope_relative,
resolve_scope_root,
restore_repo_paths,
validate_issue_lines,
)
from codespy.config import get_settings
from codespy.tools.mcp_utils import cleanup_mcp_contexts, connect_mcp_server
Expand Down Expand Up @@ -108,6 +109,19 @@ class CodeReviewSignature(dspy.Signature):
- Idiomatic short names (i, j, k, err, ctx, db, tx)
- Test file naming conventions

═══════════════════════════════════════════════════════════════════════════════
LINE NUMBER ANNOTATIONS
═══════════════════════════════════════════════════════════════════════════════

Each patch line is prefixed with its NEW-file line number:
L{n}: context or added line (n = line number in new file)
-removed line (no number — line does not exist in new file)

Use EXACTLY the L{n} numbers for line_start and line_end in reported issues.
Do NOT guess or compute line numbers from hunk headers — read them from the
prefix. For multi-line issues, set line_start to the first L{n} and line_end
to the last L{n} that covers the issue.

═══════════════════════════════════════════════════════════════════════════════
OUTPUT RULES
═══════════════════════════════════════════════════════════════════════════════
Expand Down Expand Up @@ -239,6 +253,7 @@ async def aforward(
if issue.confidence >= MIN_CONFIDENCE
]
restore_repo_paths(issues, scope.subroot)
validate_issue_lines(issues, scope.changed_files)
all_issues.extend(issues)
logger.debug(
f" Scope {scope.subroot}: {len(issues)} code review issues"
Expand Down
8 changes: 8 additions & 0 deletions src/codespy/agents/reviewer/modules/doc_reviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
make_scope_relative,
resolve_scope_root,
restore_repo_paths,
validate_issue_lines,
)
from codespy.config import get_settings

Expand Down Expand Up @@ -62,6 +63,12 @@ class DocReviewSignature(dspy.Signature):
- Documentation that is correct but could be "better"
- Issues unrelated to the code changes in the patches

LINE NUMBER ANNOTATIONS:
Each patch line is prefixed with its NEW-file line number:
L{n}: context or added line (n = line number in new file)
-removed line (no number — line does not exist in new file)
Use EXACTLY the L{n} numbers for line_start and line_end in reported issues.

OUTPUT RULES:
- Set category to "documentation"
- description: ≤25 words, imperative tone ("Update X section", "Add Y to README")
Expand Down Expand Up @@ -172,6 +179,7 @@ async def aforward(
if issue.confidence >= MIN_CONFIDENCE
]
restore_repo_paths(issues, scope.subroot)
validate_issue_lines(issues, scope.changed_files)
all_issues.extend(issues)
logger.debug(
f" Scope {scope.subroot}: {len(issues)} doc issues"
Expand Down
114 changes: 105 additions & 9 deletions src/codespy/agents/reviewer/modules/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,21 +120,24 @@ def make_scope_relative(scope: ScopeResult) -> ScopeResult:
"""
from codespy.agents.reviewer.models import PackageManifest, ScopeResult as SR

def _annotate(f: ChangedFile) -> ChangedFile:
"""Replace patch with annotated version (line-numbered)."""
return f.model_copy(update={"patch": f.annotated_patch or f.patch})

if scope.subroot == ".":
return scope # Already at repo root, no transformation needed
# Still need to annotate patches even at repo root
return scope.model_copy(
update={"changed_files": [_annotate(f) for f in scope.changed_files]}
)
relative_files = [
ChangedFile(
filename=strip_prefix(f.filename, scope.subroot),
status=f.status,
additions=f.additions,
deletions=f.deletions,
patch=f.patch,
previous_filename=(
_annotate(f).model_copy(update={
"filename": strip_prefix(f.filename, scope.subroot),
"previous_filename": (
strip_prefix(f.previous_filename, scope.subroot)
if f.previous_filename
else None
),
)
})
for f in scope.changed_files
]
# Adjust manifest paths too
Expand Down Expand Up @@ -163,6 +166,99 @@ def make_scope_relative(scope: ScopeResult) -> ScopeResult:
)


# Maximum distance (in lines) to snap an invalid line to a valid diff line.
# Beyond this threshold the line reference is dropped entirely.
_MAX_SNAP_DISTANCE = 5


def _nearest_valid_line(line: int, valid_lines: set[int]) -> int | None:
"""Return the nearest valid diff line within ``_MAX_SNAP_DISTANCE``, or None."""
if not valid_lines:
return None
sorted_lines = sorted(valid_lines)
best: int | None = None
best_dist = _MAX_SNAP_DISTANCE + 1
for vl in sorted_lines:
dist = abs(vl - line)
if dist < best_dist:
best = vl
best_dist = dist
# Once we've passed the target, remaining lines are farther away
if vl > line and dist > best_dist:
break
return best if best_dist <= _MAX_SNAP_DISTANCE else None


def validate_issue_lines(
issues: list[Issue],
changed_files: list[ChangedFile],
) -> None:
"""Validate and fix ``line_start`` / ``line_end`` on each issue (in-place).

For every issue that carries line numbers, this function checks whether those
lines actually appear in the diff of the corresponding ``ChangedFile``. If a
line is outside the diff it is *snapped* to the nearest valid diff line (within
``_MAX_SNAP_DISTANCE``). If no close match exists the line reference is cleared
to ``None`` so the comment falls back to the review body instead of landing on
an unrelated line.

Args:
issues: Issues to validate (modified in-place).
changed_files: The original (non-annotated) changed files whose
``valid_new_line_numbers`` represent the diff.
"""
file_map: dict[str, ChangedFile] = {f.filename: f for f in changed_files}

for issue in issues:
cf = file_map.get(issue.filename)
if cf is None:
# File not in diff — clear lines
issue.line_start = None
issue.line_end = None
continue

valid = cf.valid_new_line_numbers
if not valid:
issue.line_start = None
issue.line_end = None
continue

# --- line_start ---
if issue.line_start is not None:
if issue.line_start not in valid:
snapped = _nearest_valid_line(issue.line_start, valid)
if snapped is None:
logger.debug(
f"Clearing line_start={issue.line_start} for "
f"{issue.filename}: no valid diff line nearby"
)
issue.line_start = None
issue.line_end = None
continue
else:
logger.debug(
f"Snapping line_start {issue.line_start}→{snapped} "
f"for {issue.filename}"
)
issue.line_start = snapped

# --- line_end ---
if issue.line_end is not None and issue.line_start is not None:
if issue.line_end not in valid:
snapped = _nearest_valid_line(issue.line_end, valid)
if snapped is None or snapped < issue.line_start:
# Collapse to single-line
issue.line_end = issue.line_start
else:
logger.debug(
f"Snapping line_end {issue.line_end}→{snapped} "
f"for {issue.filename}"
)
issue.line_end = snapped
elif issue.line_end is not None and issue.line_start is None:
issue.line_end = None


def restore_repo_paths(issues: list[Issue], subroot: str) -> None:
"""Restore repo-root-relative paths in issue filenames (in-place).

Expand Down
3 changes: 2 additions & 1 deletion src/codespy/agents/reviewer/modules/supply_chain_auditor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from codespy.agents import SignatureContext, get_cost_tracker
from codespy.agents.reviewer.models import Issue, IssueCategory, ScopeResult
from codespy.agents.reviewer.modules.helpers import MIN_CONFIDENCE, resolve_scope_root, strip_prefix, restore_repo_paths
from codespy.agents.reviewer.modules.helpers import MIN_CONFIDENCE, resolve_scope_root, strip_prefix, restore_repo_paths, validate_issue_lines
from codespy.config import get_settings
from codespy.tools.mcp_utils import cleanup_mcp_contexts, connect_mcp_server

Expand Down Expand Up @@ -318,6 +318,7 @@ async def aforward(self, scopes: Sequence[ScopeResult], repo_path: Path) -> list
]
# Restore repo-root-relative paths in reported issues
restore_repo_paths(issues, scope.subroot)
validate_issue_lines(issues, scope.changed_files)
all_issues.extend(issues)
logger.debug(f" Supply chain security in scope {scope.subroot}: {len(issues)} issues")
except Exception as e:
Expand Down
58 changes: 58 additions & 0 deletions src/codespy/tools/git/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,64 @@ def valid_new_line_numbers(self) -> set[int]:

return valid_lines

@property
def annotated_patch(self) -> str | None:
"""Return the unified diff patch with new-file line numbers prefixed.

Each context line and addition line is prefixed with ``L{n}:`` where *n*
is the corresponding line number in the **new** version of the file.
Deletion lines keep their original ``-`` prefix without a line number
(they don't exist in the new file). Hunk headers are passed through
unchanged.

Example output::

@@ -10,5 +10,6 @@
L10: context line
-removed line
L11: +added line
L12: +another added line
L13: context line

Returns:
Annotated patch string, or ``None`` if the file has no patch.
"""
if not self.patch:
return None

annotated_lines: list[str] = []
current_new_line = 0

for line in self.patch.split("\n"):
# Hunk header — pass through as-is
if line.startswith("@@"):
match = re.search(r"\+(\d+)", line)
if match:
current_new_line = int(match.group(1))
annotated_lines.append(line)
continue

if not current_new_line:
annotated_lines.append(line)
continue

# Context line (unchanged) — prefix with line number
if line.startswith(" "):
annotated_lines.append(f"L{current_new_line}:{line}")
current_new_line += 1
# Addition line — prefix with line number
elif line.startswith("+"):
annotated_lines.append(f"L{current_new_line}:{line}")
current_new_line += 1
# Deletion line — no line number (not in new file), indent to align
elif line.startswith("-"):
annotated_lines.append(f" {line}")
# Other lines (e.g. "\ No newline at end of file")
else:
annotated_lines.append(line)

return "\n".join(annotated_lines)

def is_line_in_diff(self, line_number: int) -> bool:
"""Check if a line number is valid for inline comments.

Expand Down