Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 89 additions & 16 deletions tools/Code Review/scripts/Invoke-CopilotPRReview.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,44 @@ function Test-OrderedSubsequence {
return $true
}

# Length of the longest common subsequence (LCS) of two strings, compared
# character by character. Callers use it to score how close a suggested
# (edited) line is to a candidate file line.
#
# Parameters:
#   -A, -B  The two strings to compare. A null or empty value on either side
#           yields 0.
# Returns:
#   The LCS length as an integer.
#
# Classic dynamic programme, O(|A| * |B|) time, keeping only two rows of the
# table (the one being filled and the one above it) instead of the full grid.
function Get-LcsLength {
    param([string] $A, [string] $B)

    if ([string]::IsNullOrEmpty($A) -or [string]::IsNullOrEmpty($B)) {
        return 0
    }

    $width = $B.Length
    $above = New-Object 'int[]' ($width + 1)   # row for the previous char of $A
    $row   = New-Object 'int[]' ($width + 1)   # row currently being filled

    foreach ($ch in $A.ToCharArray()) {
        for ($col = 0; $col -lt $width; $col++) {
            if ($ch -eq $B[$col]) {
                # Characters match: extend the diagonal.
                $row[$col + 1] = $above[$col] + 1
            } elseif ($above[$col + 1] -ge $row[$col]) {
                $row[$col + 1] = $above[$col + 1]
            } else {
                $row[$col + 1] = $row[$col]
            }
        }
        # The filled row becomes "above" for the next character. The recycled
        # buffer needs no reset: cell 0 is never written (stays 0) and every
        # other cell is overwritten before it is read in the next pass.
        $above, $row = $row, $above
    }

    return $above[$width]
}

# Similarity in [0,1] between two already-loosened lines, computed as
# character LCS length divided by the longer line's length. Pure insertions
# (e.g. adding 'this.') stay high because the shorter line is almost entirely
# a subsequence of the longer one, while unrelated lines score low.
#
# The comparison is case-insensitive throughout: lines that are equal under
# PowerShell's string -eq score exactly 1.0, and the LCS fallback is computed
# on case-folded copies so that near-matches differing in case are scored on
# the same footing instead of dropping towards 0.
#
# Parameters:
#   -A, -B  The two lines to compare.
# Returns:
#   A [double] in [0,1]; 1.0 when the lines are equal (ignoring case) or both
#   are empty.
function Get-LooseLineSimilarity {
    param([string] $A, [string] $B)

    # String -eq ignores case, so this shortcut already treats 'Foo' and 'foo'
    # as a perfect match.
    if ($A -eq $B) { return 1.0 }

    $maxLen = [math]::Max($A.Length, $B.Length)
    if ($maxLen -eq 0) { return 1.0 }

    # Get-LcsLength compares individual [char] values, which PowerShell
    # compares case-sensitively. Fold case first so the fallback agrees with
    # the case-insensitive shortcut above.
    $lcs = Get-LcsLength -A $A.ToUpperInvariant() -B $B.ToUpperInvariant()

    # Force floating-point division so the result is always a [double], even
    # when the quotient is a whole number (e.g. 0).
    return ([double] $lcs / $maxLen)
}

# Resolve the RIGHT-side file span a suggestion should replace.
# Returns @{ startLine; endLine } (1-based, inclusive) or $null when the
# suggestion cannot be placed with confidence (caller drops the block).
Expand All @@ -514,25 +552,60 @@ function Resolve-SuggestionPlacement {
$firstLoose = ConvertTo-LooseLine $SuggestedLines[0]
$lastLoose = ConvertTo-LooseLine $SuggestedLines[$sCount - 1]

# --- Single-line suggestion: snap to the nearest unique content match. ---
# --- Single-line suggestion: re-anchor to the file line the edit targets. ---
# A one-line suggestion almost always *edits* a line, so it is NOT equal to
# the line it replaces (e.g. inserting 'this.' for CodeCop AA0248). Exact
# equality therefore fails for the common case and the model's reported
# anchor is unreliable: it frequently points at a neighbouring statement
# inside the same procedure (observed off by 10+ lines), a comment, or a
# blank line. We score the file lines in a window around the anchor by
# similarity to the suggested line and take the clear winner.
#
# The design favours PRECISION over recall: a wrong auto-applicable
# suggestion corrupts the file, whereas declining to re-anchor merely falls
# back to a manual (non-applicable) snippet. We therefore re-anchor only when
# a candidate is *confidently* the edit target -- it must clear a high
# similarity floor AND beat the runner-up by the ambiguity margin. This
# cleanly separates genuine targets (an edited statement or a renamed
# declaration score ~0.75-0.99) from coincidental look-alikes elsewhere in
# the window: e.g. a label-rename whose added Comment text echoes the field
# captions at the Error()/Confirm() call site tops out around ~0.55, so it
# stays below the floor and is suppressed rather than re-anchored onto the
# call site. When no candidate is confident and unambiguous we return $null
# so the caller posts a manual snippet instead of a wrong anchor.
if ($sCount -eq 1) {
if ((ConvertTo-LooseLine $FileLines[$AnchorLine - 1]) -eq $firstLoose) {
return [pscustomobject]@{ startLine = $AnchorLine; endLine = $AnchorLine }
}
for ($d = 1; $d -le 8; $d++) {
$hits = @()
foreach ($cand in @(($AnchorLine - $d), ($AnchorLine + $d))) {
if ($cand -ge 1 -and $cand -le $fileCount -and
(ConvertTo-LooseLine $FileLines[$cand - 1]) -eq $firstLoose) {
$hits += $cand
}
$minSimilarity = 0.6 # absolute confidence floor for a re-anchor
$ambiguityMargin = 0.1 # winner must beat the runner-up by this much
$window = 20 # lines either side of the anchor to consider

$lo = [math]::Max(1, $AnchorLine - $window)
$hi = [math]::Min($fileCount, $AnchorLine + $window)

$cands = for ($i = $lo; $i -le $hi; $i++) {
[pscustomobject]@{
line = $i
score = Get-LooseLineSimilarity (ConvertTo-LooseLine $FileLines[$i - 1]) $firstLoose
dist = [math]::Abs($i - $AnchorLine)
}
if ($hits.Count -eq 1) { return [pscustomobject]@{ startLine = $hits[0]; endLine = $hits[0] } }
if ($hits.Count -gt 1) { break } # ambiguous at this distance
}
# No content match found: a one-line replacement of the model's anchor
# is still safe (it cannot duplicate context), so trust the anchor.
return [pscustomobject]@{ startLine = $AnchorLine; endLine = $AnchorLine }
# Best score first; ties broken by proximity to the model anchor.
$ranked = @($cands | Sort-Object @{ Expression = 'score'; Descending = $true }, @{ Expression = 'dist'; Descending = $false })
$top = $ranked[0]

# An exact loose match is always safe to apply (the replaced text is
# identical), so accept it regardless of the ambiguity margin.
if ($top.score -ge 1.0) {
return [pscustomobject]@{ startLine = $top.line; endLine = $top.line }
}

$runnerScore = if ($ranked.Count -gt 1) { $ranked[1].score } else { -1.0 }
if ($top.score -ge $minSimilarity -and ($top.score - $runnerScore) -ge $ambiguityMargin) {
return [pscustomobject]@{ startLine = $top.line; endLine = $top.line }
}

# No confident, unambiguous target — let the caller fall back to a
# manual snippet instead of posting an auto-applicable wrong anchor.
return $null
}

# --- Multi-line suggestion: find an additive span [s,e] near the anchor. ---
Expand Down
Loading