Move diff-range computation into utils for reuse

This commit is contained in:
Kasper Svendsen
2025-10-27 09:43:11 +01:00
parent 4e0b2cd814
commit 91ec0ed58f
4 changed files with 302 additions and 306 deletions

View File

@@ -7,13 +7,13 @@ import * as sinon from "sinon";
import * as actionsUtil from "./actions-util";
import { CodeQuality, CodeScanning } from "./analyses";
import {
exportedForTesting,
runQueries,
defaultSuites,
resolveQuerySuiteAlias,
addSarifExtension,
} from "./analyze";
import { createStubCodeQL } from "./codeql";
import { exportedForTesting } from "./diff-informed-analysis-utils";
import { Feature } from "./feature-flags";
import { KnownLanguage } from "./languages";
import { getRunnerLogger } from "./logging";

View File

@@ -6,13 +6,8 @@ import * as io from "@actions/io";
import * as del from "del";
import * as yaml from "js-yaml";
import {
getRequiredInput,
getTemporaryDirectory,
PullRequestBranches,
} from "./actions-util";
import { getTemporaryDirectory, PullRequestBranches } from "./actions-util";
import * as analyses from "./analyses";
import { getApiClient } from "./api-client";
import { setupCppAutobuild } from "./autobuild";
import { type CodeQL } from "./codeql";
import * as configUtils from "./config-utils";
@@ -21,13 +16,13 @@ import { addDiagnostic, makeDiagnostic } from "./diagnostics";
import {
DiffThunkRange,
writeDiffRangesJsonFile,
getPullRequestEditedDiffRanges,
} from "./diff-informed-analysis-utils";
import { EnvVar } from "./environment";
import { FeatureEnablement, Feature } from "./feature-flags";
import { KnownLanguage, Language } from "./languages";
import { Logger, withGroupAsync } from "./logging";
import { OverlayDatabaseMode } from "./overlay-database-utils";
import { getRepositoryNwoFromEnv } from "./repository";
import { DatabaseCreationTimings, EventReport } from "./status-report";
import { endTracingForCluster } from "./tracer-config";
import * as util from "./util";
@@ -313,185 +308,6 @@ export async function setupDiffInformedQueryRun(
);
}
/**
* Return the file line ranges that were added or modified in the pull request.
*
* @param branches The base and head branches of the pull request.
* @param logger
* @returns An array of tuples, where each tuple contains the absolute path of a
* file, the start line and the end line (both 1-based and inclusive) of an
* added or modified range in that file. Returns `undefined` if the action was
* not triggered by a pull request or if there was an error.
*/
async function getPullRequestEditedDiffRanges(
branches: PullRequestBranches,
logger: Logger,
): Promise<DiffThunkRange[] | undefined> {
const fileDiffs = await getFileDiffsWithBasehead(branches, logger);
if (fileDiffs === undefined) {
return undefined;
}
if (fileDiffs.length >= 300) {
// The "compare two commits" API returns a maximum of 300 changed files. If
// we see that many changed files, it is possible that there could be more,
// with the rest being truncated. In this case, we should not attempt to
// compute the diff ranges, as the result would be incomplete.
logger.warning(
`Cannot retrieve the full diff because there are too many ` +
`(${fileDiffs.length}) changed files in the pull request.`,
);
return undefined;
}
const results: DiffThunkRange[] = [];
for (const filediff of fileDiffs) {
const diffRanges = getDiffRanges(filediff, logger);
if (diffRanges === undefined) {
return undefined;
}
results.push(...diffRanges);
}
return results;
}
/**
* This interface is an abbreviated version of the file diff object returned by
* the GitHub API.
*/
interface FileDiff {
filename: string;
changes: number;
// A patch may be absent if the file is binary, if the file diff is too large,
// or if the file is unchanged.
patch?: string | undefined;
}
async function getFileDiffsWithBasehead(
branches: PullRequestBranches,
logger: Logger,
): Promise<FileDiff[] | undefined> {
// Check CODE_SCANNING_REPOSITORY first. If it is empty or not set, fall back
// to GITHUB_REPOSITORY.
const repositoryNwo = getRepositoryNwoFromEnv(
"CODE_SCANNING_REPOSITORY",
"GITHUB_REPOSITORY",
);
const basehead = `${branches.base}...${branches.head}`;
try {
const response = await getApiClient().rest.repos.compareCommitsWithBasehead(
{
owner: repositoryNwo.owner,
repo: repositoryNwo.repo,
basehead,
per_page: 1,
},
);
logger.debug(
`Response from compareCommitsWithBasehead(${basehead}):` +
`\n${JSON.stringify(response, null, 2)}`,
);
return response.data.files;
} catch (error: any) {
if (error.status) {
logger.warning(`Error retrieving diff ${basehead}: ${error.message}`);
logger.debug(
`Error running compareCommitsWithBasehead(${basehead}):` +
`\nRequest: ${JSON.stringify(error.request, null, 2)}` +
`\nError Response: ${JSON.stringify(error.response, null, 2)}`,
);
return undefined;
} else {
throw error;
}
}
}
function getDiffRanges(
fileDiff: FileDiff,
logger: Logger,
): DiffThunkRange[] | undefined {
// Diff-informed queries expect the file path to be absolute. CodeQL always
// uses forward slashes as the path separator, so on Windows we need to
// replace any backslashes with forward slashes.
const filename = path
.join(getRequiredInput("checkout_path"), fileDiff.filename)
.replaceAll(path.sep, "/");
if (fileDiff.patch === undefined) {
if (fileDiff.changes === 0) {
// There are situations where a changed file legitimately has no diff.
// For example, the file may be a binary file, or that the file may have
// been renamed with no changes to its contents. In these cases, the
// file would be reported as having 0 changes, and we can return an empty
// array to indicate no diff range in this file.
return [];
}
// If a file is reported to have nonzero changes but no patch, that may be
// due to the file diff being too large. In this case, we should fall back
// to a special diff range that covers the entire file.
return [
{
path: filename,
startLine: 0,
endLine: 0,
},
];
}
// The 1-based file line number of the current line
let currentLine = 0;
// The 1-based file line number that starts the current range of added lines
let additionRangeStartLine: number | undefined = undefined;
const diffRanges: DiffThunkRange[] = [];
const diffLines = fileDiff.patch.split("\n");
// Adding a fake context line at the end ensures that the following loop will
// always terminate the last range of added lines.
diffLines.push(" ");
for (const diffLine of diffLines) {
if (diffLine.startsWith("-")) {
// Ignore deletions completely -- we do not even want to consider them when
// calculating consecutive ranges of added lines.
continue;
}
if (diffLine.startsWith("+")) {
if (additionRangeStartLine === undefined) {
additionRangeStartLine = currentLine;
}
currentLine++;
continue;
}
if (additionRangeStartLine !== undefined) {
// Any line that does not start with a "+" or "-" terminates the current
// range of added lines.
diffRanges.push({
path: filename,
startLine: additionRangeStartLine,
endLine: currentLine - 1,
});
additionRangeStartLine = undefined;
}
if (diffLine.startsWith("@@ ")) {
// A new hunk header line resets the current line number.
const match = diffLine.match(/^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/);
if (match === null) {
logger.warning(
`Cannot parse diff hunk header for ${fileDiff.filename}: ${diffLine}`,
);
return undefined;
}
currentLine = parseInt(match[1], 10);
continue;
}
if (diffLine.startsWith(" ")) {
// An unchanged context line advances the current line number.
currentLine++;
continue;
}
}
return diffRanges;
}
/**
* Create an extension pack in the temporary directory that contains the file
* line ranges that were added or modified in the pull request.
@@ -922,7 +738,3 @@ export async function warnIfGoInstalledAfterInit(
}
}
}
export const exportedForTesting = {
getDiffRanges,
};

View File

@@ -3,12 +3,25 @@ import * as path from "path";
import * as actionsUtil from "./actions-util";
import type { PullRequestBranches } from "./actions-util";
import { getGitHubVersion } from "./api-client";
import { getApiClient, getGitHubVersion } from "./api-client";
import type { CodeQL } from "./codeql";
import { Feature, FeatureEnablement } from "./feature-flags";
import { Logger } from "./logging";
import { getRepositoryNwoFromEnv } from "./repository";
import { GitHubVariant, satisfiesGHESVersion } from "./util";
/**
* This interface is an abbreviated version of the file diff object returned by
* the GitHub API.
*/
interface FileDiff {
filename: string;
changes: number;
// A patch may be absent if the file is binary, if the file diff is too large,
// or if the file is unchanged.
patch?: string | undefined;
}
/**
* Check if the action should perform diff-informed analysis.
*/
@@ -93,3 +106,174 @@ export function readDiffRangesJsonFile(
);
return JSON.parse(jsonContents) as DiffThunkRange[];
}
/**
* Return the file line ranges that were added or modified in the pull request.
*
* @param branches The base and head branches of the pull request.
* @param logger
* @returns An array of tuples, where each tuple contains the absolute path of a
* file, the start line and the end line (both 1-based and inclusive) of an
* added or modified range in that file. Returns `undefined` if the action was
* not triggered by a pull request or if there was an error.
*/
export async function getPullRequestEditedDiffRanges(
branches: PullRequestBranches,
logger: Logger,
): Promise<DiffThunkRange[] | undefined> {
const fileDiffs = await getFileDiffsWithBasehead(branches, logger);
if (fileDiffs === undefined) {
return undefined;
}
if (fileDiffs.length >= 300) {
// The "compare two commits" API returns a maximum of 300 changed files. If
// we see that many changed files, it is possible that there could be more,
// with the rest being truncated. In this case, we should not attempt to
// compute the diff ranges, as the result would be incomplete.
logger.warning(
`Cannot retrieve the full diff because there are too many ` +
`(${fileDiffs.length}) changed files in the pull request.`,
);
return undefined;
}
const results: DiffThunkRange[] = [];
for (const filediff of fileDiffs) {
const diffRanges = getDiffRanges(filediff, logger);
if (diffRanges === undefined) {
return undefined;
}
results.push(...diffRanges);
}
return results;
}
async function getFileDiffsWithBasehead(
branches: PullRequestBranches,
logger: Logger,
): Promise<FileDiff[] | undefined> {
// Check CODE_SCANNING_REPOSITORY first. If it is empty or not set, fall back
// to GITHUB_REPOSITORY.
const repositoryNwo = getRepositoryNwoFromEnv(
"CODE_SCANNING_REPOSITORY",
"GITHUB_REPOSITORY",
);
const basehead = `${branches.base}...${branches.head}`;
try {
const response = await getApiClient().rest.repos.compareCommitsWithBasehead(
{
owner: repositoryNwo.owner,
repo: repositoryNwo.repo,
basehead,
per_page: 1,
},
);
logger.debug(
`Response from compareCommitsWithBasehead(${basehead}):` +
`\n${JSON.stringify(response, null, 2)}`,
);
return response.data.files;
} catch (error: any) {
if (error.status) {
logger.warning(`Error retrieving diff ${basehead}: ${error.message}`);
logger.debug(
`Error running compareCommitsWithBasehead(${basehead}):` +
`\nRequest: ${JSON.stringify(error.request, null, 2)}` +
`\nError Response: ${JSON.stringify(error.response, null, 2)}`,
);
return undefined;
} else {
throw error;
}
}
}
function getDiffRanges(
fileDiff: FileDiff,
logger: Logger,
): DiffThunkRange[] | undefined {
// Diff-informed queries expect the file path to be absolute. CodeQL always
// uses forward slashes as the path separator, so on Windows we need to
// replace any backslashes with forward slashes.
const filename = path
.join(actionsUtil.getRequiredInput("checkout_path"), fileDiff.filename)
.replaceAll(path.sep, "/");
if (fileDiff.patch === undefined) {
if (fileDiff.changes === 0) {
// There are situations where a changed file legitimately has no diff.
// For example, the file may be a binary file, or that the file may have
// been renamed with no changes to its contents. In these cases, the
// file would be reported as having 0 changes, and we can return an empty
// array to indicate no diff range in this file.
return [];
}
// If a file is reported to have nonzero changes but no patch, that may be
// due to the file diff being too large. In this case, we should fall back
// to a special diff range that covers the entire file.
return [
{
path: filename,
startLine: 0,
endLine: 0,
},
];
}
// The 1-based file line number of the current line
let currentLine = 0;
// The 1-based file line number that starts the current range of added lines
let additionRangeStartLine: number | undefined = undefined;
const diffRanges: DiffThunkRange[] = [];
const diffLines = fileDiff.patch.split("\n");
// Adding a fake context line at the end ensures that the following loop will
// always terminate the last range of added lines.
diffLines.push(" ");
for (const diffLine of diffLines) {
if (diffLine.startsWith("-")) {
// Ignore deletions completely -- we do not even want to consider them when
// calculating consecutive ranges of added lines.
continue;
}
if (diffLine.startsWith("+")) {
if (additionRangeStartLine === undefined) {
additionRangeStartLine = currentLine;
}
currentLine++;
continue;
}
if (additionRangeStartLine !== undefined) {
// Any line that does not start with a "+" or "-" terminates the current
// range of added lines.
diffRanges.push({
path: filename,
startLine: additionRangeStartLine,
endLine: currentLine - 1,
});
additionRangeStartLine = undefined;
}
if (diffLine.startsWith("@@ ")) {
// A new hunk header line resets the current line number.
const match = diffLine.match(/^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/);
if (match === null) {
logger.warning(
`Cannot parse diff hunk header for ${fileDiff.filename}: ${diffLine}`,
);
return undefined;
}
currentLine = parseInt(match[1], 10);
continue;
}
if (diffLine.startsWith(" ")) {
// An unchanged context line advances the current line number.
currentLine++;
continue;
}
}
return diffRanges;
}
export const exportedForTesting = {
getDiffRanges,
};