Overlay databases: use --overlay-changes

This commit changes overlay database creation to use the
--overlay-changes flag. It also implements Git-based file change
detection to generate the list of files to extract for the overlay
database.
This commit is contained in:
Chuan-kai Lin
2025-03-19 11:38:45 -07:00
parent c50c157cc3
commit 6be6984cc1
9 changed files with 333 additions and 11 deletions

6
lib/codeql.js generated
View File

@@ -292,7 +292,8 @@ async function getCodeQLForCmd(cmd, checkVersion) {
? "--force-overwrite"
: "--overwrite";
if (overlayDatabaseMode === overlay_database_utils_1.OverlayDatabaseMode.Overlay) {
extraArgs.push("--overlay");
const overlayChangesFile = await (0, overlay_database_utils_1.writeOverlayChangesFile)(config, sourceRoot, logger);
extraArgs.push(`--overlay-changes=${overlayChangesFile}`);
}
else if (overlayDatabaseMode === overlay_database_utils_1.OverlayDatabaseMode.OverlayBase) {
extraArgs.push("--overlay-base");
@@ -314,6 +315,9 @@ async function getCodeQLForCmd(cmd, checkVersion) {
ignoringOptions: ["--overwrite"],
}),
], { stdin: externalRepositoryToken });
if (overlayDatabaseMode === overlay_database_utils_1.OverlayDatabaseMode.OverlayBase) {
await (0, overlay_database_utils_1.writeBaseDatabaseOidsFile)(config, sourceRoot);
}
},
async runAutobuild(config, language) {
applyAutobuildAzurePipelinesTimeoutFix();

File diff suppressed because one or more lines are too long

38
lib/git-utils.js generated
View File

@@ -33,7 +33,7 @@ var __importStar = (this && this.__importStar) || (function () {
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.getGitRoot = exports.decodeGitFilePath = exports.gitRepack = exports.gitFetch = exports.deepenGitHistory = exports.determineBaseBranchHeadCommitOid = exports.getCommitOid = void 0;
exports.getFileOidsUnderPath = exports.getGitRoot = exports.decodeGitFilePath = exports.gitRepack = exports.gitFetch = exports.deepenGitHistory = exports.determineBaseBranchHeadCommitOid = exports.getCommitOid = void 0;
exports.getRef = getRef;
exports.isAnalyzingDefaultBranch = isAnalyzingDefaultBranch;
const core = __importStar(require("@actions/core"));
@@ -41,7 +41,7 @@ const toolrunner = __importStar(require("@actions/exec/lib/toolrunner"));
const io = __importStar(require("@actions/io"));
const actions_util_1 = require("./actions-util");
const util_1 = require("./util");
async function runGitCommand(checkoutPath, args, customErrorMessage) {
async function runGitCommand(workingDirectory, args, customErrorMessage) {
let stdout = "";
let stderr = "";
core.debug(`Running git command: git ${args.join(" ")}`);
@@ -56,7 +56,7 @@ async function runGitCommand(checkoutPath, args, customErrorMessage) {
stderr += data.toString();
},
},
cwd: checkoutPath,
cwd: workingDirectory,
}).exec();
return stdout;
}
@@ -247,6 +247,38 @@ const getGitRoot = async function (sourceRoot) {
}
};
exports.getGitRoot = getGitRoot;
/**
 * Returns the Git OIDs of all tracked files (in the index and in the working
 * tree) that are under the given base path, including files in active
 * submodules. Untracked files and files not under the given base path are
 * ignored.
 *
 * @param basePath A path into the Git repository.
 * @returns a map from file paths (relative to `basePath`) to Git OIDs.
 * @throws {Error} if "git ls-files" produces unexpected output.
 */
const getFileOidsUnderPath = async function (basePath) {
    // Without the --full-name flag, the path is relative to the current working
    // directory of the git command, which is basePath.
    const stdout = await runGitCommand(basePath, ["ls-files", "--recurse-submodules", "--format=%(objectname)_%(path)"], "Cannot list Git OIDs of tracked files.");
    const fileOidMap = {};
    // Each non-empty output line has the shape "<oid>_<path>". Object names are
    // 40 hex digits in SHA-1 repositories and 64 hex digits in SHA-256
    // repositories, so both lengths are accepted.
    const regex = /^([0-9a-f]{40}|[0-9a-f]{64})_(.+)$/;
    for (const line of stdout.split("\n")) {
        if (!line) {
            // Skip blank lines, e.g. the empty string after the final newline.
            continue;
        }
        const match = line.match(regex);
        if (!match) {
            throw new Error(`Unexpected "git ls-files" output: ${line}`);
        }
        const oid = match[1];
        // The path is passed through decodeGitFilePath before being used as a key.
        const path = (0, exports.decodeGitFilePath)(match[2]);
        fileOidMap[path] = oid;
    }
    return fileOidMap;
};
exports.getFileOidsUnderPath = getFileOidsUnderPath;
function getRefFromEnv() {
// To workaround a limitation of Actions dynamic workflows not setting
// the GITHUB_REF in some cases, we accept also the ref within the

File diff suppressed because one or more lines are too long

View File

@@ -1,6 +1,45 @@
"use strict";
// Module-interop helper. Exposes m[k] on o under the name k2 (k2 defaults to
// k). When m's own property descriptor cannot be reused as-is, a getter that
// reads m[k] on every access is installed instead. Falls back to a plain
// assignment when Object.create is unavailable. An already-defined
// this.__createBinding takes precedence.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// Module-interop helper. Stores v as the enumerable "default" property of o.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
// Module-interop helper used for namespace-style requires. A module flagged
// with __esModule is returned unchanged; otherwise a new object is built that
// re-exposes every own property of the module except "default", and the
// module itself is stored under "default".
var __importStar = (this && this.__importStar) || (function () {
// Lists own property names; replaces itself on first call with
// Object.getOwnPropertyNames or a for-in/hasOwnProperty fallback.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.CODEQL_OVERLAY_MINIMUM_VERSION = exports.OverlayDatabaseMode = void 0;
exports.writeBaseDatabaseOidsFile = writeBaseDatabaseOidsFile;
exports.writeOverlayChangesFile = writeOverlayChangesFile;
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const actions_util_1 = require("./actions-util");
const git_utils_1 = require("./git-utils");
var OverlayDatabaseMode;
(function (OverlayDatabaseMode) {
OverlayDatabaseMode["Overlay"] = "overlay";
@@ -8,4 +47,83 @@ var OverlayDatabaseMode;
OverlayDatabaseMode["None"] = "none";
})(OverlayDatabaseMode || (exports.OverlayDatabaseMode = OverlayDatabaseMode = {}));
exports.CODEQL_OVERLAY_MINIMUM_VERSION = "2.20.5";
/**
 * Records the Git OID of every tracked file under the source root as a JSON
 * object (keys are paths relative to the source root) and saves it as
 * `base-database-oids.json` inside the database location from the config.
 *
 * @param config The configuration object containing the database location
 * @param sourceRoot The root directory containing the source files to process
 * @returns a promise that resolves once the file has been written; it rejects
 * if listing the tracked files or writing the file fails.
 */
async function writeBaseDatabaseOidsFile(config, sourceRoot) {
    const oidsByPath = await (0, git_utils_1.getFileOidsUnderPath)(sourceRoot);
    const serializedOids = JSON.stringify(oidsByPath);
    await fs.promises.writeFile(getBaseDatabaseOidsFilePath(config), serializedOids);
}
/**
 * Loads the path-to-OID mapping that writeBaseDatabaseOidsFile() stored for
 * the overlay-base database.
 *
 * @param config The configuration object containing the database location
 * @param logger The logger instance to use for error reporting
 * @returns An object mapping file paths (relative to source root) to their Git OIDs
 * @throws {Error} If the file cannot be read or parsed; the failure is logged
 * through `logger` and the original error is rethrown.
 */
async function readBaseDatabaseOidsFile(config, logger) {
    const oidsFile = getBaseDatabaseOidsFilePath(config);
    let raw;
    let parsed;
    try {
        raw = await fs.promises.readFile(oidsFile, "utf-8");
        parsed = JSON.parse(raw);
    }
    catch (e) {
        // Prefer the error's message; fall back to the thrown value itself.
        const reason = e.message || e;
        logger.error(`Failed to read overlay-base file OIDs from ${oidsFile}: ${reason}`);
        throw e;
    }
    return parsed;
}
/**
 * Builds the path of the `base-database-oids.json` file inside the database
 * location named by the config.
 */
function getBaseDatabaseOidsFilePath(config) {
    const { dbLocation } = config;
    return path.join(dbLocation, "base-database-oids.json");
}
/**
 * Determines which files under `sourceRoot` were added, removed, or modified
 * relative to the overlay-base database, and writes their
 * source-root-relative paths to `overlay-changes.json` in the temporary
 * directory as `{ "changes": [...] }`.
 *
 * Change detection compares Git OIDs from the index, so the following must
 * hold both now and when the overlay-base database was initialized:
 *
 * - `sourceRoot` is inside a Git repository.
 * - All working-tree changes are staged in the index.
 * - All files of interest are tracked by Git, e.g. not covered by
 *   `.gitignore`.
 *
 * @returns the path of the JSON file that was written.
 */
async function writeOverlayChangesFile(config, sourceRoot, logger) {
    const baseOids = await readBaseDatabaseOidsFile(config, logger);
    const currentOids = await (0, git_utils_1.getFileOidsUnderPath)(sourceRoot);
    const changes = computeChangedFiles(baseOids, currentOids);
    logger.info(`Found ${changes.length} changed file(s) under ${sourceRoot}.`);
    const payload = JSON.stringify({ changes });
    const tempDir = (0, actions_util_1.getTemporaryDirectory)();
    const outputPath = path.join(tempDir, "overlay-changes.json");
    logger.debug(`Writing overlay changed files to ${outputPath}: ${payload}`);
    await fs.promises.writeFile(outputPath, payload);
    return outputPath;
}
/**
 * Compares two path-to-OID maps and lists every path that was added, modified
 * (OID differs), or removed.
 *
 * @param baseFileOids Map from file path to Git OID for the overlay-base database
 * @param overlayFileOids Map from file path to Git OID for the current checkout
 * @returns added and modified paths in `overlayFileOids` order, followed by
 * removed paths in `baseFileOids` order.
 */
function computeChangedFiles(baseFileOids, overlayFileOids) {
    // Use an own-property check instead of the `in` operator: `in` also matches
    // members inherited from Object.prototype, so a removed file named e.g.
    // "constructor" or "toString" would otherwise go unreported.
    const hasOwn = (map, key) => Object.prototype.hasOwnProperty.call(map, key);
    const changes = [];
    for (const [file, oid] of Object.entries(overlayFileOids)) {
        if (!hasOwn(baseFileOids, file) || baseFileOids[file] !== oid) {
            changes.push(file);
        }
    }
    for (const file of Object.keys(baseFileOids)) {
        if (!hasOwn(overlayFileOids, file)) {
            changes.push(file);
        }
    }
    return changes;
}
//# sourceMappingURL=overlay-database-utils.js.map

View File

@@ -1 +1 @@
{"version":3,"file":"overlay-database-utils.js","sourceRoot":"","sources":["../src/overlay-database-utils.ts"],"names":[],"mappings":";;;AAAA,IAAY,mBAIX;AAJD,WAAY,mBAAmB;IAC7B,0CAAmB,CAAA;IACnB,mDAA4B,CAAA;IAC5B,oCAAa,CAAA;AACf,CAAC,EAJW,mBAAmB,mCAAnB,mBAAmB,QAI9B;AAEY,QAAA,8BAA8B,GAAG,QAAQ,CAAC"}
{"version":3,"file":"overlay-database-utils.js","sourceRoot":"","sources":["../src/overlay-database-utils.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAyBA,8DAQC;AAkDD,0DAsBC;AAzGD,uCAAyB;AACzB,2CAA6B;AAE7B,iDAAuD;AAEvD,2CAAmD;AAGnD,IAAY,mBAIX;AAJD,WAAY,mBAAmB;IAC7B,0CAAmB,CAAA;IACnB,mDAA4B,CAAA;IAC5B,oCAAa,CAAA;AACf,CAAC,EAJW,mBAAmB,mCAAnB,mBAAmB,QAI9B;AAEY,QAAA,8BAA8B,GAAG,QAAQ,CAAC;AAEvD;;;;;;;;GAQG;AACI,KAAK,UAAU,yBAAyB,CAC7C,MAAc,EACd,UAAkB;IAElB,MAAM,WAAW,GAAG,MAAM,IAAA,gCAAoB,EAAC,UAAU,CAAC,CAAC;IAC3D,MAAM,eAAe,GAAG,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IACpD,MAAM,wBAAwB,GAAG,2BAA2B,CAAC,MAAM,CAAC,CAAC;IACrE,MAAM,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,wBAAwB,EAAE,eAAe,CAAC,CAAC;AACzE,CAAC;AAED;;;;;;;;;GASG;AACH,KAAK,UAAU,wBAAwB,CACrC,MAAc,EACd,MAAc;IAEd,MAAM,wBAAwB,GAAG,2BAA2B,CAAC,MAAM,CAAC,CAAC;IACrE,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CACzC,wBAAwB,EACxB,OAAO,CACR,CAAC;QACF,OAAO,IAAI,CAAC,KAAK,CAAC,QAAQ,CAA8B,CAAC;IAC3D,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,CAAC,KAAK,CACV,6CAA6C;YAC3C,GAAG,wBAAwB,KAAM,CAAS,CAAC,OAAO,IAAI,CAAC,EAAE,CAC5D,CAAC;QACF,MAAM,CAAC,CAAC;IACV,CAAC;AACH,CAAC;AAED,SAAS,2BAA2B,CAAC,MAAc;IACjD,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,yBAAyB,CAAC,CAAC;AACjE,CAAC;AAED;;;;;;;;;;;;;GAaG;AACI,KAAK,UAAU,uBAAuB,CAC3C,MAAc,EACd,UAAkB,EAClB,MAAc;IAEd,MAAM,YAAY,GAAG,MAAM,wBAAwB,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACpE,MAAM,eAAe,GAAG,MAAM,IAAA,gCAAoB,EAAC,UAAU,CAAC,CAAC;IAC/D,MAAM,YAAY,GAAG,mBAAmB,CAAC,YAAY,EAAE,eAAe,CAAC,CAAC;IACxE,MAAM,CAAC,IAAI,CACT,SAAS,YAAY,CAAC,MAAM,0BAA0B,UAAU,GAAG,CACpE,CAAC;IAEF,MAAM,gBAAgB,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC;IACnE,MAAM,kBAAkB,GAAG,IAAI,CAAC,IAAI,CAClC,IAAA,oCAAqB,GAAE,EACvB,sBAAsB,CACvB,CAAC;IACF,MAAM,CAAC,KAAK,CACV,oCAAoC,kBAAkB,KAAK,gBAAgB,EAAE,CAC9E,CAAC;IACF,MAAM,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,kBAAkB,EAAE,gBAAgB,CAAC,CAAC;IAClE,OAAO,kBAAkB,CAAC;AAC5B,CAAC;AAED,SAAS,mBAAmB,CAC1B,YAAuC,EACvC,eAA0C;IAE1C,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,MAAM,CAAC
,IAAI,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,EAAE,CAAC;QAC1D,IAAI,CAAC,CAAC,IAAI,IAAI,YAAY,CAAC,IAAI,YAAY,CAAC,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC;YAC1D,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IACD,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,EAAE,CAAC;QAC7C,IAAI,CAAC,CAAC,IAAI,IAAI,eAAe,CAAC,EAAE,CAAC;YAC/B,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC"}

View File

@@ -24,7 +24,11 @@ import {
import { isAnalyzingDefaultBranch } from "./git-utils";
import { Language } from "./languages";
import { Logger } from "./logging";
import { OverlayDatabaseMode } from "./overlay-database-utils";
import {
OverlayDatabaseMode,
writeBaseDatabaseOidsFile,
writeOverlayChangesFile,
} from "./overlay-database-utils";
import * as setupCodeql from "./setup-codeql";
import { ZstdAvailability } from "./tar";
import { ToolsDownloadStatusReport } from "./tools-download";
@@ -610,7 +614,12 @@ export async function getCodeQLForCmd(
: "--overwrite";
if (overlayDatabaseMode === OverlayDatabaseMode.Overlay) {
extraArgs.push("--overlay");
const overlayChangesFile = await writeOverlayChangesFile(
config,
sourceRoot,
logger,
);
extraArgs.push(`--overlay-changes=${overlayChangesFile}`);
} else if (overlayDatabaseMode === OverlayDatabaseMode.OverlayBase) {
extraArgs.push("--overlay-base");
}
@@ -636,6 +645,10 @@ export async function getCodeQLForCmd(
],
{ stdin: externalRepositoryToken },
);
if (overlayDatabaseMode === OverlayDatabaseMode.OverlayBase) {
await writeBaseDatabaseOidsFile(config, sourceRoot);
}
},
async runAutobuild(config: Config, language: Language) {
applyAutobuildAzurePipelinesTimeoutFix();

View File

@@ -10,7 +10,7 @@ import {
import { ConfigurationError, getRequiredEnvParam } from "./util";
async function runGitCommand(
checkoutPath: string | undefined,
workingDirectory: string | undefined,
args: string[],
customErrorMessage: string,
): Promise<string> {
@@ -28,7 +28,7 @@ async function runGitCommand(
stderr += data.toString();
},
},
cwd: checkoutPath,
cwd: workingDirectory,
}).exec();
return stdout;
} catch (error) {
@@ -253,6 +253,44 @@ export const getGitRoot = async function (
}
};
/**
 * Returns the Git OIDs of all tracked files (in the index and in the working
 * tree) that are under the given base path, including files in active
 * submodules. Untracked files and files not under the given base path are
 * ignored.
 *
 * @param basePath A path into the Git repository.
 * @returns a map from file paths (relative to `basePath`) to Git OIDs.
 * @throws {Error} if "git ls-files" produces unexpected output.
 */
export const getFileOidsUnderPath = async function (
  basePath: string,
): Promise<{ [key: string]: string }> {
  // Without the --full-name flag, the path is relative to the current working
  // directory of the git command, which is basePath.
  const stdout = await runGitCommand(
    basePath,
    ["ls-files", "--recurse-submodules", "--format=%(objectname)_%(path)"],
    "Cannot list Git OIDs of tracked files.",
  );

  const fileOidMap: { [key: string]: string } = {};
  // Each non-empty output line has the shape "<oid>_<path>". Object names are
  // 40 hex digits in SHA-1 repositories and 64 hex digits in SHA-256
  // repositories, so both lengths are accepted.
  const regex = /^([0-9a-f]{40}|[0-9a-f]{64})_(.+)$/;
  for (const line of stdout.split("\n")) {
    if (!line) {
      // Skip blank lines, e.g. the empty string after the final newline.
      continue;
    }
    const match = line.match(regex);
    if (!match) {
      throw new Error(`Unexpected "git ls-files" output: ${line}`);
    }
    const oid = match[1];
    // The path is passed through decodeGitFilePath before being used as a key.
    const path = decodeGitFilePath(match[2]);
    fileOidMap[path] = oid;
  }
  return fileOidMap;
};
function getRefFromEnv(): string {
// To workaround a limitation of Actions dynamic workflows not setting
// the GITHUB_REF in some cases, we accept also the ref within the

View File

@@ -1,3 +1,11 @@
import * as fs from "fs";
import * as path from "path";
import { getTemporaryDirectory } from "./actions-util";
import { type Config } from "./config-utils";
import { getFileOidsUnderPath } from "./git-utils";
import { Logger } from "./logging";
export enum OverlayDatabaseMode {
Overlay = "overlay",
OverlayBase = "overlay-base",
@@ -5,3 +13,112 @@ export enum OverlayDatabaseMode {
}
export const CODEQL_OVERLAY_MINIMUM_VERSION = "2.20.5";
/**
 * Records the Git OID of every tracked file under the source root as a JSON
 * object (keys are paths relative to the source root) and saves it as
 * `base-database-oids.json` inside the database location from the config.
 *
 * @param config The configuration object containing the database location
 * @param sourceRoot The root directory containing the source files to process
 * @returns a promise that resolves once the file has been written; it rejects
 * if listing the tracked files or writing the file fails.
 */
export async function writeBaseDatabaseOidsFile(
  config: Config,
  sourceRoot: string,
): Promise<void> {
  const oidsByPath = await getFileOidsUnderPath(sourceRoot);
  const serializedOids = JSON.stringify(oidsByPath);
  await fs.promises.writeFile(
    getBaseDatabaseOidsFilePath(config),
    serializedOids,
  );
}
/**
 * Loads the path-to-OID mapping that writeBaseDatabaseOidsFile() stored for
 * the overlay-base database.
 *
 * @param config The configuration object containing the database location
 * @param logger The logger instance to use for error reporting
 * @returns An object mapping file paths (relative to source root) to their Git OIDs
 * @throws {Error} If the file cannot be read or parsed; the failure is logged
 * through `logger` and the original error is rethrown.
 */
async function readBaseDatabaseOidsFile(
  config: Config,
  logger: Logger,
): Promise<{ [key: string]: string }> {
  const oidsFile = getBaseDatabaseOidsFilePath(config);
  let parsed: { [key: string]: string };
  try {
    const raw = await fs.promises.readFile(oidsFile, "utf-8");
    parsed = JSON.parse(raw) as { [key: string]: string };
  } catch (e) {
    // Prefer the error's message; fall back to the thrown value itself.
    const reason = (e as any).message || e;
    logger.error(
      `Failed to read overlay-base file OIDs from ${oidsFile}: ${reason}`,
    );
    throw e;
  }
  return parsed;
}
/**
 * Builds the path of the `base-database-oids.json` file inside the database
 * location named by the config.
 */
function getBaseDatabaseOidsFilePath(config: Config): string {
  const { dbLocation } = config;
  return path.join(dbLocation, "base-database-oids.json");
}
/**
 * Determines which files under `sourceRoot` were added, removed, or modified
 * relative to the overlay-base database, and writes their
 * source-root-relative paths to `overlay-changes.json` in the temporary
 * directory as `{ "changes": [...] }`.
 *
 * Change detection compares Git OIDs from the index, so the following must
 * hold both now and when the overlay-base database was initialized:
 *
 * - `sourceRoot` is inside a Git repository.
 * - All working-tree changes are staged in the index.
 * - All files of interest are tracked by Git, e.g. not covered by
 *   `.gitignore`.
 *
 * @returns the path of the JSON file that was written.
 */
export async function writeOverlayChangesFile(
  config: Config,
  sourceRoot: string,
  logger: Logger,
): Promise<string> {
  const baseOids = await readBaseDatabaseOidsFile(config, logger);
  const currentOids = await getFileOidsUnderPath(sourceRoot);
  const changes = computeChangedFiles(baseOids, currentOids);
  logger.info(`Found ${changes.length} changed file(s) under ${sourceRoot}.`);

  const payload = JSON.stringify({ changes });
  const tempDir = getTemporaryDirectory();
  const outputPath = path.join(tempDir, "overlay-changes.json");
  logger.debug(`Writing overlay changed files to ${outputPath}: ${payload}`);
  await fs.promises.writeFile(outputPath, payload);
  return outputPath;
}
/**
 * Compares two path-to-OID maps and lists every path that was added, modified
 * (OID differs), or removed.
 *
 * @param baseFileOids Map from file path to Git OID for the overlay-base database
 * @param overlayFileOids Map from file path to Git OID for the current checkout
 * @returns added and modified paths in `overlayFileOids` order, followed by
 * removed paths in `baseFileOids` order.
 */
function computeChangedFiles(
  baseFileOids: { [key: string]: string },
  overlayFileOids: { [key: string]: string },
): string[] {
  // Use an own-property check instead of the `in` operator: `in` also matches
  // members inherited from Object.prototype, so a removed file named e.g.
  // "constructor" or "toString" would otherwise go unreported.
  const hasOwn = (map: { [key: string]: string }, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(map, key);

  const changes: string[] = [];
  for (const [file, oid] of Object.entries(overlayFileOids)) {
    if (!hasOwn(baseFileOids, file) || baseFileOids[file] !== oid) {
      changes.push(file);
    }
  }
  for (const file of Object.keys(baseFileOids)) {
    if (!hasOwn(overlayFileOids, file)) {
      changes.push(file);
    }
  }
  return changes;
}