codeql-action/src/overlay-database-utils.ts
Chuan-kai Lin fc5847810e Overlay: clarify componentsJson computation
This commit updates componentsJson computation to call JSON.stringify()
without the replacer array and documents why the result is stable.
2025-09-05 11:36:58 -07:00

import * as crypto from "crypto";
import * as fs from "fs";
import * as path from "path";
import * as actionsCache from "@actions/cache";
import { getRequiredInput, getTemporaryDirectory } from "./actions-util";
import { getAutomationID } from "./api-client";
import { type CodeQL } from "./codeql";
import { type Config } from "./config-utils";
import { getCommitOid, getFileOidsUnderPath } from "./git-utils";
import { Logger, withGroupAsync } from "./logging";
import { isInTestMode, tryGetFolderBytes, withTimeout } from "./util";
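/**
* The role of the CodeQL database in the current analysis: building an
* overlay-base database, running an overlay analysis on top of one, or
* neither.
*/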
export enum OverlayDatabaseMode {
Overlay = "overlay",
OverlayBase = "overlay-base",
None = "none",
}
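/** The minimum CodeQL version that supports overlay analysis. */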
export const CODEQL_OVERLAY_MINIMUM_VERSION = "2.22.3";
/**
* The maximum (uncompressed) size of the overlay-base database that we will
* upload. Actions Cache has an overall capacity of 10 GB, and the Actions Cache
* client library uses zstd compression.
*
* Ideally we would apply a size limit to the compressed overlay-base database,
* but we cannot do so because compression is handled transparently by the
* Actions Cache client library. Instead we place a limit on the uncompressed
* size of the overlay-base database.
*
* Assuming a 2.5:1 compression ratio, the 6 GB limit on uncompressed data
* translates to around 2.4 GB after compression.
*/
const OVERLAY_BASE_DATABASE_MAX_UPLOAD_SIZE_MB = 6000;
const OVERLAY_BASE_DATABASE_MAX_UPLOAD_SIZE_BYTES =
OVERLAY_BASE_DATABASE_MAX_UPLOAD_SIZE_MB * 1_000_000;
/**
* Writes a JSON file mapping the path (relative to the source root) of each
* tracked file under the source root to its Git OID. The file is written into
* the database location specified in the config.
*
* @param config The configuration object containing the database location
* @param sourceRoot The root directory containing the source files to process
* @throws {Error} If the Git repository root cannot be determined
*/
export async function writeBaseDatabaseOidsFile(
config: Config,
sourceRoot: string,
): Promise<void> {
const gitFileOids = await getFileOidsUnderPath(sourceRoot);
const gitFileOidsJson = JSON.stringify(gitFileOids);
const baseDatabaseOidsFilePath = getBaseDatabaseOidsFilePath(config);
await fs.promises.writeFile(baseDatabaseOidsFilePath, gitFileOidsJson);
}
/**
* Reads and parses the JSON file containing the base database Git OIDs.
* This file contains the mapping of file paths to their corresponding Git OIDs
* that was previously written by writeBaseDatabaseOidsFile().
*
* @param config The configuration object containing the database location
* @param logger The logger instance to use for error reporting
* @returns An object mapping file paths (relative to source root) to their Git OIDs
* @throws {Error} If the file cannot be read or parsed
*/
async function readBaseDatabaseOidsFile(
config: Config,
logger: Logger,
): Promise<{ [key: string]: string }> {
const baseDatabaseOidsFilePath = getBaseDatabaseOidsFilePath(config);
try {
const contents = await fs.promises.readFile(
baseDatabaseOidsFilePath,
"utf-8",
);
return JSON.parse(contents) as { [key: string]: string };
} catch (e) {
logger.error(
"Failed to read overlay-base file OIDs from " +
`${baseDatabaseOidsFilePath}: ${(e as any).message || e}`,
);
throw e;
}
}
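/**
* Returns the path of the JSON file inside the database directory that records
* the Git OIDs of the files in the overlay-base database.
*/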
function getBaseDatabaseOidsFilePath(config: Config): string {
return path.join(config.dbLocation, "base-database-oids.json");
}
/**
* Writes a JSON file containing the source-root-relative paths of files under
* `sourceRoot` that have changed (added, removed, or modified) from the overlay
* base database.
*
* This function uses the Git index to determine which files have changed, so it
* requires the following preconditions, both when this function is called and
* when the overlay-base database was initialized:
*
* - It requires that `sourceRoot` is inside a Git repository.
* - It assumes that all changes in the working tree are staged in the index.
* - It assumes that all files of interest are tracked by Git, e.g. not covered
* by `.gitignore`.
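*
* @param config The configuration object containing the database location
* @param sourceRoot The root directory containing the source files to process
* @param logger The logger instance to use for diagnostics
* @returns The path of the JSON file listing the changed files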
*/
export async function writeOverlayChangesFile(
config: Config,
sourceRoot: string,
logger: Logger,
): Promise<string> {
const baseFileOids = await readBaseDatabaseOidsFile(config, logger);
const overlayFileOids = await getFileOidsUnderPath(sourceRoot);
const changedFiles = computeChangedFiles(baseFileOids, overlayFileOids);
logger.info(
`Found ${changedFiles.length} changed file(s) under ${sourceRoot}.`,
);
const changedFilesJson = JSON.stringify({ changes: changedFiles });
const overlayChangesFile = path.join(
getTemporaryDirectory(),
"overlay-changes.json",
);
logger.debug(
`Writing overlay changed files to ${overlayChangesFile}: ${changedFilesJson}`,
);
await fs.promises.writeFile(overlayChangesFile, changedFilesJson);
return overlayChangesFile;
}
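/**
* Compares the file OIDs recorded for the overlay-base database against those
* of the current checkout and returns the paths of all files that were added,
* modified, or deleted in between.
*/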
function computeChangedFiles(
baseFileOids: { [key: string]: string },
overlayFileOids: { [key: string]: string },
): string[] {
const changes: string[] = [];
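// Files that are new in the overlay or whose contents have changed.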
for (const [file, oid] of Object.entries(overlayFileOids)) {
if (!(file in baseFileOids) || baseFileOids[file] !== oid) {
changes.push(file);
}
}
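// Files that exist in the overlay-base database but have since been deleted.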
for (const file of Object.keys(baseFileOids)) {
if (!(file in overlayFileOids)) {
changes.push(file);
}
}
return changes;
}
// Constants for database caching
const CACHE_VERSION = 1;
const CACHE_PREFIX = "codeql-overlay-base-database";
const MAX_CACHE_OPERATION_MS = 120_000; // Two minutes
/**
* Checks that the overlay-base database is valid by verifying that the base
* database OIDs file exists.
*
* @param config The configuration object
* @param logger The logger instance
* @param warningPrefix Prefix for the check failure warning message
* @returns True if the verification succeeded, false otherwise
*/
export function checkOverlayBaseDatabase(
config: Config,
logger: Logger,
warningPrefix: string,
): boolean {
// An overlay-base database should contain the base database OIDs file.
const baseDatabaseOidsFilePath = getBaseDatabaseOidsFilePath(config);
if (!fs.existsSync(baseDatabaseOidsFilePath)) {
logger.warning(
`${warningPrefix}: ${baseDatabaseOidsFilePath} does not exist`,
);
return false;
}
return true;
}
/**
* Uploads the overlay-base database to the GitHub Actions cache. If conditions
* for uploading are not met, the function does nothing and returns false.
*
* This function uses the `checkout_path` input to determine the repository path
* and works only when called from `analyze` or `upload-sarif`.
*
* @param codeql The CodeQL instance
* @param config The configuration object
* @param logger The logger instance
* @returns A promise that resolves to true if the upload was performed and
* successfully completed, or false otherwise
*/
export async function uploadOverlayBaseDatabaseToCache(
codeql: CodeQL,
config: Config,
logger: Logger,
): Promise<boolean> {
const overlayDatabaseMode = config.overlayDatabaseMode;
if (overlayDatabaseMode !== OverlayDatabaseMode.OverlayBase) {
logger.debug(
`Overlay database mode is ${overlayDatabaseMode}. ` +
"Skip uploading overlay-base database to cache.",
);
return false;
}
if (!config.useOverlayDatabaseCaching) {
logger.debug(
"Overlay database caching is disabled. " +
"Skip uploading overlay-base database to cache.",
);
return false;
}
if (isInTestMode()) {
logger.debug(
"In test mode. Skip uploading overlay-base database to cache.",
);
return false;
}
const databaseIsValid = checkOverlayBaseDatabase(
config,
logger,
"Abort uploading overlay-base database to cache",
);
if (!databaseIsValid) {
return false;
}
// Clean up the database using the overlay cleanup level.
await withGroupAsync("Cleaning up databases", async () => {
await codeql.databaseCleanupCluster(config, "overlay");
});
const dbLocation = config.dbLocation;
const databaseSizeBytes = await tryGetFolderBytes(dbLocation, logger);
if (databaseSizeBytes === undefined) {
logger.warning(
"Failed to determine database size. " +
"Skip uploading overlay-base database to cache.",
);
return false;
}
if (databaseSizeBytes > OVERLAY_BASE_DATABASE_MAX_UPLOAD_SIZE_BYTES) {
const databaseSizeMB = Math.round(databaseSizeBytes / 1_000_000);
logger.warning(
`Database size (${databaseSizeMB} MB) ` +
`exceeds maximum upload size (${OVERLAY_BASE_DATABASE_MAX_UPLOAD_SIZE_MB} MB). ` +
"Skip uploading overlay-base database to cache.",
);
return false;
}
const codeQlVersion = (await codeql.getVersion()).version;
const checkoutPath = getRequiredInput("checkout_path");
const cacheKey = await generateCacheKey(config, codeQlVersion, checkoutPath);
logger.info(
`Uploading overlay-base database to Actions cache with key ${cacheKey}`,
);
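// If the save operation times out, withTimeout resolves to undefined, which
// is reported below; the timeout callback itself needs no action.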
try {
const cacheId = await withTimeout(
MAX_CACHE_OPERATION_MS,
actionsCache.saveCache([dbLocation], cacheKey),
() => {},
);
if (cacheId === undefined) {
logger.warning("Timed out while uploading overlay-base database");
return false;
}
} catch (error) {
logger.warning(
"Failed to upload overlay-base database to cache: " +
`${error instanceof Error ? error.message : String(error)}`,
);
return false;
}
logger.info(`Successfully uploaded overlay-base database from ${dbLocation}`);
return true;
}
export interface OverlayBaseDatabaseDownloadStats {
databaseSizeBytes: number;
databaseDownloadDurationMs: number;
}
/**
* Downloads the overlay-base database from the GitHub Actions cache. If conditions
* for downloading are not met, the function does nothing and returns undefined.
*
* @param codeql The CodeQL instance
* @param config The configuration object
* @param logger The logger instance
* @returns A promise that resolves to download statistics if an overlay-base
* database was successfully downloaded, or undefined if the download was
* either not performed or failed.
*/
export async function downloadOverlayBaseDatabaseFromCache(
codeql: CodeQL,
config: Config,
logger: Logger,
): Promise<OverlayBaseDatabaseDownloadStats | undefined> {
const overlayDatabaseMode = config.overlayDatabaseMode;
if (overlayDatabaseMode !== OverlayDatabaseMode.Overlay) {
logger.debug(
`Overlay database mode is ${overlayDatabaseMode}. ` +
"Skip downloading overlay-base database from cache.",
);
return undefined;
}
if (!config.useOverlayDatabaseCaching) {
logger.debug(
"Overlay database caching is disabled. " +
"Skip downloading overlay-base database from cache.",
);
return undefined;
}
if (isInTestMode()) {
logger.debug(
"In test mode. Skip downloading overlay-base database from cache.",
);
return undefined;
}
const dbLocation = config.dbLocation;
const codeQlVersion = (await codeql.getVersion()).version;
const restoreKey = await getCacheRestoreKey(config, codeQlVersion);
logger.info(
`Looking in Actions cache for overlay-base database with restore key ${restoreKey}`,
);
let databaseDownloadDurationMs = 0;
try {
const databaseDownloadStart = performance.now();
const foundKey = await withTimeout(
MAX_CACHE_OPERATION_MS,
actionsCache.restoreCache([dbLocation], restoreKey),
() => {
logger.info("Timed out downloading overlay-base database from cache");
},
);
databaseDownloadDurationMs = Math.round(
performance.now() - databaseDownloadStart,
);
if (foundKey === undefined) {
logger.info("No overlay-base database found in Actions cache");
return undefined;
}
logger.info(
`Downloaded overlay-base database from cache with key ${foundKey}`,
);
} catch (error) {
logger.warning(
"Failed to download overlay-base database from cache: " +
`${error instanceof Error ? error.message : String(error)}`,
);
return undefined;
}
const databaseIsValid = checkOverlayBaseDatabase(
config,
logger,
"Downloaded overlay-base database is invalid",
);
if (!databaseIsValid) {
logger.warning("Downloaded overlay-base database failed validation");
return undefined;
}
const databaseSizeBytes = await tryGetFolderBytes(dbLocation, logger);
if (databaseSizeBytes === undefined) {
logger.info(
"Filesystem error while accessing downloaded overlay-base database",
);
// We report the download as failed not because we could not determine the
// size of the database, but because the filesystem error encountered while
// accessing the database indicates that an overlay analysis would likely
// fail.
return undefined;
}
logger.info(`Successfully downloaded overlay-base database to ${dbLocation}`);
return {
databaseSizeBytes: Math.round(databaseSizeBytes),
databaseDownloadDurationMs,
};
}
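/**
* Generates the full Actions cache key for saving an overlay-base database by
* appending the commit SHA of the checkout to the cache restore key prefix.
*/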
async function generateCacheKey(
config: Config,
codeQlVersion: string,
checkoutPath: string,
): Promise<string> {
const sha = await getCommitOid(checkoutPath);
const restoreKey = await getCacheRestoreKey(config, codeQlVersion);
return `${restoreKey}${sha}`;
}
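/**
* Computes the restore key prefix used to look up compatible overlay-base
* databases in the Actions cache, for example (values are illustrative):
* `codeql-overlay-base-database-1-<componentsHash>-java_python-2.22.3-`.
*/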
async function getCacheRestoreKey(
config: Config,
codeQlVersion: string,
): Promise<string> {
// The restore key (prefix) specifies which cached overlay-base databases are
// compatible with the current analysis: the cached database must have the
// same cache version and the same CodeQL bundle version.
//
// Actions cache supports using multiple restore keys to indicate preference.
// Technically we prefer a cached overlay-base database with the same SHA as
// we are analyzing. However, since overlay-base databases are built from the
// default branch and used in PR analysis, it is exceedingly unlikely that
// the commit SHA will ever be the same, so we can just leave it out.
const languages = [...config.languages].sort().join("_");
const cacheKeyComponents = {
automationID: await getAutomationID(),
// Add more components here as needed in the future
};
const componentsHash = createCacheKeyHash(cacheKeyComponents);
// For a cached overlay-base database to be considered compatible for overlay
// analysis, all components in the cache restore key must match:
//
// CACHE_PREFIX: distinguishes overlay-base databases from other cache objects
// CACHE_VERSION: cache format version
// componentsHash: hash of additional components (see above for details)
// languages: the languages included in the overlay-base database
// codeQlVersion: CodeQL bundle version
//
// Technically we could also include languages and codeQlVersion in the
// componentsHash, but listing them explicitly in the cache key makes it
// easier to debug and understand the cache key structure.
return `${CACHE_PREFIX}-${CACHE_VERSION}-${componentsHash}-${languages}-${codeQlVersion}-`;
}
/**
* Creates a SHA-256 hash of the cache key components to ensure uniqueness
* while keeping the cache key length manageable.
*
* @param components Object containing all components that should influence cache key uniqueness
* @returns A short SHA-256 hash (first 16 characters) of the components
*/
function createCacheKeyHash(components: Record<string, any>): string {
// From https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/stringify
//
// "Properties are visited using the same algorithm as Object.keys(), which
// has a well-defined order and is stable across implementations. For example,
// JSON.stringify on the same object will always produce the same string, and
// JSON.parse(JSON.stringify(obj)) would produce an object with the same key
// ordering as the original (assuming the object is completely
// JSON-serializable)."
const componentsJson = JSON.stringify(components);
return crypto
.createHash("sha256")
.update(componentsJson)
.digest("hex")
.substring(0, 16);
}