import * as crypto from "crypto";
import * as fs from "fs";
import * as path from "path";

import * as actionsCache from "@actions/cache";

import { getRequiredInput, getTemporaryDirectory } from "./actions-util";
import { getAutomationID } from "./api-client";
import { type CodeQL } from "./codeql";
import { type Config } from "./config-utils";
import { getCommitOid, getFileOidsUnderPath } from "./git-utils";
import { Logger, withGroupAsync } from "./logging";
import { isInTestMode, tryGetFolderBytes, withTimeout } from "./util";

export enum OverlayDatabaseMode {
  Overlay = "overlay",
  OverlayBase = "overlay-base",
  None = "none",
}

export const CODEQL_OVERLAY_MINIMUM_VERSION = "2.22.3";

/**
 * The maximum (uncompressed) size of the overlay-base database that we will
 * upload. Actions Cache has an overall capacity of 10 GB, and the Actions Cache
 * client library uses zstd compression.
 *
 * Ideally we would apply a size limit to the compressed overlay-base database,
 * but we cannot do so because compression is handled transparently by the
 * Actions Cache client library. Instead we place a limit on the uncompressed
 * size of the overlay-base database.
 *
 * Assuming a 2.5:1 compression ratio, the 6 GB limit on uncompressed data
 * translates to a limit of around 2.4 GB after compression.
 */
const OVERLAY_BASE_DATABASE_MAX_UPLOAD_SIZE_MB = 6000;
const OVERLAY_BASE_DATABASE_MAX_UPLOAD_SIZE_BYTES =
  OVERLAY_BASE_DATABASE_MAX_UPLOAD_SIZE_MB * 1_000_000;
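
// For reference: 6000 MB / 2.5 ≈ 2400 MB, so the 6 GB uncompressed limit
// corresponds to roughly 2.4 GB after compression under the assumed ratio.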

/**
 * Writes a JSON file containing Git OIDs for all tracked files (represented
 * by path relative to the source root) under the source root. The file is
 * written into the database location specified in the config.
 *
 * @param config The configuration object containing the database location
 * @param sourceRoot The root directory containing the source files to process
 * @throws {Error} If the Git repository root cannot be determined
 */
export async function writeBaseDatabaseOidsFile(
  config: Config,
  sourceRoot: string,
): Promise<void> {
  const gitFileOids = await getFileOidsUnderPath(sourceRoot);
  const gitFileOidsJson = JSON.stringify(gitFileOids);
  const baseDatabaseOidsFilePath = getBaseDatabaseOidsFilePath(config);
  await fs.promises.writeFile(baseDatabaseOidsFilePath, gitFileOidsJson);
}
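
// For illustration only (hypothetical paths and OIDs): the file written above
// is a flat JSON object mapping source-root-relative paths to Git blob OIDs,
// for example:
//
//   {
//     "src/index.ts": "8f2e9a1c0b3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f",
//     "README.md": "1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b"
//   }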

/**
 * Reads and parses the JSON file containing the base database Git OIDs.
 * This file contains the mapping of file paths to their corresponding Git OIDs
 * that was previously written by writeBaseDatabaseOidsFile().
 *
 * @param config The configuration object containing the database location
 * @param logger The logger instance to use for error reporting
 * @returns An object mapping file paths (relative to source root) to their Git OIDs
 * @throws {Error} If the file cannot be read or parsed
 */
async function readBaseDatabaseOidsFile(
  config: Config,
  logger: Logger,
): Promise<{ [key: string]: string }> {
  const baseDatabaseOidsFilePath = getBaseDatabaseOidsFilePath(config);
  try {
    const contents = await fs.promises.readFile(
      baseDatabaseOidsFilePath,
      "utf-8",
    );
    return JSON.parse(contents) as { [key: string]: string };
  } catch (e) {
    logger.error(
      "Failed to read overlay-base file OIDs from " +
        `${baseDatabaseOidsFilePath}: ${(e as any).message || e}`,
    );
    throw e;
  }
}

function getBaseDatabaseOidsFilePath(config: Config): string {
  return path.join(config.dbLocation, "base-database-oids.json");
}

/**
 * Writes a JSON file containing the source-root-relative paths of files under
 * `sourceRoot` that have changed (added, removed, or modified) from the overlay
 * base database.
 *
 * This function uses the Git index to determine which files have changed, so it
 * requires the following preconditions, both when this function is called and
 * when the overlay-base database was initialized:
 *
 * - It requires that `sourceRoot` is inside a Git repository.
 * - It assumes that all changes in the working tree are staged in the index.
 * - It assumes that all files of interest are tracked by Git, e.g. not covered
 *   by `.gitignore`.
 */
export async function writeOverlayChangesFile(
  config: Config,
  sourceRoot: string,
  logger: Logger,
): Promise<string> {
  const baseFileOids = await readBaseDatabaseOidsFile(config, logger);
  const overlayFileOids = await getFileOidsUnderPath(sourceRoot);
  const changedFiles = computeChangedFiles(baseFileOids, overlayFileOids);
  logger.info(
    `Found ${changedFiles.length} changed file(s) under ${sourceRoot}.`,
  );

  const changedFilesJson = JSON.stringify({ changes: changedFiles });
  const overlayChangesFile = path.join(
    getTemporaryDirectory(),
    "overlay-changes.json",
  );
  logger.debug(
    `Writing overlay changed files to ${overlayChangesFile}: ${changedFilesJson}`,
  );
  await fs.promises.writeFile(overlayChangesFile, changedFilesJson);
  return overlayChangesFile;
}
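
// For illustration only (hypothetical contents): the changes file written
// above has the shape
//
//   { "changes": ["src/changed.ts", "src/added.ts", "src/deleted.ts"] }
//
// where each entry is a source-root-relative path that was added, modified,
// or removed relative to the overlay-base database.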

function computeChangedFiles(
  baseFileOids: { [key: string]: string },
  overlayFileOids: { [key: string]: string },
): string[] {
  const changes: string[] = [];
  for (const [file, oid] of Object.entries(overlayFileOids)) {
    if (!(file in baseFileOids) || baseFileOids[file] !== oid) {
      changes.push(file);
    }
  }
  for (const file of Object.keys(baseFileOids)) {
    if (!(file in overlayFileOids)) {
      changes.push(file);
    }
  }
  return changes;
}
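
// A minimal worked example (hypothetical OIDs): the first loop picks up files
// that are new or whose OID changed, and the second loop picks up files that
// were deleted, so
//
//   computeChangedFiles(
//     { "a.ts": "oid1", "b.ts": "oid2", "c.ts": "oid3" },          // base
//     { "a.ts": "oid1", "b.ts": "oid2-modified", "d.ts": "oid4" }, // overlay
//   )
//
// returns ["b.ts", "d.ts", "c.ts"]: "b.ts" was modified, "d.ts" was added,
// and "c.ts" was removed.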

// Constants for database caching
const CACHE_VERSION = 1;
const CACHE_PREFIX = "codeql-overlay-base-database";
const MAX_CACHE_OPERATION_MS = 120_000; // Two minutes
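
// The saveCache and restoreCache calls below are wrapped in withTimeout with
// this budget, so a hung Actions cache operation degrades to a log message and
// an early return rather than blocking the workflow job indefinitely.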

/**
 * Checks that the overlay-base database is valid by checking for the
 * existence of the base database OIDs file.
 *
 * @param config The configuration object
 * @param logger The logger instance
 * @param warningPrefix Prefix for the check failure warning message
 * @returns True if the verification succeeded, false otherwise
 */
export function checkOverlayBaseDatabase(
  config: Config,
  logger: Logger,
  warningPrefix: string,
): boolean {
  // An overlay-base database should contain the base database OIDs file.
  const baseDatabaseOidsFilePath = getBaseDatabaseOidsFilePath(config);
  if (!fs.existsSync(baseDatabaseOidsFilePath)) {
    logger.warning(
      `${warningPrefix}: ${baseDatabaseOidsFilePath} does not exist`,
    );
    return false;
  }
  return true;
}

/**
 * Uploads the overlay-base database to the GitHub Actions cache. If conditions
 * for uploading are not met, the function does nothing and returns false.
 *
 * This function uses the `checkout_path` input to determine the repository path
 * and works only when called from `analyze` or `upload-sarif`.
 *
 * @param codeql The CodeQL instance
 * @param config The configuration object
 * @param logger The logger instance
 * @returns A promise that resolves to true if the upload was performed and
 * successfully completed, or false otherwise
 */
export async function uploadOverlayBaseDatabaseToCache(
  codeql: CodeQL,
  config: Config,
  logger: Logger,
): Promise<boolean> {
  const overlayDatabaseMode = config.overlayDatabaseMode;
  if (overlayDatabaseMode !== OverlayDatabaseMode.OverlayBase) {
    logger.debug(
      `Overlay database mode is ${overlayDatabaseMode}. ` +
        "Skip uploading overlay-base database to cache.",
    );
    return false;
  }
  if (!config.useOverlayDatabaseCaching) {
    logger.debug(
      "Overlay database caching is disabled. " +
        "Skip uploading overlay-base database to cache.",
    );
    return false;
  }
  if (isInTestMode()) {
    logger.debug(
      "In test mode. Skip uploading overlay-base database to cache.",
    );
    return false;
  }

  const databaseIsValid = checkOverlayBaseDatabase(
    config,
    logger,
    "Abort uploading overlay-base database to cache",
  );
  if (!databaseIsValid) {
    return false;
  }

  // Clean up the database using the overlay cleanup level.
  await withGroupAsync("Cleaning up databases", async () => {
    await codeql.databaseCleanupCluster(config, "overlay");
  });

  const dbLocation = config.dbLocation;

  const databaseSizeBytes = await tryGetFolderBytes(dbLocation, logger);
  if (databaseSizeBytes === undefined) {
    logger.warning(
      "Failed to determine database size. " +
        "Skip uploading overlay-base database to cache.",
    );
    return false;
  }

  if (databaseSizeBytes > OVERLAY_BASE_DATABASE_MAX_UPLOAD_SIZE_BYTES) {
    const databaseSizeMB = Math.round(databaseSizeBytes / 1_000_000);
    logger.warning(
      `Database size (${databaseSizeMB} MB) ` +
        `exceeds maximum upload size (${OVERLAY_BASE_DATABASE_MAX_UPLOAD_SIZE_MB} MB). ` +
        "Skip uploading overlay-base database to cache.",
    );
    return false;
  }

  const codeQlVersion = (await codeql.getVersion()).version;
  const checkoutPath = getRequiredInput("checkout_path");
  const cacheKey = await generateCacheKey(config, codeQlVersion, checkoutPath);
  logger.info(
    `Uploading overlay-base database to Actions cache with key ${cacheKey}`,
  );

  try {
    const cacheId = await withTimeout(
      MAX_CACHE_OPERATION_MS,
      actionsCache.saveCache([dbLocation], cacheKey),
      () => {},
    );
    if (cacheId === undefined) {
      logger.warning("Timed out while uploading overlay-base database");
      return false;
    }
  } catch (error) {
    logger.warning(
      "Failed to upload overlay-base database to cache: " +
        `${error instanceof Error ? error.message : String(error)}`,
    );
    return false;
  }
  logger.info(`Successfully uploaded overlay-base database from ${dbLocation}`);
  return true;
}

export interface OverlayBaseDatabaseDownloadStats {
  databaseSizeBytes: number;
  databaseDownloadDurationMs: number;
}

/**
 * Downloads the overlay-base database from the GitHub Actions cache. If
 * conditions for downloading are not met, the function does nothing and
 * returns undefined.
 *
 * @param codeql The CodeQL instance
 * @param config The configuration object
 * @param logger The logger instance
 * @returns A promise that resolves to download statistics if an overlay-base
 * database was successfully downloaded, or undefined if the download was
 * either not performed or failed.
 */
export async function downloadOverlayBaseDatabaseFromCache(
  codeql: CodeQL,
  config: Config,
  logger: Logger,
): Promise<OverlayBaseDatabaseDownloadStats | undefined> {
  const overlayDatabaseMode = config.overlayDatabaseMode;
  if (overlayDatabaseMode !== OverlayDatabaseMode.Overlay) {
    logger.debug(
      `Overlay database mode is ${overlayDatabaseMode}. ` +
        "Skip downloading overlay-base database from cache.",
    );
    return undefined;
  }
  if (!config.useOverlayDatabaseCaching) {
    logger.debug(
      "Overlay database caching is disabled. " +
        "Skip downloading overlay-base database from cache.",
    );
    return undefined;
  }
  if (isInTestMode()) {
    logger.debug(
      "In test mode. Skip downloading overlay-base database from cache.",
    );
    return undefined;
  }

  const dbLocation = config.dbLocation;
  const codeQlVersion = (await codeql.getVersion()).version;
  const restoreKey = await getCacheRestoreKey(config, codeQlVersion);

  logger.info(
    `Looking in Actions cache for overlay-base database with restore key ${restoreKey}`,
  );

  let databaseDownloadDurationMs = 0;
  try {
    const databaseDownloadStart = performance.now();
    const foundKey = await withTimeout(
      MAX_CACHE_OPERATION_MS,
      actionsCache.restoreCache([dbLocation], restoreKey),
      () => {
        logger.info("Timed out downloading overlay-base database from cache");
      },
    );
    databaseDownloadDurationMs = Math.round(
      performance.now() - databaseDownloadStart,
    );

    if (foundKey === undefined) {
      logger.info("No overlay-base database found in Actions cache");
      return undefined;
    }

    logger.info(
      `Downloaded overlay-base database in cache with key ${foundKey}`,
    );
  } catch (error) {
    logger.warning(
      "Failed to download overlay-base database from cache: " +
        `${error instanceof Error ? error.message : String(error)}`,
    );
    return undefined;
  }

  const databaseIsValid = checkOverlayBaseDatabase(
    config,
    logger,
    "Downloaded overlay-base database is invalid",
  );
  if (!databaseIsValid) {
    logger.warning("Downloaded overlay-base database failed validation");
    return undefined;
  }

  const databaseSizeBytes = await tryGetFolderBytes(dbLocation, logger);
  if (databaseSizeBytes === undefined) {
    logger.info(
      "Filesystem error while accessing downloaded overlay-base database",
    );
    // The problem that warrants reporting download failure is not that we are
    // unable to determine the size of the database. Rather, it is that we
    // encountered a filesystem error while accessing the database, which
    // indicates that an overlay analysis will likely fail.
    return undefined;
  }

  logger.info(`Successfully downloaded overlay-base database to ${dbLocation}`);
  return {
    databaseSizeBytes: Math.round(databaseSizeBytes),
    databaseDownloadDurationMs,
  };
}

async function generateCacheKey(
  config: Config,
  codeQlVersion: string,
  checkoutPath: string,
): Promise<string> {
  const sha = await getCommitOid(checkoutPath);
  const restoreKey = await getCacheRestoreKey(config, codeQlVersion);
  return `${restoreKey}${sha}`;
}

async function getCacheRestoreKey(
  config: Config,
  codeQlVersion: string,
): Promise<string> {
  // The restore key (prefix) specifies which cached overlay-base databases are
  // compatible with the current analysis: the cached database must have the
  // same cache version, cache key components, languages, and CodeQL bundle
  // version.
  //
  // Actions cache supports using multiple restore keys to indicate preference.
  // Technically we prefer a cached overlay-base database with the same SHA as
  // we are analyzing. However, since overlay-base databases are built from the
  // default branch and used in PR analysis, it is exceedingly unlikely that
  // the commit SHA will ever be the same, so we can just leave it out.
  const languages = [...config.languages].sort().join("_");

  const cacheKeyComponents = {
    automationID: await getAutomationID(),
    // Add more components here as needed in the future
  };
  const componentsHash = createCacheKeyHash(cacheKeyComponents);

  // For a cached overlay-base database to be considered compatible for overlay
  // analysis, all components in the cache restore key must match:
  //
  // CACHE_PREFIX: distinguishes overlay-base databases from other cache objects
  // CACHE_VERSION: cache format version
  // componentsHash: hash of additional components (see above for details)
  // languages: the languages included in the overlay-base database
  // codeQlVersion: CodeQL bundle version
  //
  // Technically we can also include languages and codeQlVersion in the
  // componentsHash, but including them explicitly in the cache key makes it
  // easier to debug and understand the cache key structure.
  return `${CACHE_PREFIX}-${CACHE_VERSION}-${componentsHash}-${languages}-${codeQlVersion}-`;
}
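
// For illustration only (hypothetical values): with a single JavaScript
// analysis, getCacheRestoreKey might produce a restore key such as
//
//   codeql-overlay-base-database-1-0123456789abcdef-javascript-2.22.3-
//
// and generateCacheKey appends the commit SHA of the checkout to form the
// full key used when saving the overlay-base database to the cache.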

/**
 * Creates a SHA-256 hash of the cache key components to ensure uniqueness
 * while keeping the cache key length manageable.
 *
 * @param components Object containing all components that should influence cache key uniqueness
 * @returns A short SHA-256 hash (first 16 characters) of the components
 */
function createCacheKeyHash(components: Record<string, any>): string {
  // From https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/stringify
  //
  // "Properties are visited using the same algorithm as Object.keys(), which
  // has a well-defined order and is stable across implementations. For example,
  // JSON.stringify on the same object will always produce the same string, and
  // JSON.parse(JSON.stringify(obj)) would produce an object with the same key
  // ordering as the original (assuming the object is completely
  // JSON-serializable)."
  const componentsJson = JSON.stringify(components);
  return crypto
    .createHash("sha256")
    .update(componentsJson)
    .digest("hex")
    .substring(0, 16);
}
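
// A minimal sketch of the behavior (hypothetical input): because
// JSON.stringify preserves the insertion order of string keys, the same
// components object always hashes to the same value, e.g.
//
//   createCacheKeyHash({ automationID: "some-automation-id" })
//   // => first 16 hex characters of
//   //    sha256('{"automationID":"some-automation-id"}')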