mirror of
https://github.com/github/codeql-action.git
synced 2025-12-23 07:40:10 +08:00
545 lines
18 KiB
TypeScript
545 lines
18 KiB
TypeScript
import * as fs from "fs";
|
|
import * as path from "path";
|
|
|
|
import * as actionsCache from "@actions/cache";
|
|
|
|
import {
|
|
getRequiredInput,
|
|
getTemporaryDirectory,
|
|
getWorkflowRunAttempt,
|
|
getWorkflowRunID,
|
|
} from "./actions-util";
|
|
import { getAutomationID } from "./api-client";
|
|
import { createCacheKeyHash } from "./caching-utils";
|
|
import { type CodeQL } from "./codeql";
|
|
import { type Config } from "./config-utils";
|
|
import { getCommitOid, getFileOidsUnderPath } from "./git-utils";
|
|
import { Logger, withGroupAsync } from "./logging";
|
|
import {
|
|
CleanupLevel,
|
|
getCodeQLDatabasePath,
|
|
getErrorMessage,
|
|
isInTestMode,
|
|
tryGetFolderBytes,
|
|
waitForResultWithTimeLimit,
|
|
} from "./util";
|
|
|
|
export enum OverlayDatabaseMode {
|
|
Overlay = "overlay",
|
|
OverlayBase = "overlay-base",
|
|
None = "none",
|
|
}
|
|
|
|
export const CODEQL_OVERLAY_MINIMUM_VERSION = "2.23.5";
|
|
|
|
/**
|
|
* The maximum (uncompressed) size of the overlay base database that we will
|
|
* upload. By default, the Actions Cache has an overall capacity of 10 GB, and
|
|
* the Actions Cache client library uses zstd compression.
|
|
*
|
|
* Ideally we would apply a size limit to the compressed overlay-base database,
|
|
* but we cannot do so because compression is handled transparently by the
|
|
* Actions Cache client library. Instead we place a limit on the uncompressed
|
|
* size of the overlay-base database.
|
|
*
|
|
* Assuming 2.5:1 compression ratio, the 7.5 GB limit on uncompressed data would
|
|
* translate to a limit of around 3 GB after compression.
|
|
*/
|
|
const OVERLAY_BASE_DATABASE_MAX_UPLOAD_SIZE_MB = 7500;
|
|
const OVERLAY_BASE_DATABASE_MAX_UPLOAD_SIZE_BYTES =
|
|
OVERLAY_BASE_DATABASE_MAX_UPLOAD_SIZE_MB * 1_000_000;
|
|
|
|
/**
|
|
* Writes a JSON file containing Git OIDs for all tracked files (represented
|
|
* by path relative to the source root) under the source root. The file is
|
|
* written into the database location specified in the config.
|
|
*
|
|
* @param config The configuration object containing the database location
|
|
* @param sourceRoot The root directory containing the source files to process
|
|
* @throws {Error} If the Git repository root cannot be determined
|
|
*/
|
|
export async function writeBaseDatabaseOidsFile(
|
|
config: Config,
|
|
sourceRoot: string,
|
|
): Promise<void> {
|
|
const gitFileOids = await getFileOidsUnderPath(sourceRoot);
|
|
const gitFileOidsJson = JSON.stringify(gitFileOids);
|
|
const baseDatabaseOidsFilePath = getBaseDatabaseOidsFilePath(config);
|
|
await fs.promises.writeFile(baseDatabaseOidsFilePath, gitFileOidsJson);
|
|
}
|
|
|
|
/**
|
|
* Reads and parses the JSON file containing the base database Git OIDs.
|
|
* This file contains the mapping of file paths to their corresponding Git OIDs
|
|
* that was previously written by writeBaseDatabaseOidsFile().
|
|
*
|
|
* @param config The configuration object containing the database location
|
|
* @param logger The logger instance to use for error reporting
|
|
* @returns An object mapping file paths (relative to source root) to their Git OIDs
|
|
* @throws {Error} If the file cannot be read or parsed
|
|
*/
|
|
async function readBaseDatabaseOidsFile(
|
|
config: Config,
|
|
logger: Logger,
|
|
): Promise<{ [key: string]: string }> {
|
|
const baseDatabaseOidsFilePath = getBaseDatabaseOidsFilePath(config);
|
|
try {
|
|
const contents = await fs.promises.readFile(
|
|
baseDatabaseOidsFilePath,
|
|
"utf-8",
|
|
);
|
|
return JSON.parse(contents) as { [key: string]: string };
|
|
} catch (e) {
|
|
logger.error(
|
|
"Failed to read overlay-base file OIDs from " +
|
|
`${baseDatabaseOidsFilePath}: ${(e as any).message || e}`,
|
|
);
|
|
throw e;
|
|
}
|
|
}
|
|
|
|
function getBaseDatabaseOidsFilePath(config: Config): string {
|
|
return path.join(config.dbLocation, "base-database-oids.json");
|
|
}
|
|
|
|
/**
|
|
* Writes a JSON file containing the source-root-relative paths of files under
|
|
* `sourceRoot` that have changed (added, removed, or modified) from the overlay
|
|
* base database.
|
|
*
|
|
* This function uses the Git index to determine which files have changed, so it
|
|
* requires the following preconditions, both when this function is called and
|
|
* when the overlay-base database was initialized:
|
|
*
|
|
* - It requires that `sourceRoot` is inside a Git repository.
|
|
* - It assumes that all changes in the working tree are staged in the index.
|
|
* - It assumes that all files of interest are tracked by Git, e.g. not covered
|
|
* by `.gitignore`.
|
|
*/
|
|
export async function writeOverlayChangesFile(
|
|
config: Config,
|
|
sourceRoot: string,
|
|
logger: Logger,
|
|
): Promise<string> {
|
|
const baseFileOids = await readBaseDatabaseOidsFile(config, logger);
|
|
const overlayFileOids = await getFileOidsUnderPath(sourceRoot);
|
|
const changedFiles = computeChangedFiles(baseFileOids, overlayFileOids);
|
|
logger.info(
|
|
`Found ${changedFiles.length} changed file(s) under ${sourceRoot}.`,
|
|
);
|
|
|
|
const changedFilesJson = JSON.stringify({ changes: changedFiles });
|
|
const overlayChangesFile = path.join(
|
|
getTemporaryDirectory(),
|
|
"overlay-changes.json",
|
|
);
|
|
logger.debug(
|
|
`Writing overlay changed files to ${overlayChangesFile}: ${changedFilesJson}`,
|
|
);
|
|
await fs.promises.writeFile(overlayChangesFile, changedFilesJson);
|
|
return overlayChangesFile;
|
|
}
|
|
|
|
function computeChangedFiles(
|
|
baseFileOids: { [key: string]: string },
|
|
overlayFileOids: { [key: string]: string },
|
|
): string[] {
|
|
const changes: string[] = [];
|
|
for (const [file, oid] of Object.entries(overlayFileOids)) {
|
|
if (!(file in baseFileOids) || baseFileOids[file] !== oid) {
|
|
changes.push(file);
|
|
}
|
|
}
|
|
for (const file of Object.keys(baseFileOids)) {
|
|
if (!(file in overlayFileOids)) {
|
|
changes.push(file);
|
|
}
|
|
}
|
|
return changes;
|
|
}
|
|
|
|
// Constants for database caching
|
|
const CACHE_VERSION = 1;
|
|
const CACHE_PREFIX = "codeql-overlay-base-database";
|
|
|
|
// The purpose of this ten-minute limit is to guard against the possibility
|
|
// that the cache service is unresponsive, which would otherwise cause the
|
|
// entire action to hang. Normally we expect cache operations to complete
|
|
// within two minutes.
|
|
const MAX_CACHE_OPERATION_MS = 600_000;
|
|
|
|
/**
|
|
* Checks that the overlay-base database is valid by checking for the
|
|
* existence of the base database OIDs file.
|
|
*
|
|
* @param config The configuration object
|
|
* @param logger The logger instance
|
|
* @param warningPrefix Prefix for the check failure warning message
|
|
* @returns True if the verification succeeded, false otherwise
|
|
*/
|
|
async function checkOverlayBaseDatabase(
|
|
codeql: CodeQL,
|
|
config: Config,
|
|
logger: Logger,
|
|
warningPrefix: string,
|
|
): Promise<boolean> {
|
|
// An overlay-base database should contain the base database OIDs file.
|
|
const baseDatabaseOidsFilePath = getBaseDatabaseOidsFilePath(config);
|
|
if (!fs.existsSync(baseDatabaseOidsFilePath)) {
|
|
logger.warning(
|
|
`${warningPrefix}: ${baseDatabaseOidsFilePath} does not exist`,
|
|
);
|
|
return false;
|
|
}
|
|
|
|
for (const language of config.languages) {
|
|
const dbPath = getCodeQLDatabasePath(config, language);
|
|
try {
|
|
const resolveDatabaseOutput = await codeql.resolveDatabase(dbPath);
|
|
if (
|
|
resolveDatabaseOutput === undefined ||
|
|
!("overlayBaseSpecifier" in resolveDatabaseOutput)
|
|
) {
|
|
logger.info(`${warningPrefix}: no overlayBaseSpecifier defined`);
|
|
return false;
|
|
} else {
|
|
logger.debug(
|
|
`Overlay base specifier for ${language} overlay-base database found: ` +
|
|
`${resolveDatabaseOutput.overlayBaseSpecifier}`,
|
|
);
|
|
}
|
|
} catch (e) {
|
|
logger.warning(`${warningPrefix}: failed to resolve database: ${e}`);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Uploads the overlay-base database to the GitHub Actions cache. If conditions
|
|
* for uploading are not met, the function does nothing and returns false.
|
|
*
|
|
* This function uses the `checkout_path` input to determine the repository path
|
|
* and works only when called from `analyze` or `upload-sarif`.
|
|
*
|
|
* @param codeql The CodeQL instance
|
|
* @param config The configuration object
|
|
* @param logger The logger instance
|
|
* @returns A promise that resolves to true if the upload was performed and
|
|
* successfully completed, or false otherwise
|
|
*/
|
|
export async function cleanupAndUploadOverlayBaseDatabaseToCache(
|
|
codeql: CodeQL,
|
|
config: Config,
|
|
logger: Logger,
|
|
): Promise<boolean> {
|
|
const overlayDatabaseMode = config.overlayDatabaseMode;
|
|
if (overlayDatabaseMode !== OverlayDatabaseMode.OverlayBase) {
|
|
logger.debug(
|
|
`Overlay database mode is ${overlayDatabaseMode}. ` +
|
|
"Skip uploading overlay-base database to cache.",
|
|
);
|
|
return false;
|
|
}
|
|
if (!config.useOverlayDatabaseCaching) {
|
|
logger.debug(
|
|
"Overlay database caching is disabled. " +
|
|
"Skip uploading overlay-base database to cache.",
|
|
);
|
|
return false;
|
|
}
|
|
if (isInTestMode()) {
|
|
logger.debug(
|
|
"In test mode. Skip uploading overlay-base database to cache.",
|
|
);
|
|
return false;
|
|
}
|
|
|
|
const databaseIsValid = await checkOverlayBaseDatabase(
|
|
codeql,
|
|
config,
|
|
logger,
|
|
"Abort uploading overlay-base database to cache",
|
|
);
|
|
if (!databaseIsValid) {
|
|
return false;
|
|
}
|
|
|
|
// Clean up the database using the overlay cleanup level.
|
|
await withGroupAsync("Cleaning up databases", async () => {
|
|
await codeql.databaseCleanupCluster(config, CleanupLevel.Overlay);
|
|
});
|
|
|
|
const dbLocation = config.dbLocation;
|
|
|
|
const databaseSizeBytes = await tryGetFolderBytes(dbLocation, logger);
|
|
if (databaseSizeBytes === undefined) {
|
|
logger.warning(
|
|
"Failed to determine database size. " +
|
|
"Skip uploading overlay-base database to cache.",
|
|
);
|
|
return false;
|
|
}
|
|
|
|
if (databaseSizeBytes > OVERLAY_BASE_DATABASE_MAX_UPLOAD_SIZE_BYTES) {
|
|
const databaseSizeMB = Math.round(databaseSizeBytes / 1_000_000);
|
|
logger.warning(
|
|
`Database size (${databaseSizeMB} MB) ` +
|
|
`exceeds maximum upload size (${OVERLAY_BASE_DATABASE_MAX_UPLOAD_SIZE_MB} MB). ` +
|
|
"Skip uploading overlay-base database to cache.",
|
|
);
|
|
return false;
|
|
}
|
|
|
|
const codeQlVersion = (await codeql.getVersion()).version;
|
|
const checkoutPath = getRequiredInput("checkout_path");
|
|
const cacheSaveKey = await getCacheSaveKey(
|
|
config,
|
|
codeQlVersion,
|
|
checkoutPath,
|
|
logger,
|
|
);
|
|
logger.info(
|
|
`Uploading overlay-base database to Actions cache with key ${cacheSaveKey}`,
|
|
);
|
|
|
|
try {
|
|
const cacheId = await waitForResultWithTimeLimit(
|
|
MAX_CACHE_OPERATION_MS,
|
|
actionsCache.saveCache([dbLocation], cacheSaveKey),
|
|
() => {},
|
|
);
|
|
if (cacheId === undefined) {
|
|
logger.warning("Timed out while uploading overlay-base database");
|
|
return false;
|
|
}
|
|
} catch (error) {
|
|
logger.warning(
|
|
"Failed to upload overlay-base database to cache: " +
|
|
`${error instanceof Error ? error.message : String(error)}`,
|
|
);
|
|
return false;
|
|
}
|
|
logger.info(`Successfully uploaded overlay-base database from ${dbLocation}`);
|
|
return true;
|
|
}
|
|
|
|
export interface OverlayBaseDatabaseDownloadStats {
|
|
databaseSizeBytes: number;
|
|
databaseDownloadDurationMs: number;
|
|
}
|
|
|
|
/**
|
|
* Downloads the overlay-base database from the GitHub Actions cache. If conditions
|
|
* for downloading are not met, the function does nothing and returns false.
|
|
*
|
|
* @param codeql The CodeQL instance
|
|
* @param config The configuration object
|
|
* @param logger The logger instance
|
|
* @returns A promise that resolves to download statistics if an overlay-base
|
|
* database was successfully downloaded, or undefined if the download was
|
|
* either not performed or failed.
|
|
*/
|
|
export async function downloadOverlayBaseDatabaseFromCache(
|
|
codeql: CodeQL,
|
|
config: Config,
|
|
logger: Logger,
|
|
): Promise<OverlayBaseDatabaseDownloadStats | undefined> {
|
|
const overlayDatabaseMode = config.overlayDatabaseMode;
|
|
if (overlayDatabaseMode !== OverlayDatabaseMode.Overlay) {
|
|
logger.debug(
|
|
`Overlay database mode is ${overlayDatabaseMode}. ` +
|
|
"Skip downloading overlay-base database from cache.",
|
|
);
|
|
return undefined;
|
|
}
|
|
if (!config.useOverlayDatabaseCaching) {
|
|
logger.debug(
|
|
"Overlay database caching is disabled. " +
|
|
"Skip downloading overlay-base database from cache.",
|
|
);
|
|
return undefined;
|
|
}
|
|
if (isInTestMode()) {
|
|
logger.debug(
|
|
"In test mode. Skip downloading overlay-base database from cache.",
|
|
);
|
|
return undefined;
|
|
}
|
|
|
|
const dbLocation = config.dbLocation;
|
|
const codeQlVersion = (await codeql.getVersion()).version;
|
|
const cacheRestoreKeyPrefix = await getCacheRestoreKeyPrefix(
|
|
config,
|
|
codeQlVersion,
|
|
);
|
|
|
|
logger.info(
|
|
"Looking in Actions cache for overlay-base database with " +
|
|
`restore key ${cacheRestoreKeyPrefix}`,
|
|
);
|
|
|
|
let databaseDownloadDurationMs = 0;
|
|
try {
|
|
const databaseDownloadStart = performance.now();
|
|
const foundKey = await waitForResultWithTimeLimit(
|
|
// This ten-minute limit for the cache restore operation is mainly to
|
|
// guard against the possibility that the cache service is unresponsive
|
|
// and hangs outside the data download.
|
|
//
|
|
// Data download (which is normally the most time-consuming part of the
|
|
// restore operation) should not run long enough to hit this limit. Even
|
|
// for an extremely large 10GB database, at a download speed of 40MB/s
|
|
// (see below), the download should complete within five minutes. If we
|
|
// do hit this limit, there are likely more serious problems other than
|
|
// mere slow download speed.
|
|
//
|
|
// This is important because we don't want any ongoing file operations
|
|
// on the database directory when we do hit this limit. Hitting this
|
|
// time limit takes us to a fallback path where we re-initialize the
|
|
// database from scratch at dbLocation, and having the cache restore
|
|
// operation continue to write into dbLocation in the background would
|
|
// really mess things up. We want to hit this limit only in the case
|
|
// of a hung cache service, not just slow download speed.
|
|
MAX_CACHE_OPERATION_MS,
|
|
actionsCache.restoreCache(
|
|
[dbLocation],
|
|
cacheRestoreKeyPrefix,
|
|
undefined,
|
|
{
|
|
// Azure SDK download (which is the default) uses 128MB segments; see
|
|
// https://github.com/actions/toolkit/blob/main/packages/cache/README.md.
|
|
// Setting segmentTimeoutInMs to 3000 translates to segment download
|
|
// speed of about 40 MB/s, which should be achievable unless the
|
|
// download is unreliable (in which case we do want to abort).
|
|
segmentTimeoutInMs: 3000,
|
|
},
|
|
),
|
|
() => {
|
|
logger.info("Timed out downloading overlay-base database from cache");
|
|
},
|
|
);
|
|
databaseDownloadDurationMs = Math.round(
|
|
performance.now() - databaseDownloadStart,
|
|
);
|
|
|
|
if (foundKey === undefined) {
|
|
logger.info("No overlay-base database found in Actions cache");
|
|
return undefined;
|
|
}
|
|
|
|
logger.info(
|
|
`Downloaded overlay-base database in cache with key ${foundKey}`,
|
|
);
|
|
} catch (error) {
|
|
logger.warning(
|
|
"Failed to download overlay-base database from cache: " +
|
|
`${error instanceof Error ? error.message : String(error)}`,
|
|
);
|
|
return undefined;
|
|
}
|
|
|
|
const databaseIsValid = await checkOverlayBaseDatabase(
|
|
codeql,
|
|
config,
|
|
logger,
|
|
"Downloaded overlay-base database is invalid",
|
|
);
|
|
if (!databaseIsValid) {
|
|
logger.warning("Downloaded overlay-base database failed validation");
|
|
return undefined;
|
|
}
|
|
|
|
const databaseSizeBytes = await tryGetFolderBytes(dbLocation, logger);
|
|
if (databaseSizeBytes === undefined) {
|
|
logger.info(
|
|
"Filesystem error while accessing downloaded overlay-base database",
|
|
);
|
|
// The problem that warrants reporting download failure is not that we are
|
|
// unable to determine the size of the database. Rather, it is that we
|
|
// encountered a filesystem error while accessing the database, which
|
|
// indicates that an overlay analysis will likely fail.
|
|
return undefined;
|
|
}
|
|
|
|
logger.info(`Successfully downloaded overlay-base database to ${dbLocation}`);
|
|
return {
|
|
databaseSizeBytes: Math.round(databaseSizeBytes),
|
|
databaseDownloadDurationMs,
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Computes the cache key for saving the overlay-base database to the GitHub
|
|
* Actions cache.
|
|
*
|
|
* The key consists of the restore key prefix (which does not include the
|
|
* commit SHA) and the commit SHA of the current checkout.
|
|
*/
|
|
export async function getCacheSaveKey(
|
|
config: Config,
|
|
codeQlVersion: string,
|
|
checkoutPath: string,
|
|
logger: Logger,
|
|
): Promise<string> {
|
|
let runId = 1;
|
|
let attemptId = 1;
|
|
try {
|
|
runId = getWorkflowRunID();
|
|
attemptId = getWorkflowRunAttempt();
|
|
} catch (e) {
|
|
logger.warning(
|
|
`Failed to get workflow run ID or attempt ID. Reason: ${getErrorMessage(e)}`,
|
|
);
|
|
}
|
|
const sha = await getCommitOid(checkoutPath);
|
|
const restoreKeyPrefix = await getCacheRestoreKeyPrefix(
|
|
config,
|
|
codeQlVersion,
|
|
);
|
|
return `${restoreKeyPrefix}${sha}-${runId}-${attemptId}`;
|
|
}
|
|
|
|
/**
|
|
* Computes the cache key prefix for restoring the overlay-base database from
|
|
* the GitHub Actions cache.
|
|
*
|
|
* Actions cache supports using multiple restore keys to indicate preference,
|
|
* and this function could in principle take advantage of that feature by
|
|
* returning a list of restore key prefixes. However, since overlay-base
|
|
* databases are built from the default branch and used in PR analysis, it is
|
|
* exceedingly unlikely that the commit SHA will ever be the same.
|
|
*
|
|
* Therefore, this function returns only a single restore key prefix, which does
|
|
* not include the commit SHA. This allows us to restore the most recent
|
|
* compatible overlay-base database.
|
|
*/
|
|
export async function getCacheRestoreKeyPrefix(
|
|
config: Config,
|
|
codeQlVersion: string,
|
|
): Promise<string> {
|
|
const languages = [...config.languages].sort().join("_");
|
|
|
|
const cacheKeyComponents = {
|
|
automationID: await getAutomationID(),
|
|
// Add more components here as needed in the future
|
|
};
|
|
const componentsHash = createCacheKeyHash(cacheKeyComponents);
|
|
|
|
// For a cached overlay-base database to be considered compatible for overlay
|
|
// analysis, all components in the cache restore key must match:
|
|
//
|
|
// CACHE_PREFIX: distinguishes overlay-base databases from other cache objects
|
|
// CACHE_VERSION: cache format version
|
|
// componentsHash: hash of additional components (see above for details)
|
|
// languages: the languages included in the overlay-base database
|
|
// codeQlVersion: CodeQL bundle version
|
|
//
|
|
// Technically we can also include languages and codeQlVersion in the
|
|
// componentsHash, but including them explicitly in the cache key makes it
|
|
// easier to debug and understand the cache key structure.
|
|
return `${CACHE_PREFIX}-${CACHE_VERSION}-${componentsHash}-${languages}-${codeQlVersion}-`;
|
|
}
|