Add RAM and threads options to init action

2026-01-06 14:40:10 +08:00 · 2021-10-28 15:09:59 -07:00
parent 2905689d8a
commit 70b730eb7d
25 changed files with 631 additions and 58 deletions
--- a/src/analyze-action-env.test.ts
+++ b/src/analyze-action-env.test.ts
@@ -0,0 +1,58 @@
+import test from "ava";
+import * as sinon from "sinon";
+
+import * as actionsUtil from "./actions-util";
+import * as analyze from "./analyze";
+import * as configUtils from "./config-utils";
+import { setupTests, setupActionsVars } from "./testing-utils";
+import * as util from "./util";
+
+setupTests(test);
+
+// This test needs to be in its own file so that ava runs it in its own
+// Node.js process. The code being tested is in analyze-action.ts, which runs
+// immediately on load, so the file has to be loaded partway through the test,
+// and that can happen only once per Node.js process. If several such tests
+// shared a test file, ava would run them in the same Node.js process, and all
+// but the first test would fail.
+
+test("analyze action with RAM & threads from environment variables", async (t) => {
+  await util.withTmpDir(async (tmpDir) => {
+    process.env["GITHUB_SERVER_URL"] = "fake-server-url";
+    process.env["GITHUB_REPOSITORY"] = "fake/repository";
+    sinon
+      .stub(actionsUtil, "createStatusReportBase")
+      .resolves({} as actionsUtil.StatusReportBase);
+    sinon.stub(actionsUtil, "sendStatusReport").resolves(true);
+    sinon.stub(configUtils, "getConfig").resolves({
+      languages: [],
+    } as unknown as configUtils.Config);
+    const requiredInputStub = sinon.stub(actionsUtil, "getRequiredInput");
+    requiredInputStub.withArgs("token").returns("fake-token");
+    requiredInputStub.withArgs("upload-database").returns("false");
+    const optionalInputStub = sinon.stub(actionsUtil, "getOptionalInput");
+    optionalInputStub.withArgs("cleanup-level").returns("none");
+    setupActionsVars(tmpDir, tmpDir);
+
+    // No "ram" or "threads" action input is stubbed here, so the action falls
+    // back to the CODEQL_RAM and CODEQL_THREADS environment variables (passed
+    // down from the init action) to set RAM and threads usage.
+    process.env["CODEQL_THREADS"] = "-1";
+    process.env["CODEQL_RAM"] = "4992";
+
+    const runFinalizeStub = sinon.stub(analyze, "runFinalize");
+    const runQueriesStub = sinon.stub(analyze, "runQueries");
+    const analyzeAction = require("./analyze-action");
+
+    // When analyze-action.ts loads, it starts its async run() from the top
+    // level without waiting for it to finish. To make sure the calls to
+    // runFinalize and runQueries have been recorded by the stubs, explicitly
+    // await the exported runPromise before starting verification.
+    await analyzeAction.runPromise;
+
+    t.deepEqual(runFinalizeStub.firstCall.args[1], "--threads=-1");
+    t.deepEqual(runFinalizeStub.firstCall.args[2], "--ram=4992");
+    t.deepEqual(runQueriesStub.firstCall.args[3], "--threads=-1");
+    t.deepEqual(runQueriesStub.firstCall.args[1], "--ram=4992");
+  });
+});
--- a/src/analyze-action-input.test.ts
+++ b/src/analyze-action-input.test.ts
@@ -0,0 +1,59 @@
+import test from "ava";
+import * as sinon from "sinon";
+
+import * as actionsUtil from "./actions-util";
+import * as analyze from "./analyze";
+import * as configUtils from "./config-utils";
+import { setupTests, setupActionsVars } from "./testing-utils";
+import * as util from "./util";
+
+setupTests(test);
+
+// This test needs to be in its own file so that ava runs it in its own
+// Node.js process. The code being tested is in analyze-action.ts, which runs
+// immediately on load, so the file has to be loaded partway through the test,
+// and that can happen only once per Node.js process. If several such tests
+// shared a test file, ava would run them in the same Node.js process, and all
+// but the first test would fail.
+
+test("analyze action with RAM & threads from action inputs", async (t) => {
+  await util.withTmpDir(async (tmpDir) => {
+    process.env["GITHUB_SERVER_URL"] = "fake-server-url";
+    process.env["GITHUB_REPOSITORY"] = "fake/repository";
+    sinon
+      .stub(actionsUtil, "createStatusReportBase")
+      .resolves({} as actionsUtil.StatusReportBase);
+    sinon.stub(actionsUtil, "sendStatusReport").resolves(true);
+    sinon.stub(configUtils, "getConfig").resolves({
+      languages: [],
+    } as unknown as configUtils.Config);
+    const requiredInputStub = sinon.stub(actionsUtil, "getRequiredInput");
+    requiredInputStub.withArgs("token").returns("fake-token");
+    requiredInputStub.withArgs("upload-database").returns("false");
+    const optionalInputStub = sinon.stub(actionsUtil, "getOptionalInput");
+    optionalInputStub.withArgs("cleanup-level").returns("none");
+    setupActionsVars(tmpDir, tmpDir);
+
+    process.env["CODEQL_THREADS"] = "1";
+    process.env["CODEQL_RAM"] = "4992";
+
+    // Action inputs take precedence over the environment variables set above.
+    optionalInputStub.withArgs("threads").returns("-1");
+    optionalInputStub.withArgs("ram").returns("3012");
+
+    const runFinalizeStub = sinon.stub(analyze, "runFinalize");
+    const runQueriesStub = sinon.stub(analyze, "runQueries");
+    const analyzeAction = require("./analyze-action");
+
+    // When analyze-action.ts loads, it starts its async run() from the top
+    // level without waiting for it to finish. To make sure the calls to
+    // runFinalize and runQueries have been recorded by the stubs, explicitly
+    // await the exported runPromise before starting verification.
+    await analyzeAction.runPromise;
+
+    t.deepEqual(runFinalizeStub.firstCall.args[1], "--threads=-1");
+    t.deepEqual(runFinalizeStub.firstCall.args[2], "--ram=3012");
+    t.deepEqual(runQueriesStub.firstCall.args[3], "--threads=-1");
+    t.deepEqual(runQueriesStub.firstCall.args[1], "--ram=3012");
+  });
+});
--- a/src/analyze-action.ts
+++ b/src/analyze-action.ts
@@ -31,7 +31,7 @@ interface FinishStatusReport
  extends actionsUtil.StatusReportBase,
    AnalysisStatusReport {}

-async function sendStatusReport(
+export async function sendStatusReport(
  startedAt: Date,
  stats: AnalysisStatusReport | undefined,
  error?: Error
@@ -91,10 +91,12 @@ async function run() {
    };
    const outputDir = actionsUtil.getRequiredInput("output");
    const threads = util.getThreadsFlag(
-      actionsUtil.getOptionalInput("threads"),
+      actionsUtil.getOptionalInput("threads") || process.env["CODEQL_THREADS"],
      logger
    );
-    const memory = util.getMemoryFlag(actionsUtil.getOptionalInput("ram"));
+    const memory = util.getMemoryFlag(
+      actionsUtil.getOptionalInput("ram") || process.env["CODEQL_RAM"]
+    );
    await runFinalize(outputDir, threads, memory, config, logger);
    if (actionsUtil.getRequiredInput("skip-queries") !== "true") {
      runStats = await runQueries(
@@ -188,9 +190,11 @@ async function run() {
  }
 }

+export const runPromise = run();
+
 async function runWrapper() {
  try {
-    await run();
+    await runPromise;
  } catch (error) {
    core.setFailed(`analyze action failed: ${error}`);
    console.log(error);
--- a/src/init-action.ts
+++ b/src/init-action.ts
@@ -32,6 +32,8 @@ import {
  getGitHubVersion,
  codeQlVersionAbove,
  enrichEnvironment,
+  getMemoryFlagValue,
+  getThreadsFlagValue,
 } from "./util";

 // eslint-disable-next-line import/no-commonjs
@@ -204,9 +206,20 @@ async function run() {
      );
    }

-    // Setup CODEQL_RAM flag (todo improve this https://github.com/github/dsp-code-scanning/issues/935)
-    const codeqlRam = process.env["CODEQL_RAM"] || "6500";
-    core.exportVariable("CODEQL_RAM", codeqlRam);
+    // Limit RAM and threads for extractors. When running extractors, the CodeQL CLI obeys the
+    // CODEQL_RAM and CODEQL_THREADS environment variables to decide how much RAM and how many
+    // threads it would ask extractors to use. See help text for the "--ram" and "--threads"
+    // options at https://codeql.github.com/docs/codeql-cli/manual/database-trace-command/
+    // for details.
+    core.exportVariable(
+      "CODEQL_RAM",
+      process.env["CODEQL_RAM"] ||
+        getMemoryFlagValue(getOptionalInput("ram")).toString()
+    );
+    core.exportVariable(
+      "CODEQL_THREADS",
+      getThreadsFlagValue(getOptionalInput("threads"), logger).toString()
+    );

    const sourceRoot = path.resolve(
      getRequiredEnvParam("GITHUB_WORKSPACE"),
--- a/src/runner.ts
+++ b/src/runner.ts
@@ -25,6 +25,8 @@ import {
  Mode,
  codeQlVersionAbove,
  enrichEnvironment,
+  getMemoryFlagValue,
+  getThreadsFlagValue,
 } from "./util";

 // eslint-disable-next-line import/no-commonjs
@@ -53,11 +55,15 @@ function getToolsDir(userInput: string | undefined): string {

 const codeqlEnvJsonFilename = "codeql-env.json";

+function loadTracerEnvironment(config: Config): { [name: string]: string } {
+  const jsonEnvFile = path.join(config.tempDir, codeqlEnvJsonFilename);
+  return JSON.parse(fs.readFileSync(jsonEnvFile).toString("utf-8"));
+}
+
 // Imports the environment from codeqlEnvJsonFilename if not already present
 function importTracerEnvironment(config: Config) {
  if (!("ODASA_TRACER_CONFIGURATION" in process.env)) {
-    const jsonEnvFile = path.join(config.tempDir, codeqlEnvJsonFilename);
-    const env = JSON.parse(fs.readFileSync(jsonEnvFile).toString("utf-8"));
+    const env = loadTracerEnvironment(config);
    for (const key of Object.keys(env)) {
      process.env[key] = env[key];
    }
@@ -109,6 +115,8 @@ interface InitArgs {
  githubAuth: string;
  githubAuthStdin: boolean;
  debug: boolean;
+  ram: string | undefined;
+  threads: string | undefined;
 }

 program
@@ -167,6 +175,18 @@ program
    "--trace-process-level <number>",
    "(Advanced, windows-only) Inject a windows tracer of this process into a parent process <number> levels up."
  )
+  .option(
+    "--ram <number>",
+    "The amount of memory in MB that can be used by CodeQL extractors. " +
+      "By default, CodeQL extractors will use most of the memory available in the system. " +
+      'This input also sets the amount of memory that can later be used by the "analyze" command.'
+  )
+  .option(
+    "--threads <number>",
+    "The number of threads that can be used by CodeQL extractors. " +
+      "By default, CodeQL extractors will use all the hardware threads available in the system. " +
+      'This input also sets the number of threads that can later be used by the "analyze" command.'
+  )
  .action(async (cmd: InitArgs) => {
    const logger = getRunnerLogger(cmd.debug);

@@ -195,6 +215,17 @@ program
      const gitHubVersion = await getGitHubVersion(apiDetails);
      checkGitHubVersionInRange(gitHubVersion, logger, Mode.runner);

+      // Limit RAM and threads for extractors. When running extractors, the CodeQL CLI obeys the
+      // CODEQL_RAM and CODEQL_THREADS environment variables to decide how much RAM and how many
+      // threads it would ask extractors to use. See help text for the "--ram" and "--threads"
+      // options at https://codeql.github.com/docs/codeql-cli/manual/database-trace-command/
+      // for details.
+      process.env["CODEQL_RAM"] = getMemoryFlagValue(cmd.ram).toString();
+      process.env["CODEQL_THREADS"] = getThreadsFlagValue(
+        cmd.threads,
+        logger
+      ).toString();
+
      let codeql: CodeQL;
      if (cmd.codeqlPath !== undefined) {
        codeql = await getCodeQL(cmd.codeqlPath);
@@ -402,7 +433,10 @@ program
  )
  .option(
    "--ram <ram>",
-    "Amount of memory to use when running queries. Default is to use all available memory."
+    "The amount of memory in MB that can be used by CodeQL for database finalization and query execution. " +
+      'By default, this command will use the same amount of memory as previously set in the "init" command. ' +
+      'If the "init" command also does not have an explicit "ram" flag, this command will use most of the ' +
+      "memory available in the system."
  )
  .option(
    "--no-add-snippets",
@@ -410,8 +444,10 @@ program
  )
  .option(
    "--threads <threads>",
-    "Number of threads to use when running queries. " +
-      "Default is to use all available cores."
+    "The number of threads that can be used by CodeQL for database finalization and query execution. " +
+      'By default, this command will use the same number of threads as previously set in the "init" command. ' +
+      'If the "init" command also does not have an explicit "threads" flag, this command will use all the ' +
+      "hardware threads available in the system."
  )
  .option(
    "--temp-dir <dir>",
@@ -447,8 +483,17 @@ program

      const outputDir =
        cmd.outputDir || path.join(config.tempDir, "codeql-sarif");
-      const threads = getThreadsFlag(cmd.threads, logger);
-      const memory = getMemoryFlag(cmd.ram);
+      let initEnv: { [name: string]: string } = {};
+      try {
+        initEnv = loadTracerEnvironment(config);
+      } catch (err) {
+        // The init command did not generate a tracer environment file
+      }
+      const threads = getThreadsFlag(
+        cmd.threads || initEnv["CODEQL_THREADS"],
+        logger
+      );
+      const memory = getMemoryFlag(cmd.ram || initEnv["CODEQL_RAM"]);
      await runFinalize(outputDir, threads, memory, config, logger);
      await runQueries(
        outputDir,
--- a/src/util.ts
+++ b/src/util.ts
@@ -85,13 +85,13 @@ function getSystemReservedMemoryMegaBytes(): number {
 }

 /**
- * Get the codeql `--ram` flag as configured by the `ram` input. If no value was
- * specified, the total available memory will be used minus a threshold
- * reserved for the OS.
+ * Get the value of the codeql `--ram` flag as configured by the `ram` input.
+ * If no value was specified, the total available memory will be used minus a
+ * threshold reserved for the OS.
 *
- * @returns string
+ * @returns {number} the amount of RAM to use, in megabytes
 */
-export function getMemoryFlag(userInput: string | undefined): string {
+export function getMemoryFlagValue(userInput: string | undefined): number {
  let memoryToUseMegaBytes: number;
  if (userInput) {
    memoryToUseMegaBytes = Number(userInput);
@@ -104,7 +104,18 @@ export function getMemoryFlag(userInput: string | undefined): string {
    const reservedMemoryMegaBytes = getSystemReservedMemoryMegaBytes();
    memoryToUseMegaBytes = totalMemoryMegaBytes - reservedMemoryMegaBytes;
  }
-  return `--ram=${Math.floor(memoryToUseMegaBytes)}`;
+  return Math.floor(memoryToUseMegaBytes);
+}
+
+/**
+ * Get the codeql `--ram` flag as configured by the `ram` input. If no value was
+ * specified, the total available memory will be used minus a threshold
+ * reserved for the OS.
+ *
+ * @returns the `--ram=<megabytes>` flag built from getMemoryFlagValue
+ */
+export function getMemoryFlag(userInput: string | undefined): string {
+  return `--ram=${getMemoryFlagValue(userInput)}`;
 }

 /**
@@ -123,17 +134,17 @@ export function getAddSnippetsFlag(
 }

 /**
- * Get the codeql `--threads` value specified for the `threads` input.
- * If no value was specified, all available threads will be used.
+ * Get the value of the codeql `--threads` flag specified for the `threads`
+ * input. If no value was specified, all available threads will be used.
 *
 * The value will be capped to the number of available CPUs.
 *
- * @returns string
+ * @returns {number}
 */
-export function getThreadsFlag(
+export function getThreadsFlagValue(
  userInput: string | undefined,
  logger: Logger
-): string {
+): number {
  let numThreads: number;
  const maxThreads = os.cpus().length;
  if (userInput) {
@@ -158,7 +169,22 @@ export function getThreadsFlag(
    // Default to using all threads
    numThreads = maxThreads;
  }
-  return `--threads=${numThreads}`;
+  return numThreads;
+}
+
+/**
+ * Get the codeql `--threads` flag specified for the `threads` input.
+ * If no value was specified, all available threads will be used.
+ *
+ * The value will be capped to the number of available CPUs.
+ *
+ * @returns the `--threads=<count>` flag built from getThreadsFlagValue
+ */
+export function getThreadsFlag(
+  userInput: string | undefined,
+  logger: Logger
+): string {
+  return `--threads=${getThreadsFlagValue(userInput, logger)}`;
 }

 /**