diff --git a/CHANGELOG.md b/CHANGELOG.md index a0935f632..33b6e962b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). +## [Unreleased] + +### Added +- **`socket manifest bazel [beta]`** — Generate Bazel JVM SBOM manifests by running `bazel query` against discovered Maven repos in a Bazel workspace. Closes the inline-Maven-declaration gap that lockfile-only parsing misses for repos like envoy, ray, tensorflow, tink-java, and or-tools. Auto-detects Bzlmod and legacy `WORKSPACE`. +- **`socket scan create --auto-manifest`** now covers Bazel workspaces in addition to Gradle/Scala/Kotlin/Conda. Repos with `MODULE.bazel`, `WORKSPACE`, or `WORKSPACE.bazel` are detected automatically and their Maven dependencies extracted as part of the standard scan-create flow. + ## [1.1.93](https://github.com/SocketDev/socket-cli/releases/tag/v1.1.93) - 2026-05-08 ### Changed diff --git a/eslint.config.js b/eslint.config.js index 4c0dbed84..657a4ec20 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -220,7 +220,11 @@ module.exports = [ 'src/*/*.test.mts', // Allow paths like src/commands/optimize/*.test.mts. 'src/*/*/*.test.mts', + // Allow paths like src/commands/manifest/bazel/*.test.mts. + 'src/*/*/*/*.test.mts', 'test/*.mts', + // Allow loose one-off scripts. 
+ 'scripts/*.mts', ], defaultProject: 'tsconfig.json', tsconfigRootDir: rootPath, diff --git a/src/commands/fix/coana-fix.mts b/src/commands/fix/coana-fix.mts index 0cb5c4299..7f9f2fdc8 100644 --- a/src/commands/fix/coana-fix.mts +++ b/src/commands/fix/coana-fix.mts @@ -27,6 +27,7 @@ import { GQL_PR_STATE_OPEN, } from '../../constants.mts' import { handleApiCall } from '../../utils/api.mts' +import { findSocketYmlSync } from '../../utils/config.mts' import { spawnCoanaDlx } from '../../utils/dlx.mts' import { getErrorCause } from '../../utils/errors.mts' import { @@ -44,7 +45,6 @@ import { fetchGhsaDetails, setGitRemoteGithubRepoUrl, } from '../../utils/github.mts' -import { findSocketYmlSync } from '../../utils/config.mts' import { getPackageFilesForScan } from '../../utils/path-resolve.mts' import { setupSdk } from '../../utils/sdk.mts' import { fetchSupportedScanFileNames } from '../scan/fetch-supported-scan-file-names.mts' diff --git a/src/commands/manifest/README.md b/src/commands/manifest/README.md index 5243f9e91..f9874c9f4 100644 --- a/src/commands/manifest/README.md +++ b/src/commands/manifest/README.md @@ -1,6 +1,98 @@ # Manifest -(At the time of writing...) +`socket manifest ` generates declarative dependency manifests +(`pom.xml`, `requirements.txt`, etc.) for ecosystems whose canonical build +system does not ship one out of the box. The resulting files are consumed by +`socket scan create`'s server-side per-ecosystem parsers. + +## Subcommands + +Sections are sorted alphabetically by subcommand name. + +## socket manifest auto + +Auto-detect the build system in the target directory and run the matching +manifest generator. Useful when you do not want to spell out the language. + +## socket manifest bazel [beta] + +Generates Bazel JVM SBOM manifests (`maven_install.json`-shaped) by running +`bazel query` against discovered Maven repos in a Bazel workspace. 
Output is +consumed by `socket scan create` and closes the +inline-Maven-declaration gap that lockfile-only parsing misses. + +> **Note**: This command generates Maven dependency manifests for Bazel JVM +> workspaces. It does not run reachability analysis. + +### Usage + +```bash +socket manifest bazel [options] [DIR=.] +``` + +### Options + +- `--bazel ` — path to bazel/bazelisk binary; default `$(which bazelisk) || $(which bazel)`. +- `--bazel-rc ` — path to additional `.bazelrc` fragments forwarded to bazel. +- `--bazel-flags ` — flags forwarded to every bazel invocation (single quoted string). +- `--bazel-output-base ` — Bazel `--output_base` for read-only-cache CI environments. +- `--out ` — output directory; default `./.socket/bazel-manifests/`. +- `--dry-run`, `--verbose` — standard diagnostic flags. + +> **Upload**: This subcommand only generates manifests. To generate and +> upload in one step, use `socket scan create --auto-manifest .` — it +> detects the workspace, runs the same extraction this subcommand performs, +> and uploads the result. + +### Examples + +```bash +# Generate maven manifests from the current Bazel workspace. +socket manifest bazel . + +# Use bazelisk explicitly. +socket manifest bazel --bazel=/usr/local/bin/bazelisk . +``` + +### Requirements + +- `bazel` or `bazelisk` on `PATH` (or pass `--bazel `). +- Network access on cold cache. Bazel and `rules_jvm_external` own their own + retry policy for transient Maven resolution failures — `socket manifest bazel` + does not retry on top of them. +- Writable Bazel output base; pass `--bazel-output-base` for read-only-cache CI. + +This is the user-visible entry point for Bazel JVM SBOM support; the [beta] label and "Bazel JVM SBOM support" wording must stay consistent across release notes and docs. + +## socket manifest cdxgen + +Wraps the upstream `cdxgen` CycloneDX BOM generator for repos that already +have a working cdxgen configuration. 
+ +## socket manifest conda [beta] + +Converts a Conda `environment.yml` file to a Python `requirements.txt` so the +Socket scan pipeline can consume the resulting manifest. + +## socket manifest gradle [beta] + +Uses Gradle (via the project's `gradlew`) to emit a `pom.xml` per subproject, +then feeds those files into the Socket scan pipeline. Mirrors the kotlin and +scala flows. + +## socket manifest kotlin [beta] + +Uses Gradle to generate a manifest file (`pom.xml`) for a Kotlin project; the +underlying flow is identical to the gradle subcommand. + +## socket manifest scala [beta] + +Generates a manifest file (`pom.xml`) from Scala's `build.sbt` file. + +## socket manifest setup + +Starts an interactive configurator that writes default flag values for +`socket manifest` into a `socket.json` in the current directory. ## Dev @@ -16,8 +108,8 @@ npm run bs manifest yolo -- --cwd ~/socket/repos/kotlin/kotlinx.coroutines And upload with this: ``` -npm exec socket scan create -- --repo=depscantmp --branch=mastertmp --tmp --cwd ~/socket/repos/scala/akka socketdev . -npm exec socket scan create -- --repo=depscantmp --branch=mastertmp --tmp --cwd ~/socket/repos/kotlin/kotlinx.coroutines . +npm exec socket scan create -- --repo=example-repo --branch=example-branch --tmp --cwd ~/repos/scala/akka example-org . +npm exec socket scan create -- --repo=example-repo --branch=example-branch --tmp --cwd ~/repos/kotlin/kotlinx.coroutines . ``` (The `cwd` option for `create` is necessary because we can't go to the dir and run `npm exec`). @@ -31,5 +123,5 @@ socket manifest scala . socket manifest kotlin . socket manifest yolo -socket scan create --repo=depscantmp --branch=mastertmp --tmp socketdev . +socket scan create --repo=example-repo --branch=example-branch --tmp example-org . 
``` diff --git a/src/commands/manifest/bazel/bazel-bin-detect.mts b/src/commands/manifest/bazel/bazel-bin-detect.mts new file mode 100644 index 000000000..55f79e1be --- /dev/null +++ b/src/commands/manifest/bazel/bazel-bin-detect.mts @@ -0,0 +1,41 @@ +import { existsSync } from 'node:fs' + +import { whichBin } from '@socketsecurity/registry/lib/bin' + +import { InputError } from '../../../utils/errors.mts' + +/** + * Resolve the bazel binary to invoke for `socket manifest bazel`. + * + * Resolution order: + * 1. If `explicit` is provided, return it iff it exists on disk; else throw. + * 2. Look up `bazelisk` on PATH (preferred — respects `.bazelversion`). + * 3. Fall back to `bazel` on PATH. + * 4. If neither is found, throw InputError with install instructions. + */ +export async function resolveBazelBinary( + explicit: string | undefined, +): Promise { + if (explicit) { + if (!existsSync(explicit)) { + throw new InputError( + `--bazel path does not exist: ${explicit}. Install bazelisk or bazel, or pass an existing path via --bazel.`, + ) + } + return explicit + } + // Prefer bazelisk: respects .bazelversion in the workspace. + const bazelisk = await whichBin('bazelisk', { nothrow: true }) + if (bazelisk) { + return bazelisk + } + const bazel = await whichBin('bazel', { nothrow: true }) + if (bazel) { + return bazel + } + throw new InputError( + 'Could not find bazelisk or bazel on PATH. ' + + 'Install bazelisk (recommended; https://github.com/bazelbuild/bazelisk) ' + + 'or bazel, or pass --bazel .', + ) +} diff --git a/src/commands/manifest/bazel/bazel-bin-detect.test.mts b/src/commands/manifest/bazel/bazel-bin-detect.test.mts new file mode 100644 index 000000000..42edec93c --- /dev/null +++ b/src/commands/manifest/bazel/bazel-bin-detect.test.mts @@ -0,0 +1,53 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +// Mock whichBin so tests run with no bazel on PATH. 
+vi.mock('@socketsecurity/registry/lib/bin', () => ({ + whichBin: vi.fn(), +})) + +import { whichBin } from '@socketsecurity/registry/lib/bin' + +import { resolveBazelBinary } from './bazel-bin-detect.mts' + +describe('resolveBazelBinary', () => { + const mocked = vi.mocked(whichBin) + + beforeEach(() => { + mocked.mockReset() + }) + + it('returns explicit path when it exists', async () => { + // Use a path that definitely exists on every dev machine. + const existing = process.execPath + await expect(resolveBazelBinary(existing)).resolves.toBe(existing) + }) + + it('throws InputError when explicit path does not exist', async () => { + await expect( + resolveBazelBinary('/no/such/bazel/binary/xyz'), + ).rejects.toThrow(/--bazel path does not exist/) + }) + + it('returns bazelisk when on PATH', async () => { + mocked.mockResolvedValueOnce('/usr/local/bin/bazelisk') + await expect(resolveBazelBinary(undefined)).resolves.toBe( + '/usr/local/bin/bazelisk', + ) + }) + + it('falls back to bazel when bazelisk is missing', async () => { + mocked + .mockResolvedValueOnce(null) + .mockResolvedValueOnce('/usr/local/bin/bazel') + await expect(resolveBazelBinary(undefined)).resolves.toBe( + '/usr/local/bin/bazel', + ) + }) + + it('throws InputError when neither is on PATH', async () => { + mocked.mockResolvedValue(null) + await expect(resolveBazelBinary(undefined)).rejects.toThrow( + /Could not find bazelisk or bazel/, + ) + }) +}) diff --git a/src/commands/manifest/bazel/bazel-build-parser.mts b/src/commands/manifest/bazel/bazel-build-parser.mts new file mode 100644 index 000000000..af30345b7 --- /dev/null +++ b/src/commands/manifest/bazel/bazel-build-parser.mts @@ -0,0 +1,232 @@ +/** + * Parse `bazel query --output=build` text and `unsorted_deps.json` files + * (rules_jvm_external) into a uniform `ExtractedArtifact` shape consumed by + * the converter. 
+ * + * Security gate: every regex uses bounded character classes to prevent + * catastrophic backtracking on hostile bazel-query output. Rules without + * `maven_coordinates=` are skipped. Caller is responsible for size-capping + * the input string. + */ + +export type ExtractedArtifact = { + ruleKind: 'jvm_import' | 'aar_import' + ruleName: string + mavenCoordinates: string + sourceRepo?: string | undefined + mavenUrl?: string | undefined + mavenSha256?: string | undefined + deps: string[] +} + +// Per-rule block matcher: matches `(...)` where kind is jvm_import or +// aar_import, bounded by `^)` (closing paren on its own line) — Bazel +// `--output=build` output convention. Body length capped at 8 KiB; real +// rules are ~500 bytes, so the cap is 16x normal. Prevents pathological +// backtracking on hostile input. +const RULE_RE = /^(jvm_import|aar_import)\(([\s\S]{0,8192}?)^\)/gm + +// Cache for per-attribute regexes — avoids recompiling the same pattern on +// every rule block. Keyed by attr name; all attr names are safe alphanumeric +// identifiers so no escaping is needed beyond the bounded character class. +const ATTR_RE_CACHE = new Map() + +// Cache for per-tag-key regexes used by extractTagValue. +const TAG_RE_CACHE = new Map() + +function extractAttr(body: string, attr: string): string | undefined { + // Match ` = "VALUE"` — quoted-string attrs only. + // Quoted value capped at 4 KiB; canonical Maven URLs are ~150 bytes. + let re = ATTR_RE_CACHE.get(attr) + if (!re) { + re = new RegExp(`\\b${attr}\\s*=\\s*"([^"\\n]{0,4096})"`) + ATTR_RE_CACHE.set(attr, re) + } + const m = re.exec(body) + return m?.[1] +} + +// Extracts a `key=value` pair from inside a Bazel `tags = [...]` attribute +// (rules_jvm_external encodes maven_sha256, maven_coordinates etc. this way). +// Pattern: `"maven_sha256="` inside the tags list. +// Returns undefined when the tag is absent or malformed. 
+function extractTagValue(body: string, tagKey: string): string | undefined { + // Match the full tags = [...] block (bounded at 8 KiB). + const tagsM = /\btags\s*=\s*\[([\s\S]{0,8192}?)\]/m.exec(body) + if (!tagsM) { + return undefined + } + const tagsBlob = tagsM[1] as string + // Within the blob, look for "=" inside a quoted string. + // Bounded at 512 bytes per tag entry (sha256 hex is 64 chars; URLs ~150). + let tagRe = TAG_RE_CACHE.get(tagKey) + if (!tagRe) { + tagRe = new RegExp(`"${tagKey}=([^"\\n]{0,512})"`) + TAG_RE_CACHE.set(tagKey, tagRe) + } + const m = tagRe.exec(tagsBlob) + return m?.[1] +} + +function extractDeps(body: string): string[] { + // Match `deps = ["a", "b", ...]`. Body length capped at 16 KiB; real + // dep lists are <2 KiB. + const m = /\bdeps\s*=\s*\[([\s\S]{0,16384}?)\]/m.exec(body) + if (!m) { + return [] + } + const out: string[] = [] + // Per-label cap at 512 bytes; real Bazel labels are <100 bytes. + for (const q of (m[1] as string).matchAll(/"([^"\n]{0,512})"/g)) { + out.push(q[1] as string) + } + return out +} + +/** + * Parse `bazel query --output=build` stdout into `ExtractedArtifact[]`. + * Skips rules without a `maven_coordinates` attribute (those aren't + * rules_jvm_external lockfile rules). + */ +export function parseBazelBuildOutput(text: string): ExtractedArtifact[] { + const results: ExtractedArtifact[] = [] + for (const m of text.matchAll(RULE_RE)) { + const ruleKind = m[1] as 'jvm_import' | 'aar_import' + const body = m[2] as string + const ruleName = extractAttr(body, 'name') + // maven_coordinates can be: + // (a) a top-level rule attribute: `maven_coordinates = "g:a:v"` (newer rje) + // (b) inside tags = [...]: `"maven_coordinates=g:a:v"` (older rje, e.g. ray) + const coords = + extractAttr(body, 'maven_coordinates') ?? + extractTagValue(body, 'maven_coordinates') + if (!ruleName || !coords) { + continue + } + // maven_sha256 is encoded inside tags = [...] 
as "maven_sha256=" by + // rules_jvm_external; try tags first, fall back to standalone attr for + // older rule shapes that may declare it as a top-level attribute. + const mavenSha256 = + extractTagValue(body, 'maven_sha256') ?? extractAttr(body, 'maven_sha256') + results.push({ + ruleKind, + ruleName, + mavenCoordinates: coords, + mavenUrl: extractAttr(body, 'maven_url'), + mavenSha256, + deps: extractDeps(body), + }) + } + return results +} + +type LegacyUnsortedDepsArtifact = { + coordinates?: string + url?: string + sha256?: string + deps?: unknown +} + +type V2LockArtifact = { + shasums?: Record + version?: string +} + +function ruleNameFromCoordinate(c: string): string { + return c.replace(/[^A-Za-z0-9]/g, '_') +} + +/** + * Parse supported `external//unsorted_deps.json` shapes emitted by + * rules_jvm_external. Older files use an artifact array with full coordinates; + * newer v2 lock-file-shaped files use artifact/dependency maps keyed by + * `group:artifact`. Caller MUST size-cap the input because JSON.parse is + * unbounded by default. + */ +export function parseUnsortedDepsJson(json: string): ExtractedArtifact[] { + let parsed: unknown + try { + parsed = JSON.parse(json) + } catch { + return [] + } + + const maybe = parsed as { + artifacts?: LegacyUnsortedDepsArtifact[] | Record + dependencies?: Record + } + + if (Array.isArray(maybe.artifacts)) { + const out: ExtractedArtifact[] = [] + for (const a of maybe.artifacts) { + if (typeof a?.coordinates !== 'string') { + continue + } + const deps: string[] = [] + if (Array.isArray(a.deps)) { + for (const d of a.deps) { + if (typeof d === 'string') { + deps.push(d) + } + } + } + out.push({ + ruleKind: 'jvm_import', + ruleName: ruleNameFromCoordinate(a.coordinates), + mavenCoordinates: a.coordinates, + mavenUrl: typeof a.url === 'string' ? a.url : undefined, + mavenSha256: typeof a.sha256 === 'string' ? 
a.sha256 : undefined, + deps, + }) + } + return out + } + + if (!maybe.artifacts || typeof maybe.artifacts !== 'object') { + return [] + } + + const dependencies = maybe.dependencies ?? {} + const out: ExtractedArtifact[] = [] + for (const [groupArtifact, artifact] of Object.entries(maybe.artifacts)) { + if (!artifact || typeof artifact.version !== 'string') { + continue + } + const shasums = artifact.shasums ?? {} + const jarSha = shasums['jar'] + if (typeof jarSha === 'string' || Object.keys(shasums).length === 0) { + out.push( + v2Artifact(groupArtifact, artifact.version, jarSha, dependencies), + ) + } + for (const [classifier, sha256] of Object.entries(shasums)) { + if (classifier === 'jar' || typeof sha256 !== 'string') { + continue + } + const classifierKey = `${groupArtifact}:jar:${classifier}` + out.push( + v2Artifact(classifierKey, artifact.version, sha256, dependencies), + ) + } + } + return out +} + +function v2Artifact( + artifactKey: string, + version: string, + sha256: string | undefined, + dependencies: Record, +): ExtractedArtifact { + return { + ruleKind: 'jvm_import', + ruleName: ruleNameFromCoordinate(artifactKey), + mavenCoordinates: `${artifactKey}:${version}`, + mavenSha256: sha256, + deps: Array.isArray(dependencies[artifactKey]) + ? dependencies[artifactKey].filter( + (d): d is string => typeof d === 'string', + ) + : [], + } +} diff --git a/src/commands/manifest/bazel/bazel-build-parser.test.mts b/src/commands/manifest/bazel/bazel-build-parser.test.mts new file mode 100644 index 000000000..045d9a544 --- /dev/null +++ b/src/commands/manifest/bazel/bazel-build-parser.test.mts @@ -0,0 +1,181 @@ +import { readFileSync } from 'node:fs' +import path from 'node:path' +import { fileURLToPath } from 'node:url' + +import { describe, expect, it } from 'vitest' + +import { + parseBazelBuildOutput, + parseUnsortedDepsJson, +} from './bazel-build-parser.mts' + +// Resolve fixtures relative to this test file. `.mts` ESM has no __dirname. 
+const HERE = path.dirname(fileURLToPath(import.meta.url)) +const FIXTURES = path.join( + HERE, + '..', + '..', + '..', + '..', + 'test', + 'fixtures', + 'manifest-bazel', + 'query-output', +) + +describe('parseBazelBuildOutput', () => { + it('extracts jvm_import rules with maven_coordinates', () => { + const text = readFileSync( + path.join(FIXTURES, 'jvm-import-sample.txt'), + 'utf8', + ) + const result = parseBazelBuildOutput(text) + expect(result.length).toBe(2) + expect(result[0]).toMatchObject({ + ruleKind: 'jvm_import', + ruleName: 'com_google_guava_guava', + mavenCoordinates: 'com.google.guava:guava:33.0.0-jre', + }) + expect(result[0]?.mavenSha256).toMatch(/^9408c2c4/) + expect(result[0]?.mavenUrl).toContain('repo1.maven.org') + expect(result[0]?.deps).toEqual([ + '@maven//:com_google_guava_failureaccess', + '@maven//:org_checkerframework_checker_qual', + ]) + }) + + it('skips rules without maven_coordinates', () => { + const text = readFileSync( + path.join(FIXTURES, 'jvm-import-sample.txt'), + 'utf8', + ) + const result = parseBazelBuildOutput(text) + expect(result.find(r => r.ruleName === 'no_coords_rule')).toBeUndefined() + }) + + it('extracts aar_import rules', () => { + const text = readFileSync( + path.join(FIXTURES, 'aar-import-sample.txt'), + 'utf8', + ) + const result = parseBazelBuildOutput(text) + expect(result.length).toBe(1) + expect(result[0]?.ruleKind).toBe('aar_import') + expect(result[0]?.mavenCoordinates).toBe( + 'androidx.annotation:annotation:1.7.0', + ) + }) + + it('returns empty array on empty input', () => { + expect(parseBazelBuildOutput('')).toEqual([]) + }) + + it('does not throw on truncated rule body', () => { + const truncated = + 'jvm_import(\n name = "x",\n maven_coordinates = "g:a:v",\n' + expect(() => parseBazelBuildOutput(truncated)).not.toThrow() + }) +}) + +describe('parseUnsortedDepsJson', () => { + it('extracts artifacts in ExtractedArtifact shape', () => { + const json = readFileSync( + path.join(FIXTURES, 
'unsorted-deps-sample.json'), + 'utf8', + ) + const result = parseUnsortedDepsJson(json) + expect(result.length).toBe(2) + expect(result[0]?.mavenCoordinates).toBe( + 'com.google.guava:guava:33.0.0-jre', + ) + expect(result[0]?.deps).toEqual([ + 'com.google.guava:failureaccess:1.0.2', + 'org.checkerframework:checker-qual:3.41.0', + ]) + }) + + it('extracts v2 lock-file map artifacts', () => { + const result = parseUnsortedDepsJson( + JSON.stringify({ + artifacts: { + 'com.google.guava:guava': { + shasums: { jar: 'abc123' }, + version: '33.0.0-jre', + }, + 'com.google.guava:failureaccess': { + shasums: {}, + version: '1.0.2', + }, + }, + dependencies: { + 'com.google.guava:guava': ['com.google.guava:failureaccess'], + }, + }), + ) + + expect(result).toEqual([ + { + ruleKind: 'jvm_import', + ruleName: 'com_google_guava_guava', + mavenCoordinates: 'com.google.guava:guava:33.0.0-jre', + mavenSha256: 'abc123', + deps: ['com.google.guava:failureaccess'], + }, + { + ruleKind: 'jvm_import', + ruleName: 'com_google_guava_failureaccess', + mavenCoordinates: 'com.google.guava:failureaccess:1.0.2', + mavenSha256: undefined, + deps: [], + }, + ]) + }) + + it('expands v2 lock-file classifier shasums into classifier artifacts', () => { + const result = parseUnsortedDepsJson( + JSON.stringify({ + artifacts: { + 'io.netty:netty-transport-native-epoll': { + shasums: { + 'linux-aarch_64': 'linux-aarch-sha', + 'linux-x86_64': 'linux-x86-sha', + }, + version: '4.1.115.Final', + }, + }, + dependencies: { + 'io.netty:netty-transport-native-epoll:jar:linux-x86_64': [ + 'io.netty:netty-buffer', + ], + }, + }), + ) + + expect(result).toEqual([ + { + ruleKind: 'jvm_import', + ruleName: 'io_netty_netty_transport_native_epoll_jar_linux_aarch_64', + mavenCoordinates: + 'io.netty:netty-transport-native-epoll:jar:linux-aarch_64:4.1.115.Final', + mavenSha256: 'linux-aarch-sha', + deps: [], + }, + { + ruleKind: 'jvm_import', + ruleName: 'io_netty_netty_transport_native_epoll_jar_linux_x86_64', + 
mavenCoordinates: + 'io.netty:netty-transport-native-epoll:jar:linux-x86_64:4.1.115.Final', + mavenSha256: 'linux-x86-sha', + deps: ['io.netty:netty-buffer'], + }, + ]) + }) + + it('returns empty array on invalid JSON', () => { + expect(parseUnsortedDepsJson('not json')).toEqual([]) + }) + + it('returns empty array when artifacts field missing', () => { + expect(parseUnsortedDepsJson('{}')).toEqual([]) + }) +}) diff --git a/src/commands/manifest/bazel/bazel-java-shim.mts b/src/commands/manifest/bazel/bazel-java-shim.mts new file mode 100644 index 000000000..a83a9c390 --- /dev/null +++ b/src/commands/manifest/bazel/bazel-java-shim.mts @@ -0,0 +1,30 @@ +import { execSync } from 'node:child_process' + +let probed = false + +// Verifies `java` is functional in the current execution environment. Bazel +// JVM manifest extraction (rules_jvm_external → Coursier) requires a real +// JDK; the CLI does not attempt to discover Homebrew installs or mutate the +// caller's PATH/JAVA_HOME. If `java -version` fails we throw with an +// actionable message so the surfaced error names the prerequisite directly +// instead of relying on Bazel's downstream diagnostic. +export function ensureJavaOnPath(): void { + if (probed) { + return + } + try { + execSync('java -version', { stdio: 'ignore' }) + probed = true + } catch { + throw new Error( + 'Java is required for Bazel JVM manifest extraction ' + + '(rules_jvm_external invokes Coursier, which needs a JDK). ' + + 'Install a JDK (e.g. Temurin or OpenJDK) and ensure `java` is on PATH.', + ) + } +} + +// Test-only: clear the per-process cache so tests can re-mock execSync. 
+export function _resetJavaShimCacheForTests(): void { + probed = false +} diff --git a/src/commands/manifest/bazel/bazel-java-shim.test.mts b/src/commands/manifest/bazel/bazel-java-shim.test.mts new file mode 100644 index 000000000..d861b8ea1 --- /dev/null +++ b/src/commands/manifest/bazel/bazel-java-shim.test.mts @@ -0,0 +1,72 @@ +import { execSync } from 'node:child_process' + +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' + +import { + _resetJavaShimCacheForTests, + ensureJavaOnPath, +} from './bazel-java-shim.mts' + +vi.mock('node:child_process', async () => { + const actual = + await vi.importActual( + 'node:child_process', + ) + return { ...actual, execSync: vi.fn() } +}) + +describe('ensureJavaOnPath', () => { + const mockedExec = vi.mocked(execSync) + + let originalJavaHome: string | undefined + let originalPath: string | undefined + + beforeEach(() => { + mockedExec.mockReset() + _resetJavaShimCacheForTests() + originalJavaHome = process.env['JAVA_HOME'] + originalPath = process.env['PATH'] + }) + + afterEach(() => { + if (originalJavaHome === undefined) { + delete process.env['JAVA_HOME'] + } else { + process.env['JAVA_HOME'] = originalJavaHome + } + process.env['PATH'] = originalPath + }) + + it('returns silently and leaves the environment untouched when java is on PATH', () => { + mockedExec.mockReturnValue(Buffer.from('')) + expect(() => ensureJavaOnPath()).not.toThrow() + expect(process.env['JAVA_HOME']).toBe(originalJavaHome) + expect(process.env['PATH']).toBe(originalPath) + }) + + it('throws an actionable error when java is missing', () => { + mockedExec.mockImplementation(() => { + throw new Error('java not found') + }) + expect(() => ensureJavaOnPath()).toThrow(/Java is required/) + // No env mutation on failure. 
+ expect(process.env['JAVA_HOME']).toBe(originalJavaHome) + expect(process.env['PATH']).toBe(originalPath) + }) + + it('is idempotent on success: subsequent calls do not re-probe', () => { + mockedExec.mockReturnValue(Buffer.from('')) + ensureJavaOnPath() + ensureJavaOnPath() + expect(mockedExec).toHaveBeenCalledTimes(1) + }) + + it('re-throws on every call when java remains missing', () => { + mockedExec.mockImplementation(() => { + throw new Error('java not found') + }) + expect(() => ensureJavaOnPath()).toThrow(/Java is required/) + expect(() => ensureJavaOnPath()).toThrow(/Java is required/) + expect(mockedExec).toHaveBeenCalledTimes(2) + }) +}) diff --git a/src/commands/manifest/bazel/bazel-output-base-check.mts b/src/commands/manifest/bazel/bazel-output-base-check.mts new file mode 100644 index 000000000..3f616a53f --- /dev/null +++ b/src/commands/manifest/bazel/bazel-output-base-check.mts @@ -0,0 +1,46 @@ +import { + accessSync, + existsSync, + constants as fsConstants, + mkdirSync, +} from 'node:fs' +import path from 'node:path' + +import { InputError } from '../../../utils/errors.mts' + +// Validates that --bazel-output-base is a path we can use as Bazel's output_base. +// Throws InputError if: +// - the input contains `..` segments (path traversal guard) +// - the existing path is not writable +// - the path cannot be created (parent not writable) +export function validateOutputBase(outputBase: string, cwd: string): void { + // Path traversal guard: reject any literal `..` segment in user input. + // After path.resolve these are normalised away, so we check the raw input. + // Split on both separators. On Windows `path.sep === '\\'`, so + // input like `foo/../etc` would not contain a `..` segment under the + // platform-specific split, bypassing the guard — yet path.resolve below + // would still normalise the `..` and a traversal target could materialise. 
+ const segments = outputBase.split(/[\\/]/) + if (segments.includes('..')) { + throw new InputError( + `--bazel-output-base must not contain '..' segments: ${outputBase}`, + ) + } + const resolved = path.resolve(cwd, outputBase) + if (existsSync(resolved)) { + try { + accessSync(resolved, fsConstants.W_OK) + } catch { + throw new InputError(`--bazel-output-base is not writable: ${resolved}`) + } + return + } + // Path does not exist yet — try to create it so bazel can populate it. + try { + mkdirSync(resolved, { recursive: true }) + } catch (e) { + throw new InputError( + `--bazel-output-base could not be created at ${resolved}: ${(e as Error).message}`, + ) + } +} diff --git a/src/commands/manifest/bazel/bazel-output-base-check.test.mts b/src/commands/manifest/bazel/bazel-output-base-check.test.mts new file mode 100644 index 000000000..db4ca244f --- /dev/null +++ b/src/commands/manifest/bazel/bazel-output-base-check.test.mts @@ -0,0 +1,58 @@ +import { chmodSync, mkdirSync, mkdtempSync, rmSync } from 'node:fs' +import os from 'node:os' +import path from 'node:path' + +import { afterEach, beforeEach, describe, expect, it } from 'vitest' + +import { validateOutputBase } from './bazel-output-base-check.mts' + +describe('validateOutputBase', () => { + let tmp: string + + beforeEach(() => { + tmp = mkdtempSync(path.join(os.tmpdir(), 'output-base-check-')) + }) + + afterEach(() => { + // Restore permissions before cleanup in case a test made a dir read-only. + try { + chmodSync(tmp, 0o755) + } catch { + // Ignore errors. 
+ } + rmSync(tmp, { recursive: true, force: true }) + }) + + it('accepts an existing writable directory without throwing', () => { + expect(() => validateOutputBase(tmp, '/anywhere')).not.toThrow() + }) + + it('accepts a nonexistent path under a writable parent and creates it', () => { + const child = path.join(tmp, 'new-output-base') + expect(() => validateOutputBase(child, '/anywhere')).not.toThrow() + }) + + it('throws InputError when path contains `..` segments', () => { + expect(() => validateOutputBase('../../etc', tmp)).toThrow(/'\.\.'/) + }) + + it('throws InputError when existing path is not writable', () => { + // Run only as non-root where chmod actually restricts access. + if (process.getuid?.() === 0) { + return + } + const ro = path.join(tmp, 'readonly') + mkdirSync(ro) + chmodSync(ro, 0o555) + try { + expect(() => validateOutputBase(ro, '/anywhere')).toThrow(/not writable/) + } finally { + chmodSync(ro, 0o755) + } + }) + + it('accepts an absolute path inside /tmp when it contains no `..` segments', () => { + // The tmp dir itself is a writable absolute path with no `..`. + expect(() => validateOutputBase(tmp, '/anywhere')).not.toThrow() + }) +}) diff --git a/src/commands/manifest/bazel/bazel-python-shim.mts b/src/commands/manifest/bazel/bazel-python-shim.mts new file mode 100644 index 000000000..8517fce14 --- /dev/null +++ b/src/commands/manifest/bazel/bazel-python-shim.mts @@ -0,0 +1,89 @@ +import { existsSync, mkdirSync, symlinkSync, unlinkSync } from 'node:fs' +import os from 'node:os' +import path from 'node:path' + +import { whichBin } from '@socketsecurity/registry/lib/bin' + +import { InputError } from '../../../utils/errors.mts' + +export type PythonShimResult = { + // PATH-prefixed env to pass into spawn opts. undefined if no shim needed. 
+  augmentedEnv: NodeJS.ProcessEnv | undefined
+  shimDir: string | undefined
+}
+
+// Stable shim dir name — same process will get the same dir; concurrent
+// socket-cli invocations on the same machine share it. The symlink target
+// is whatever python3 resolves to NOW; if PATH changes between invocations
+// we replace the symlink.
+const SHIM_SUBDIR = 'socket-cli-bazel-python-shim'
+
+// Cache the result for the lifetime of this process.
+let cached: PythonShimResult | null = null
+
+// Safe wrapper around whichBin that returns null instead of throwing when
+// nothrow semantics are broken in older registry versions (realpath 'null' bug).
+async function safeWhichBin(name: string): Promise<string | null> {
+  try {
+    return (await whichBin(name, { nothrow: true })) ?? null
+  } catch {
+    return null
+  }
+}
+
+export async function provisionPythonShim(): Promise<PythonShimResult> {
+  if (cached) {
+    return cached
+  }
+  const pythonOnPath = await safeWhichBin('python')
+  if (pythonOnPath) {
+    cached = { augmentedEnv: undefined, shimDir: undefined }
+    return cached
+  }
+  const python3OnPath = await safeWhichBin('python3')
+  if (!python3OnPath) {
+    throw new InputError(
+      'Neither `python` nor `python3` found on PATH. Older versions of ' +
+        'rules_jvm_external require a `python` interpreter for repository ' +
+        'rules. Install Python 3 and ensure it is on PATH, then retry.',
+    )
+  }
+  const shimDir = path.join(os.tmpdir(), SHIM_SUBDIR)
+  mkdirSync(shimDir, { recursive: true })
+  const linkPath = path.join(shimDir, 'python')
+  // Replace the symlink defensively in case python3's resolved path moved.
+  if (existsSync(linkPath)) {
+    try {
+      unlinkSync(linkPath)
+    } catch {
+      // Tolerate races; the next symlinkSync may still succeed.
+    }
+  }
+  // The shim dir is process-shared (os.tmpdir()/socket-cli-bazel-python-shim),
+  // so a concurrent socket-cli invocation may re-create the link between our
+  // unlinkSync and symlinkSync. Tolerate EEXIST when the link is back: the
+  // other process won the race and left a usable shim in place.
+  try {
+    symlinkSync(python3OnPath, linkPath)
+  } catch (e) {
+    if (
+      (e as NodeJS.ErrnoException).code === 'EEXIST' &&
+      existsSync(linkPath)
+    ) {
+      // Another process re-created the link; assume it points at a python3.
+    } else {
+      throw e
+    }
+  }
+  const augmentedEnv = {
+    ...process.env,
+    PATH: `${shimDir}${path.delimiter}${process.env['PATH'] ?? ''}`,
+  }
+  cached = { augmentedEnv, shimDir }
+  return cached
+}
+
+// Test-only: clear the per-process cache so tests can re-mock whichBin.
+export function _resetPythonShimCacheForTests(): void {
+  cached = null
+}
diff --git a/src/commands/manifest/bazel/bazel-python-shim.test.mts b/src/commands/manifest/bazel/bazel-python-shim.test.mts
new file mode 100644
index 000000000..753aceb92
--- /dev/null
+++ b/src/commands/manifest/bazel/bazel-python-shim.test.mts
@@ -0,0 +1,91 @@
+import { existsSync, readlinkSync, rmSync } from 'node:fs'
+import os from 'node:os'
+import path from 'node:path'
+
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
+
+vi.mock('@socketsecurity/registry/lib/bin', () => ({
+  whichBin: vi.fn(),
+}))
+
+import { whichBin } from '@socketsecurity/registry/lib/bin'
+
+import {
+  _resetPythonShimCacheForTests,
+  provisionPythonShim,
+} from './bazel-python-shim.mts'
+
+describe('provisionPythonShim', () => {
+  const mocked = vi.mocked(whichBin)
+  const SHIM_DIR = path.join(os.tmpdir(), 'socket-cli-bazel-python-shim')
+
+  beforeEach(() => {
+    mocked.mockReset()
+    _resetPythonShimCacheForTests()
+  })
+
+  afterEach(() => {
+    // Clean up the shared shim dir between tests to avoid stale symlinks.
+ if (existsSync(SHIM_DIR)) { + rmSync(SHIM_DIR, { recursive: true, force: true }) + } + }) + + it('returns no-shim result when python is already on PATH', async () => { + mocked.mockResolvedValueOnce('/usr/bin/python') + const result = await provisionPythonShim() + expect(result).toEqual({ augmentedEnv: undefined, shimDir: undefined }) + }) + + it('creates python symlink shim when only python3 is on PATH', async () => { + mocked + .mockResolvedValueOnce(null) // python + .mockResolvedValueOnce(process.execPath) // python3 + const result = await provisionPythonShim() + expect(result.shimDir).toBe(SHIM_DIR) + expect(existsSync(path.join(SHIM_DIR, 'python'))).toBe(true) + expect(readlinkSync(path.join(SHIM_DIR, 'python'))).toBe(process.execPath) + const pathValue = result.augmentedEnv?.['PATH'] ?? '' + expect(pathValue.startsWith(SHIM_DIR)).toBe(true) + }) + + it('throws InputError when neither python nor python3 is found on PATH', async () => { + mocked.mockResolvedValue(null) + await expect(provisionPythonShim()).rejects.toThrow( + /Neither .python. nor .python3/, + ) + }) + + it('is idempotent: returns cached result on second call', async () => { + mocked + .mockResolvedValueOnce(null) // python + .mockResolvedValueOnce(process.execPath) // python3 + const a = await provisionPythonShim() + const b = await provisionPythonShim() + // Same object reference — cached. + expect(b).toBe(a) + // whichBin only called twice (for the first invocation). 
+ expect(mocked).toHaveBeenCalledTimes(2) + }) + + it('uses a stable shim dir under os.tmpdir()', async () => { + mocked + .mockResolvedValueOnce(null) // python + .mockResolvedValueOnce(process.execPath) // python3 + const result = await provisionPythonShim() + expect(result.shimDir).toBe( + path.join(os.tmpdir(), 'socket-cli-bazel-python-shim'), + ) + }) + + it('symlink target is the absolute resolved python3 path', async () => { + mocked + .mockResolvedValueOnce(null) // python + .mockResolvedValueOnce(process.execPath) // python3 + await provisionPythonShim() + const linkTarget = readlinkSync(path.join(SHIM_DIR, 'python')) + // Must be an absolute path. + expect(path.isAbsolute(linkTarget)).toBe(true) + expect(linkTarget).toBe(process.execPath) + }) +}) diff --git a/src/commands/manifest/bazel/bazel-query-runner.mts b/src/commands/manifest/bazel/bazel-query-runner.mts new file mode 100644 index 000000000..64f35f884 --- /dev/null +++ b/src/commands/manifest/bazel/bazel-query-runner.mts @@ -0,0 +1,180 @@ +import { logger } from '@socketsecurity/registry/lib/logger' +import { spawn } from '@socketsecurity/registry/lib/spawn' + +import constants from '../../../constants.mts' + +import type { RepoProbe } from './bazel-repo-discovery.mts' + +export type BazelQueryOptions = { + bin: string + cwd: string + invocationFlags: string[] + bazelRc?: string + bazelFlags?: string + bazelOutputBase?: string + env?: NodeJS.ProcessEnv + verbose?: boolean +} + +export type BazelQueryResult = { + stdout: string + stderr: string + code: number +} + +// Default per-invocation timeout for bazel queries. Bazel cold-cache starts +// can take several minutes; 10 minutes is generous while still bounding CI hangs. +const BAZEL_QUERY_TIMEOUT_MS = 600_000 + +// Splits the user-supplied --bazel-flags string on whitespace. +// Empty / undefined returns []. No shell parsing — quoted args with embedded +// whitespace are not supported (documented limitation; same trust model as +// gradleOpts). 
+export function splitBazelFlags(flags: string | undefined): string[] {
+  if (!flags) {
+    return []
+  }
+  return flags.split(/\s+/).filter(Boolean)
+}
+
+function buildBazelModShowVisibleReposArgv(opts: BazelQueryOptions): string[] {
+  const startup: string[] = []
+  if (opts.bazelRc) {
+    startup.push(`--bazelrc=${opts.bazelRc}`)
+  }
+  if (opts.bazelOutputBase) {
+    startup.push(`--output_base=${opts.bazelOutputBase}`)
+  }
+  const userFlags = splitBazelFlags(opts.bazelFlags)
+  return [
+    ...startup,
+    'mod',
+    'show_repo',
+    '--all_visible_repos',
+    '--output=streamed_jsonproto',
+    ...userFlags,
+  ]
+}
+
+function buildBazelArgv(queryStr: string, opts: BazelQueryOptions): string[] {
+  // Startup flags MUST precede the `query` subcommand.
+  // Bazel argv shape: <startup> query <queryFlags> <invocationFlags> <query> --output=build <userFlags>
+  const startup: string[] = []
+  if (opts.bazelRc) {
+    startup.push(`--bazelrc=${opts.bazelRc}`)
+  }
+  if (opts.bazelOutputBase) {
+    startup.push(`--output_base=${opts.bazelOutputBase}`)
+  }
+  // Keep query output stable and avoid updating Bazel lockfiles while extracting.
+  const queryFlags = ['--lockfile_mode=off', '--noshow_progress']
+  const userFlags = splitBazelFlags(opts.bazelFlags)
+  return [
+    ...startup,
+    'query',
+    ...queryFlags,
+    ...opts.invocationFlags,
+    queryStr,
+    '--output=build',
+    ...userFlags,
+  ]
+}
+
+function stringField(value: unknown): string {
+  return typeof value === 'string' ? value : ''
+}
+
+function numericExitCode(value: unknown): number | undefined {
+  return typeof value === 'number' && Number.isFinite(value) ? value : undefined
+}
+
+function normalizeSpawnError(error: unknown): BazelQueryResult {
+  const e = error as {
+    code?: unknown
+    status?: unknown
+    stderr?: unknown
+    stdout?: unknown
+  }
+  return {
+    code: numericExitCode(e?.code) ?? numericExitCode(e?.status) ?? -1,
+    stderr: stringField(e?.stderr),
+    stdout: stringField(e?.stdout),
+  }
+}
+
+/**
+ * Run `bazel query` with the standardized argv shape and capture
+ * stdout/stderr/code. Wraps the call in a spinner that resolves on success
+ * and fails on non-zero exit. Rejected spawn calls are normalized into a
+ * BazelQueryResult so retry/skip handling can inspect stderr.
+ */
+export async function runBazelQuery(
+  queryStr: string,
+  opts: BazelQueryOptions,
+): Promise<BazelQueryResult> {
+  const argv = buildBazelArgv(queryStr, opts)
+  if (opts.verbose) {
+    logger.log('[VERBOSE] Executing:', opts.bin, ', args:', argv)
+  }
+  const { spinner } = constants
+  let result: BazelQueryResult | undefined
+  try {
+    spinner.start(`Running bazel query (${queryStr.slice(0, 80)})...`)
+    const output = await spawn(opts.bin, argv, {
+      cwd: opts.cwd,
+      timeout: BAZEL_QUERY_TIMEOUT_MS,
+      ...(opts.env ? { env: opts.env } : {}),
+    })
+    const { code, stderr, stdout } = output
+    result = { code, stdout, stderr }
+    return result
+  } catch (e) {
+    result = normalizeSpawnError(e)
+    return result
+  } finally {
+    const truncated = queryStr.slice(0, 80)
+    if (result?.code === 0) {
+      spinner.successAndStop(`bazel query completed (${truncated}).`)
+    } else {
+      spinner.failAndStop(`bazel query failed (${truncated}).`)
+    }
+  }
+}
+
+/**
+ * Bzlmod-native visible repository enumeration. This is only a candidate
+ * source; callers must still validate each returned apparent repo name with a
+ * semantic query for generated JVM Maven rules.
+ */
+export async function runBazelModShowVisibleRepos(
+  opts: BazelQueryOptions,
+): Promise<BazelQueryResult> {
+  const argv = buildBazelModShowVisibleReposArgv(opts)
+  if (opts.verbose) {
+    logger.log('[VERBOSE] Executing:', opts.bin, ', args:', argv)
+  }
+  try {
+    const output = await spawn(opts.bin, argv, {
+      cwd: opts.cwd,
+      timeout: BAZEL_QUERY_TIMEOUT_MS,
+      ...(opts.env ? { env: opts.env } : {}),
+    })
+    const { code, stderr, stdout } = output
+    return { code, stdout, stderr }
+  } catch (e) {
+    return normalizeSpawnError(e)
+  }
+}
+
+/**
+ * Build a `RepoProbe` (compatible with bazel-repo-discovery) bound to opts.
+ * Used by `discoverMavenRepos` to validate candidate Maven repo + * names against the running workspace. + */ +export function buildProbeFor(opts: BazelQueryOptions): RepoProbe { + return async (repoName: string) => { + const queryStr = `kind("jvm_import rule|aar_import rule", @${repoName}//:*)` + const result = await runBazelQuery(queryStr, opts) + return { stdout: result.stdout, code: result.code } + } +} diff --git a/src/commands/manifest/bazel/bazel-query-runner.test.mts b/src/commands/manifest/bazel/bazel-query-runner.test.mts new file mode 100644 index 000000000..fcb0d3680 --- /dev/null +++ b/src/commands/manifest/bazel/bazel-query-runner.test.mts @@ -0,0 +1,220 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +vi.mock('@socketsecurity/registry/lib/spawn', () => ({ + spawn: vi.fn(), +})) + +// Mock the spinner so tests don't render to TTY. +vi.mock('../../../constants.mts', () => ({ + default: { + spinner: { + start: vi.fn(), + successAndStop: vi.fn(), + failAndStop: vi.fn(), + }, + }, +})) + +import { spawn } from '@socketsecurity/registry/lib/spawn' + +import { buildProbeFor, runBazelQuery } from './bazel-query-runner.mts' +import constants from '../../../constants.mts' + +describe('runBazelQuery', () => { + const mocked = vi.mocked(spawn) + + beforeEach(() => { + mocked.mockReset() + vi.mocked(constants.spinner.start).mockClear() + vi.mocked(constants.spinner.successAndStop).mockClear() + vi.mocked(constants.spinner.failAndStop).mockClear() + // @ts-ignore — spawn return type union; tests only use the three fields. + mocked.mockResolvedValue({ code: 0, stdout: 'ok', stderr: '' }) + }) + + it('builds the standard query argv shape', async () => { + await runBazelQuery('kind(jvm_import, @maven//:*)', { + bin: '/usr/local/bin/bazel', + cwd: '/repo', + invocationFlags: [], + }) + const call = mocked.mock.calls[0]! 
+ expect(call[0]).toBe('/usr/local/bin/bazel') + const argv = call[1] as string[] + expect(argv[0]).toBe('query') + expect(argv).toContain('--lockfile_mode=off') + expect(argv).toContain('--noshow_progress') + expect(argv).toContain('kind(jvm_import, @maven//:*)') + expect(argv).toContain('--output=build') + }) + + it('forwards bazelRc as a startup flag BEFORE query', async () => { + await runBazelQuery('q', { + bin: 'bazel', + cwd: '/r', + invocationFlags: [], + bazelRc: '/path/to/.bazelrc', + }) + const argv = mocked.mock.calls[0]![1] as string[] + expect(argv[0]).toBe('--bazelrc=/path/to/.bazelrc') + expect(argv.indexOf('--bazelrc=/path/to/.bazelrc')).toBeLessThan( + argv.indexOf('query'), + ) + }) + + it('forwards bazelOutputBase as a startup flag BEFORE query', async () => { + await runBazelQuery('q', { + bin: 'bazel', + cwd: '/r', + invocationFlags: [], + bazelOutputBase: '/tmp/output-base', + }) + const argv = mocked.mock.calls[0]![1] as string[] + expect(argv).toContain('--output_base=/tmp/output-base') + expect(argv.indexOf('--output_base=/tmp/output-base')).toBeLessThan( + argv.indexOf('query'), + ) + }) + + it('appends invocationFlags after queryFlags', async () => { + await runBazelQuery('q', { + bin: 'bazel', + cwd: '/r', + invocationFlags: ['--noenable_bzlmod', '--enable_workspace'], + }) + const argv = mocked.mock.calls[0]![1] as string[] + expect(argv).toContain('--noenable_bzlmod') + expect(argv).toContain('--enable_workspace') + }) + + it('splits bazelFlags string on whitespace and appends', async () => { + await runBazelQuery('q', { + bin: 'bazel', + cwd: '/r', + invocationFlags: [], + bazelFlags: '--config=ci --keep_going', + }) + const argv = mocked.mock.calls[0]![1] as string[] + expect(argv).toContain('--config=ci') + expect(argv).toContain('--keep_going') + }) + + it('forwards env to spawn when provided', async () => { + const env = { ...process.env, BAZEL_BENCH: 'yes' } + await runBazelQuery('q', { + bin: 'bazel', + cwd: '/r', + 
invocationFlags: [], + env, + }) + expect(mocked.mock.calls[0]![2]).toMatchObject({ cwd: '/r', env }) + }) + + it('returns spawn result fields', async () => { + // @ts-ignore — narrow return shape for the test's purposes. + mocked.mockResolvedValueOnce({ code: 0, stdout: 'OUT', stderr: 'ERR' }) + const r = await runBazelQuery('q', { + bin: 'bazel', + cwd: '/r', + invocationFlags: [], + }) + expect(r).toEqual({ code: 0, stdout: 'OUT', stderr: 'ERR' }) + }) + + it('stops spinner as failure when spawn resolves with non-zero code', async () => { + // @ts-ignore — narrow return shape for the test's purposes. + mocked.mockResolvedValueOnce({ code: 7, stdout: '', stderr: 'boom' }) + const r = await runBazelQuery('q', { + bin: 'bazel', + cwd: '/r', + invocationFlags: [], + }) + expect(r).toEqual({ code: 7, stdout: '', stderr: 'boom' }) + expect(constants.spinner.successAndStop).not.toHaveBeenCalled() + expect(constants.spinner.failAndStop).toHaveBeenCalled() + }) + + it('normalizes rejected spawn errors with code, stdout, and stderr', async () => { + mocked.mockRejectedValueOnce( + Object.assign(new Error('bazel failed'), { + code: 42, + stderr: 'ERR', + stdout: 'OUT', + }), + ) + const r = await runBazelQuery('q', { + bin: 'bazel', + cwd: '/r', + invocationFlags: [], + }) + expect(r).toEqual({ code: 42, stdout: 'OUT', stderr: 'ERR' }) + expect(constants.spinner.failAndStop).toHaveBeenCalled() + }) + + it('preserves stderr from a rejected spawn so the caller sees the diagnostic', async () => { + mocked.mockRejectedValueOnce( + Object.assign(new Error('bazel resolution failed'), { + code: 1, + stderr: 'download failed: HTTP/2 503', + stdout: '', + }), + ) + const r = await runBazelQuery('q', { + bin: 'bazel', + cwd: '/r', + invocationFlags: [], + }) + expect(r).toEqual({ + code: 1, + stdout: '', + stderr: 'download failed: HTTP/2 503', + }) + }) + + it('normalizes rejected spawn errors without numeric code or status to -1', async () => { + mocked.mockRejectedValueOnce( + 
Object.assign(new Error('spawn failed'), { + code: 'ENOENT', + stderr: 'missing bazel', + stdout: '', + }), + ) + const r = await runBazelQuery('q', { + bin: 'bazel', + cwd: '/r', + invocationFlags: [], + }) + expect(r).toEqual({ code: -1, stdout: '', stderr: 'missing bazel' }) + }) +}) + +describe('buildProbeFor', () => { + const mocked = vi.mocked(spawn) + + beforeEach(() => { + mocked.mockReset() + // @ts-ignore — narrow return shape for the test's purposes. + mocked.mockResolvedValue({ + code: 0, + stdout: 'jvm_import(\n maven_coordinates="g:a:1",\n)', + stderr: '', + }) + }) + + it('builds the probe query for a repo name', async () => { + const probe = buildProbeFor({ + bin: 'bazel', + cwd: '/r', + invocationFlags: [], + }) + const result = await probe('my_maven_repo') + const argv = mocked.mock.calls[0]![1] as string[] + expect(argv).toContain( + 'kind("jvm_import rule|aar_import rule", @my_maven_repo//:*)', + ) + expect(result).toEqual({ + stdout: expect.stringContaining('maven_coordinates'), + code: 0, + }) + }) +}) diff --git a/src/commands/manifest/bazel/bazel-repo-discovery.mts b/src/commands/manifest/bazel/bazel-repo-discovery.mts new file mode 100644 index 000000000..433174f45 --- /dev/null +++ b/src/commands/manifest/bazel/bazel-repo-discovery.mts @@ -0,0 +1,326 @@ +import { existsSync, readFileSync, readdirSync, statSync } from 'node:fs' +import path from 'node:path' + +import { logger } from '@socketsecurity/registry/lib/logger' + +// Maximum size (bytes) we will read for any single Bazel workspace file. +// Prevents DoS via maliciously large MODULE.bazel / WORKSPACE / .bzl files. +const MAX_WORKSPACE_FILE_BYTES = 5 * 1024 * 1024 + +// Maximum candidate count we will return (deduped) before truncating. +// Real repos have <20; this is a hard ceiling against pathological inputs. +const MAX_CANDIDATES = 256 + +// Regex strategy: anchored, bounded character classes, no nested quantifiers. 
+// Match `use_repo(maven, "X", "Y", ...)` with a bounded arg-list window to +// avoid catastrophic backtracking on hostile input. + +// Bzlmod use_repo(maven, "name1", "name2"...). +// Bounded: matches up to ~4KB of arg list to avoid catastrophic backtracking. +const USE_REPO_RE = /use_repo\s*\(\s*maven\s*,([^)]{0,4096})\)/g +const BAZEL_REPO_NAME_PATTERN = '[A-Za-z0-9._+-]{1,129}' +const BAZEL_REPO_NAME_RE = new RegExp(`^${BAZEL_REPO_NAME_PATTERN}$`) +// Quoted-name extractor inside the captured argument blob. +const QUOTED_NAME_RE = new RegExp(`"(${BAZEL_REPO_NAME_PATTERN})"`, 'g') + +// Legacy maven_install(name = "X", ...) on a single statement. +// Match the name= keyword arg specifically; bounded. +const MAVEN_INSTALL_NAME_RE = new RegExp( + `maven_install\\s*\\([^)]{0,8192}?\\bname\\s*=\\s*"(${BAZEL_REPO_NAME_PATTERN})"`, + 'g', +) +const MAVEN_COORDINATES_MARKER_RE = /\bmaven_coordinates\s*=/ + +// Reads file contents, refusing files that exceed MAX_WORKSPACE_FILE_BYTES. +// Returns null when the file is missing, oversized, or unreadable. +function safeReadFile(file: string): string | null { + if (!existsSync(file)) { + return null + } + try { + const stat = statSync(file) + if (stat.size > MAX_WORKSPACE_FILE_BYTES) { + return null + } + return readFileSync(file, 'utf8') + } catch { + return null + } +} + +// Walks workspace root for legacy Starlark sources we can scan: WORKSPACE +// (and WORKSPACE.bazel) plus top-level .bzl files. Non-recursive by design; +// Phase 1 explicitly avoids static Starlark parsing at depth. +function listLegacyStarlarkFiles(cwd: string): string[] { + const files: string[] = [] + const candidates = ['WORKSPACE', 'WORKSPACE.bazel'] + for (const c of candidates) { + const p = path.join(cwd, c) + if (existsSync(p)) { + files.push(p) + } + } + // Top-level .bzl files only. 
+  try {
+    for (const entry of readdirSync(cwd)) {
+      if (entry.endsWith('.bzl')) {
+        files.push(path.join(cwd, entry))
+      }
+    }
+  } catch {
+    // Ignore unreadable cwd.
+  }
+  return files
+}
+
+// Returns deduplicated, sorted list of items, capped at MAX_CANDIDATES.
+function uniqueSorted(items: string[]): string[] {
+  const seen = new Set<string>()
+  const out: string[] = []
+  for (const item of items) {
+    if (!seen.has(item)) {
+      seen.add(item)
+      out.push(item)
+      if (out.length >= MAX_CANDIDATES) {
+        break
+      }
+    }
+  }
+  return out.sort()
+}
+
+function apparentNameFromJsonValue(value: unknown): string | undefined {
+  if (!value || typeof value !== 'object') {
+    return undefined
+  }
+  const obj = value as Record<string, unknown>
+  const direct = obj['apparentName'] ?? obj['apparent_name']
+  if (typeof direct === 'string') {
+    return direct
+  }
+  for (const nested of Object.values(obj)) {
+    const found = apparentNameFromJsonValue(nested)
+    if (found) {
+      return found
+    }
+  }
+  return undefined
+}
+
+function normalizeRepoName(name: string): string | undefined {
+  const repo = name.startsWith('@') ? name.slice(1) : name
+  return BAZEL_REPO_NAME_RE.test(repo) ? repo : undefined
+}
+
+// Parse `bazel mod show_repo --all_visible_repos --output=streamed_jsonproto`
+// output. Bazel's JSON proto field casing may vary by formatter; accept both
+// lowerCamel and snake_case, and tolerate wrapper objects around Repository.
+export function parseVisibleRepoCandidates(output: string): string[] {
+  const candidates: string[] = []
+  for (const line of output.split(/\r?\n/)) {
+    const trimmed = line.trim()
+    if (!trimmed) {
+      continue
+    }
+    try {
+      const parsed = JSON.parse(trimmed) as unknown
+      const apparentName = apparentNameFromJsonValue(parsed)
+      if (apparentName) {
+        const repo = normalizeRepoName(apparentName)
+        if (repo) {
+          candidates.push(repo)
+        }
+      }
+    } catch {
+      // Ignore malformed lines; caller will fall back to static discovery when
+      // no usable visible repo names are found.
+    }
+  }
+  return uniqueSorted(candidates)
+}
+
+// Step 1: parse candidate Maven repo names from Bzlmod and legacy entry points.
+export function parseMavenRepoCandidates(
+  cwd: string,
+  verbose?: boolean,
+): string[] {
+  const candidates: string[] = []
+
+  // Bzlmod path: parse MODULE.bazel for use_repo(maven, ...).
+  const moduleBazel = path.join(cwd, 'MODULE.bazel')
+  const moduleContent = safeReadFile(moduleBazel)
+  if (moduleContent) {
+    const bzlmodHits: string[] = []
+    for (const m of moduleContent.matchAll(USE_REPO_RE)) {
+      const argBlob = m[1] ?? ''
+      for (const n of argBlob.matchAll(QUOTED_NAME_RE)) {
+        bzlmodHits.push(n[1] as string)
+      }
+    }
+    candidates.push(...bzlmodHits)
+    if (verbose) {
+      logger.log(
+        '[VERBOSE] discovery: scanned',
+        moduleBazel,
+        `(${bzlmodHits.length} use_repo match(es))`,
+      )
+    }
+  } else if (verbose) {
+    logger.log(
+      '[VERBOSE] discovery:',
+      moduleBazel,
+      'not present (skipping bzlmod scan)',
+    )
+  }
+
+  // Legacy path: scan WORKSPACE + top-level .bzl files for maven_install(name=...).
+  const legacyFiles = listLegacyStarlarkFiles(cwd)
+  if (verbose) {
+    logger.log(
+      '[VERBOSE] discovery: legacy files considered:',
+      legacyFiles.length ? legacyFiles : '(none)',
+    )
+  }
+  for (const file of legacyFiles) {
+    const content = safeReadFile(file)
+    if (!content) {
+      continue
+    }
+    const fileHits: string[] = []
+    for (const m of content.matchAll(MAVEN_INSTALL_NAME_RE)) {
+      fileHits.push(m[1] as string)
+    }
+    candidates.push(...fileHits)
+    if (verbose) {
+      logger.log(
+        '[VERBOSE] discovery: scanned',
+        file,
+        `(${fileHits.length} maven_install name match(es))`,
+      )
+    }
+  }
+
+  const deduped = uniqueSorted(candidates)
+  if (verbose) {
+    logger.log('[VERBOSE] discovery: candidate set (pre-seed):', deduped)
+  }
+  return deduped
+}
+
+export type RepoProbe = (
+  repoName: string,
+) => Promise<{ stdout: string; code: number }>
+
+export type ValidationResult = {
+  valid: boolean
+  // Probe stdout — populated whenever the probe was reachable, even when
+  // validation rejects the repo. Empty string when the probe itself threw.
+  stdout: string
+}
+
+// Step 2: validate a candidate by running the probe and confirming
+// `maven_coordinates=` appears in stdout (the marker emitted by jvm_import /
+// aar_import rules generated by rules_jvm_external). Returns the probe
+// stdout alongside the verdict so the caller can cache it and reuse it
+// instead of running an identical extraction query.
+export async function validateMavenRepo(
+  repoName: string,
+  probe: RepoProbe,
+  verbose?: boolean,
+): Promise<ValidationResult> {
+  try {
+    const result = await probe(repoName)
+    if (result.code !== 0) {
+      if (verbose) {
+        logger.log(
+          `[VERBOSE] discovery: probe @${repoName}: REJECT (code=${result.code})`,
+        )
+      }
+      return { valid: false, stdout: result.stdout }
+    }
+    const valid = MAVEN_COORDINATES_MARKER_RE.test(result.stdout)
+    if (verbose) {
+      logger.log(
+        `[VERBOSE] discovery: probe @${repoName}:`,
+        valid
+          ? 'ACCEPT (maven_coordinates marker found)'
+          : 'REJECT (no maven_coordinates marker in probe stdout)',
+      )
+    }
+    return { valid, stdout: result.stdout }
+  } catch (e) {
+    if (verbose) {
+      logger.log(
+        `[VERBOSE] discovery: probe @${repoName}: REJECT (probe threw):`,
+        e instanceof Error ? e.message : String(e),
+      )
+    }
+    return { valid: false, stdout: '' }
+  }
+}
+
+// The default maven_install repo name when no explicit `name=` is given.
+// Included as a seed so repos that define maven_install in a subdirectory
+// .bzl file (not scanned by parseMavenRepoCandidates) are still discovered.
+const DEFAULT_MAVEN_REPO_SEED = 'maven'
+
+// Composition: parse, then validate each candidate; return validated subset
+// as a Map keyed by repo name with the validated probe stdout as value.
+// Map iteration order matches insertion order, so callers that just want
+// the list of repo names can call `Array.from(repos.keys())`. Callers that
+// want to skip re-running the same `bazel query` during extraction can read
+// the cached stdout off the Map and parse it directly.
+//
+// Always seeds with the default `@maven` repo name so repos whose
+// maven_install is defined in a sub-directory .bzl file (not reachable by
+// the top-level static scan) can still be discovered via probe validation.
+export async function discoverMavenRepos(
+  cwd: string,
+  probe: RepoProbe,
+  nativeCandidates?: string[],
+  verbose?: boolean,
+): Promise<Map<string, string>> {
+  const parsed =
+    nativeCandidates && nativeCandidates.length
+      ? nativeCandidates
+      : parseMavenRepoCandidates(cwd, verbose)
+  if (verbose) {
+    logger.log(
+      '[VERBOSE] discovery: candidate source:',
+      nativeCandidates && nativeCandidates.length
+        ? `bzlmod visible-repos (${nativeCandidates.length})`
+        : `static parse (${parsed.length})`,
+    )
+  }
+  // Seed with the default repo name first (so it appears first in output if
+  // validated). Dedup via Set before validation.
+ const seen = new Set([DEFAULT_MAVEN_REPO_SEED]) + const candidates: string[] = [DEFAULT_MAVEN_REPO_SEED] + for (const c of parsed) { + if (!seen.has(c)) { + seen.add(c) + candidates.push(c) + } + } + if (verbose) { + logger.log( + '[VERBOSE] discovery: candidate set to probe (seed-first, deduped):', + candidates, + ) + } + const validated = new Map() + for (const c of candidates) { + // eslint-disable-next-line no-await-in-loop + const result = await validateMavenRepo(c, probe, verbose) + if (result.valid) { + validated.set(c, result.stdout) + } + } + if (verbose) { + logger.log( + '[VERBOSE] discovery: validated repos:', + Array.from(validated.keys()), + ) + } + return validated +} diff --git a/src/commands/manifest/bazel/bazel-repo-discovery.test.mts b/src/commands/manifest/bazel/bazel-repo-discovery.test.mts new file mode 100644 index 000000000..12d8a9a86 --- /dev/null +++ b/src/commands/manifest/bazel/bazel-repo-discovery.test.mts @@ -0,0 +1,324 @@ +import { mkdtempSync, rmSync, writeFileSync } from 'node:fs' +import os from 'node:os' +import path from 'node:path' +import { fileURLToPath } from 'node:url' + +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' + +import { logger } from '@socketsecurity/registry/lib/logger' + +import { + discoverMavenRepos, + parseMavenRepoCandidates, + parseVisibleRepoCandidates, + validateMavenRepo, +} from './bazel-repo-discovery.mts' + +import type { RepoProbe } from './bazel-repo-discovery.mts' + +const __filename = fileURLToPath(import.meta.url) +const __dirname = path.dirname(__filename) + +// from src/commands/manifest/bazel/ to repo root is four levels up, then into +// test/fixtures/manifest-bazel. 
+const FIXTURES = path.join( + __dirname, + '..', + '..', + '..', + '..', + 'test', + 'fixtures', + 'manifest-bazel', +) + +const acceptingProbe: RepoProbe = async () => ({ + stdout: + 'jvm_import(\n name = "guava",\n maven_coordinates = "com.google.guava:guava:33.0.0-jre",\n)', + code: 0, +}) + +const compactAcceptingProbe: RepoProbe = async () => ({ + stdout: + 'jvm_import(\n name = "guava",\n maven_coordinates="com.google.guava:guava:33.0.0-jre",\n)', + code: 0, +}) + +const rejectingProbe: RepoProbe = async () => ({ stdout: '', code: 0 }) + +const failingProbe: RepoProbe = async () => ({ stdout: '', code: 1 }) + +const throwingProbe: RepoProbe = async () => { + throw new Error('bazel exploded') +} + +const selectiveProbe: RepoProbe = async name => + name === 'maven' + ? { stdout: 'maven_coordinates=foo', code: 0 } + : { stdout: '', code: 0 } + +describe('bazel-repo-discovery', () => { + describe('parseMavenRepoCandidates', () => { + it('parses single use_repo from bzlmod-only', () => { + expect( + parseMavenRepoCandidates(path.join(FIXTURES, 'bzlmod-only')), + ).toEqual(['maven']) + }) + + it('parses multiple names from multi-repo-bzlmod', () => { + expect( + parseMavenRepoCandidates( + path.join(FIXTURES, 'multi-repo-bzlmod'), + ).sort(), + ).toEqual(['maven', 'maven_test'].sort()) + }) + + it('recovers custom name from custom-name-bzlmod', () => { + expect( + parseMavenRepoCandidates(path.join(FIXTURES, 'custom-name-bzlmod')), + ).toEqual(['maven_rules_kotlin_example']) + }) + + it('parses maven_install name from legacy WORKSPACE', () => { + expect( + parseMavenRepoCandidates(path.join(FIXTURES, 'legacy-only')), + ).toEqual(['maven']) + }) + + it('parses maven_install name from sibling .bzl file (legacy-with-load)', () => { + expect( + parseMavenRepoCandidates(path.join(FIXTURES, 'legacy-with-load')), + ).toEqual(['maven_legacy_app']) + }) + + it('parses repo names containing hyphens and dots from static sources', () => { + const dir = 
mkdtempSync(path.join(os.tmpdir(), 'bazel-repos-')) + try { + writeFileSync( + path.join(dir, 'MODULE.bazel'), + 'use_repo(maven, "maven-prod", "third.party.maven")\n', + ) + writeFileSync( + path.join(dir, 'WORKSPACE'), + 'maven_install(name = "legacy-maven.prod", artifacts = [])\n', + ) + + expect(parseMavenRepoCandidates(dir)).toEqual([ + 'legacy-maven.prod', + 'maven-prod', + 'third.party.maven', + ]) + } finally { + rmSync(dir, { recursive: true, force: true }) + } + }) + + it('returns empty array on a directory without bazel markers', () => { + // Use the fixtures root itself: no MODULE.bazel/WORKSPACE there. + expect(parseMavenRepoCandidates(FIXTURES)).toEqual([]) + }) + }) + + describe('parseVisibleRepoCandidates', () => { + it('parses apparent repo names from streamed jsonproto output', () => { + const output = [ + JSON.stringify({ + repository: { + apparentName: '@maven', + canonicalName: 'rules_jvm_external~maven~maven', + }, + }), + JSON.stringify({ + repository: { + apparent_name: 'maven_rules_kotlin_example', + canonical_name: 'rules_jvm_external~maven~custom', + }, + }), + JSON.stringify({ + repository: { + apparentName: '@maven-prod', + canonicalName: 'rules_jvm_external~maven~prod', + }, + }), + JSON.stringify({ + repository: { + apparentName: 'third.party.maven', + canonicalName: 'rules_jvm_external~maven~third_party', + }, + }), + 'not json', + ].join('\n') + + expect(parseVisibleRepoCandidates(output)).toEqual([ + 'maven', + 'maven-prod', + 'maven_rules_kotlin_example', + 'third.party.maven', + ]) + }) + }) + + describe('validateMavenRepo', () => { + it('accepts when probe stdout contains spaced maven_coordinates output', async () => { + const r = await validateMavenRepo('maven', acceptingProbe) + expect(r.valid).toBe(true) + expect(r.stdout).toContain('maven_coordinates') + }) + + it('accepts when probe stdout contains compact maven_coordinates output', async () => { + const r = await validateMavenRepo('maven', compactAcceptingProbe) + 
expect(r.valid).toBe(true) + expect(r.stdout).toContain('maven_coordinates') + }) + + it('rejects when probe stdout lacks maven_coordinates=', async () => { + expect((await validateMavenRepo('not_maven', rejectingProbe)).valid).toBe( + false, + ) + }) + + it('rejects on non-zero exit code', async () => { + expect( + (await validateMavenRepo('also_not_maven', failingProbe)).valid, + ).toBe(false) + }) + + it('rejects when probe throws', async () => { + expect((await validateMavenRepo('crash', throwingProbe)).valid).toBe( + false, + ) + }) + }) + + describe('discoverMavenRepos', () => { + it('returns parsed candidates that the probe validates, with cached probe stdout', async () => { + // multi-repo-bzlmod parses to ['maven', 'maven_test']; the accepting probe + // validates both. The returned Map carries the probe stdout for each. + const result = await discoverMavenRepos( + path.join(FIXTURES, 'multi-repo-bzlmod'), + acceptingProbe, + ) + expect(Array.from(result.keys()).sort()).toEqual( + ['maven', 'maven_test'].sort(), + ) + for (const stdout of result.values()) { + expect(stdout).toContain('maven_coordinates') + } + }) + + it('uses native visible repo candidates instead of static parsing when provided', async () => { + const result = await discoverMavenRepos( + path.join(FIXTURES, 'multi-repo-bzlmod'), + acceptingProbe, + ['native_maven'], + ) + expect(Array.from(result.keys())).toEqual(['maven', 'native_maven']) + }) + + it('filters out candidates the probe rejects', async () => { + // Probe accepts only when repo name === 'maven'; rejects 'maven_test'. 
+ const result = await discoverMavenRepos( + path.join(FIXTURES, 'multi-repo-bzlmod'), + selectiveProbe, + ) + expect(Array.from(result.keys())).toEqual(['maven']) + }) + }) + + describe('verbose diagnostics', () => { + let logSpy: ReturnType + + beforeEach(() => { + logSpy = vi.spyOn(logger, 'log').mockImplementation(() => logger) + }) + + afterEach(() => { + logSpy.mockRestore() + }) + + function loggedLines(): string { + return logSpy.mock.calls + .map(args => args.map(a => String(a)).join(' ')) + .join('\n') + } + + it('parseMavenRepoCandidates stays silent when verbose is unset', () => { + parseMavenRepoCandidates(path.join(FIXTURES, 'multi-repo-bzlmod')) + expect(logSpy).not.toHaveBeenCalled() + }) + + it('parseMavenRepoCandidates emits scanned-files + candidate set when verbose=true', () => { + parseMavenRepoCandidates(path.join(FIXTURES, 'multi-repo-bzlmod'), true) + const text = loggedLines() + expect(text).toContain('discovery: scanned') + expect(text).toContain('MODULE.bazel') + expect(text).toContain('use_repo match') + expect(text).toContain('candidate set (pre-seed)') + }) + + it('validateMavenRepo logs ACCEPT under verbose', async () => { + await validateMavenRepo('maven', acceptingProbe, true) + expect(loggedLines()).toMatch( + /probe @maven:\s*ACCEPT \(maven_coordinates marker found\)/, + ) + }) + + it('validateMavenRepo logs REJECT (no marker) under verbose', async () => { + await validateMavenRepo('not_maven', rejectingProbe, true) + expect(loggedLines()).toMatch(/probe @not_maven:\s*REJECT/) + }) + + it('validateMavenRepo logs REJECT (probe threw) under verbose', async () => { + await validateMavenRepo('crash', throwingProbe, true) + expect(loggedLines()).toMatch(/probe @crash:\s*REJECT \(probe threw\)/) + }) + + it('discoverMavenRepos propagates verbose into the full pipeline', async () => { + await discoverMavenRepos( + path.join(FIXTURES, 'multi-repo-bzlmod'), + selectiveProbe, + undefined, + true, + ) + const text = loggedLines() + // 
Candidate-source label. + expect(text).toContain('candidate source: static parse') + // Seeded-and-deduped candidate set log. + expect(text).toContain('candidate set to probe') + // Per-candidate probe verdicts. + expect(text).toMatch(/probe @maven:\s*ACCEPT/) + expect(text).toMatch(/probe @maven_test:\s*REJECT/) + // Final validated set. + expect(text).toContain('validated repos') + }) + }) + + describe('DoS guard', () => { + it('completes parse on 1MB pathological input within 1s', () => { + // Synthesize a 1MB Bzlmod-shaped file in a tmp dir and feed it through + // parseMavenRepoCandidates. Exercises the bounded USE_REPO_RE + + // QUOTED_NAME_RE windows. + const dir = mkdtempSync(path.join(os.tmpdir(), 'bazel-discover-')) + try { + // Build the fixture content in a single pass (avoid O(n^2) join-in-loop). + const lines: string[] = [] + let totalLen = 0 + while (totalLen < 1_000_000) { + const line = 'use_repo(maven, "x_' + lines.length + '")' + lines.push(line) + // Plus 1 for the eventual newline separator. + totalLen += line.length + 1 + } + writeFileSync(path.join(dir, 'MODULE.bazel'), lines.join('\n')) + const start = process.hrtime.bigint() + const result = parseMavenRepoCandidates(dir) + const elapsed = process.hrtime.bigint() - start + expect(elapsed).toBeLessThan(1_000_000_000n) + // Verify the cap kicks in (length is bounded by MAX_CANDIDATES). 
+ expect(result.length).toBeLessThanOrEqual(256) + } finally { + rmSync(dir, { recursive: true, force: true }) + } + }) + }) +}) diff --git a/src/commands/manifest/bazel/bazel-workspace-detect.mts b/src/commands/manifest/bazel/bazel-workspace-detect.mts new file mode 100644 index 000000000..a4a6bcacc --- /dev/null +++ b/src/commands/manifest/bazel/bazel-workspace-detect.mts @@ -0,0 +1,39 @@ +import { existsSync } from 'node:fs' +import path from 'node:path' + +import { InputError } from '../../../utils/errors.mts' + +export type WorkspaceMode = { + bzlmod: boolean + workspace: boolean +} + +// Detects whether the given Bazel workspace uses Bzlmod (MODULE.bazel), +// legacy WORKSPACE (WORKSPACE or WORKSPACE.bazel), or both (migration). +// Throws InputError when neither marker file is present. +export function detectWorkspaceMode(cwd: string): WorkspaceMode { + const moduleBazel = existsSync(path.join(cwd, 'MODULE.bazel')) + const workspaceFile = + existsSync(path.join(cwd, 'WORKSPACE')) || + existsSync(path.join(cwd, 'WORKSPACE.bazel')) + + if (!moduleBazel && !workspaceFile) { + throw new InputError( + `No Bazel workspace found at ${cwd} (looked for MODULE.bazel, WORKSPACE, WORKSPACE.bazel).`, + ) + } + + return { bzlmod: moduleBazel, workspace: workspaceFile } +} + +// Returns the bazel CLI flags needed to invoke the correct workspace mode. +// Bzlmod-only or migration-window: rely on Bazel 7+ default (Bzlmod on). +// Legacy-only: explicitly disable Bzlmod and enable WORKSPACE. +export function getBazelInvocationFlags(mode: WorkspaceMode): string[] { + if (mode.bzlmod) { + // Bzlmod-only or migration: Bzlmod wins; no flags needed (Bazel 7+ default). + return [] + } + // Legacy-only: explicitly switch to WORKSPACE mode. 
+ return ['--noenable_bzlmod', '--enable_workspace'] +} diff --git a/src/commands/manifest/bazel/bazel-workspace-detect.test.mts b/src/commands/manifest/bazel/bazel-workspace-detect.test.mts new file mode 100644 index 000000000..c1eb6ffa4 --- /dev/null +++ b/src/commands/manifest/bazel/bazel-workspace-detect.test.mts @@ -0,0 +1,70 @@ +import path from 'node:path' +import { fileURLToPath } from 'node:url' + +import { describe, expect, it } from 'vitest' + +import { + detectWorkspaceMode, + getBazelInvocationFlags, +} from './bazel-workspace-detect.mts' + +const __filename = fileURLToPath(import.meta.url) +const __dirname = path.dirname(__filename) + +// from src/commands/manifest/bazel/ to repo root is four levels up, then into +// test/fixtures/manifest-bazel. +const FIXTURES = path.join( + __dirname, + '..', + '..', + '..', + '..', + 'test', + 'fixtures', + 'manifest-bazel', +) + +describe('bazel-workspace-detect', () => { + describe('detectWorkspaceMode', () => { + it('detects bzlmod-only repo', () => { + const mode = detectWorkspaceMode(path.join(FIXTURES, 'bzlmod-only')) + expect(mode).toEqual({ bzlmod: true, workspace: false }) + }) + + it('detects legacy-only repo', () => { + const mode = detectWorkspaceMode(path.join(FIXTURES, 'legacy-only')) + expect(mode).toEqual({ bzlmod: false, workspace: true }) + }) + + it('detects migration-window repo (both files)', () => { + const mode = detectWorkspaceMode(path.join(FIXTURES, 'migration')) + expect(mode).toEqual({ bzlmod: true, workspace: true }) + }) + + it('throws InputError when neither file present', () => { + expect(() => + detectWorkspaceMode('/tmp/definitely-not-a-bazel-repo-xyz123'), + ).toThrowError(/MODULE\.bazel|WORKSPACE/) + }) + }) + + describe('getBazelInvocationFlags', () => { + it('returns legacy flags for workspace-only', () => { + expect( + getBazelInvocationFlags({ bzlmod: false, workspace: true }), + ).toEqual(['--noenable_bzlmod', '--enable_workspace']) + }) + + it('returns empty array for 
bzlmod-only', () => { + expect( + getBazelInvocationFlags({ bzlmod: true, workspace: false }), + ).toEqual([]) + }) + + it('returns empty array for migration (Bzlmod wins)', () => { + expect( + getBazelInvocationFlags({ bzlmod: true, workspace: true }), + ).toEqual([]) + }) + }) +}) diff --git a/src/commands/manifest/bazel/cmd-manifest-bazel.mts b/src/commands/manifest/bazel/cmd-manifest-bazel.mts new file mode 100644 index 000000000..3f5f99135 --- /dev/null +++ b/src/commands/manifest/bazel/cmd-manifest-bazel.mts @@ -0,0 +1,215 @@ +import path from 'node:path' + +import { debugFn } from '@socketsecurity/registry/lib/debug' +import { logger } from '@socketsecurity/registry/lib/logger' + +import { extractBazelToMaven } from './extract_bazel_to_maven.mts' +import constants, { SOCKET_JSON } from '../../../constants.mts' +import { commonFlags } from '../../../flags.mts' +import { checkCommandInput } from '../../../utils/check-input.mts' +import { getOutputKind } from '../../../utils/get-output-kind.mts' +import { meowOrExit } from '../../../utils/meow-with-subcommands.mts' +import { getFlagListOutput } from '../../../utils/output-formatting.mts' +import { readOrDefaultSocketJson } from '../../../utils/socket-json.mts' + +import type { + CliCommandConfig, + CliCommandContext, +} from '../../../utils/meow-with-subcommands.mts' + +const config: CliCommandConfig = { + commandName: 'bazel', + description: + '[beta] Bazel JVM SBOM support — generate manifest files (`maven_install.json`) for a Bazel/Maven project', + hidden: false, + flags: { + ...commonFlags, + bazel: { + type: 'string', + description: + 'Path to bazel/bazelisk binary; default: $(which bazelisk) || $(which bazel)', + }, + bazelFlags: { + type: 'string', + description: + 'Flags forwarded to every bazel invocation (single quoted string)', + }, + bazelOutputBase: { + type: 'string', + description: + 'Bazel --output_base for read-only-cache CI environments', + }, + bazelRc: { + type: 'string', + description: 
'Path to additional .bazelrc fragments forwarded to bazel', + }, + out: { + type: 'string', + description: + 'Output directory for generated manifests; default: ./.socket/bazel-manifests/', + }, + verbose: { + type: 'boolean', + description: 'Stream bazel stdout/stderr', + }, + }, + help: (command, config) => ` + Usage + $ ${command} [options] [CWD=.] + + Options + ${getFlagListOutput(config.flags)} + + [beta] Generates Bazel JVM SBOM manifests (\`maven_install.json\`-shaped) + by running \`bazel query\` against discovered Maven repos. Output is + consumed by \`socket scan create\`'s server-side parser. + + Note: this command generates Maven dependency manifests for Bazel JVM + workspaces. It does not run reachability analysis. + + To generate AND upload in one step, use \`socket scan create --auto-manifest\` + instead — it detects Bazel workspaces, runs the same extraction, and uploads + the result. This subcommand is for generation only. + + Examples + $ ${command} . + $ ${command} --bazel=/usr/local/bin/bazelisk . + `, +} + +export const cmdManifestBazel = { + description: config.description, + hidden: config.hidden, + run, +} + +async function run( + argv: string[] | readonly string[], + importMeta: ImportMeta, + { parentName }: CliCommandContext, +): Promise { + const cli = meowOrExit({ + argv, + config, + importMeta, + parentName, + }) + + const { json = false, markdown = false } = cli.flags + + const dryRun = !!cli.flags['dryRun'] + + // TODO: Implement json/md further. + const outputKind = getOutputKind(json, markdown) + + let [cwd = '.'] = cli.input + // Note: path.resolve vs .join: + // If given path is absolute then cwd should not affect it. 
+ cwd = path.resolve(process.cwd(), cwd) + + const sockJson = readOrDefaultSocketJson(cwd) + + debugFn( + 'inspect', + `override: ${SOCKET_JSON} bazel`, + sockJson?.defaults?.manifest?.bazel, + ) + + let { bazel, bazelFlags, bazelOutputBase, bazelRc, out, verbose } = cli.flags + + // Set defaults for any flag/arg that is not given. Check socket.json first. + if (!bazel) { + const defaultBazel = + sockJson.defaults?.manifest?.bazel?.bazel ?? + sockJson.defaults?.manifest?.bazel?.bin + if (defaultBazel) { + bazel = defaultBazel + logger.info(`Using default --bazel from ${SOCKET_JSON}:`, bazel) + } + // Otherwise leave undefined; resolveBazelBinary performs the PATH + // lookup for bazelisk/bazel. + } + if (!bazelFlags) { + if (sockJson.defaults?.manifest?.bazel?.bazelFlags) { + bazelFlags = sockJson.defaults?.manifest?.bazel?.bazelFlags + logger.info( + `Using default --bazel-flags from ${SOCKET_JSON}:`, + bazelFlags, + ) + } else { + bazelFlags = '' + } + } + if (!bazelOutputBase) { + if (sockJson.defaults?.manifest?.bazel?.bazelOutputBase) { + bazelOutputBase = sockJson.defaults?.manifest?.bazel?.bazelOutputBase + logger.info( + `Using default --bazel-output-base from ${SOCKET_JSON}:`, + bazelOutputBase, + ) + } + } + if (!bazelRc) { + if (sockJson.defaults?.manifest?.bazel?.bazelRc) { + bazelRc = sockJson.defaults?.manifest?.bazel?.bazelRc + logger.info(`Using default --bazel-rc from ${SOCKET_JSON}:`, bazelRc) + } + } + if (!out) { + if (sockJson.defaults?.manifest?.bazel?.out) { + out = sockJson.defaults?.manifest?.bazel?.out + logger.info(`Using default --out from ${SOCKET_JSON}:`, out) + } else { + out = path.join(cwd, '.socket', 'bazel-manifests') + } + } + if (verbose === undefined) { + if (sockJson.defaults?.manifest?.bazel?.verbose !== undefined) { + verbose = sockJson.defaults?.manifest?.bazel?.verbose + logger.info(`Using default --verbose from ${SOCKET_JSON}:`, verbose) + } else { + verbose = false + } + } + + if (verbose) { + logger.group('- ', 
parentName, config.commandName, ':') + logger.group('- flags:', cli.flags) + logger.groupEnd() + logger.log('- input:', cli.input) + logger.groupEnd() + } + + const wasValidInput = checkCommandInput(outputKind, { + nook: true, + test: cli.input.length <= 1, + message: 'Can only accept one DIR (make sure to escape spaces!)', + fail: 'received ' + cli.input.length, + }) + if (!wasValidInput) { + return + } + + if (verbose) { + logger.group() + logger.info('- cwd:', cwd) + logger.info('- bazel bin:', bazel) + logger.info('- out:', out) + logger.groupEnd() + } + + if (dryRun) { + logger.log(constants.DRY_RUN_BAILING_NOW) + return + } + + await extractBazelToMaven({ + bazelFlags: bazelFlags as string | undefined, + bazelOutputBase: bazelOutputBase as string | undefined, + bazelRc: bazelRc as string | undefined, + bin: bazel as string | undefined, + cwd, + out: out as string, + verbose: Boolean(verbose), + }) +} diff --git a/src/commands/manifest/bazel/cmd-manifest-bazel.test.mts b/src/commands/manifest/bazel/cmd-manifest-bazel.test.mts new file mode 100644 index 000000000..55f12a423 --- /dev/null +++ b/src/commands/manifest/bazel/cmd-manifest-bazel.test.mts @@ -0,0 +1,20 @@ +import { describe, expect } from 'vitest' + +import constants, { + FLAG_CONFIG, + FLAG_DRY_RUN, +} from '../../../../src/constants.mts' +import { cmdit, spawnSocketCli } from '../../../../test/utils.mts' + +describe('socket manifest bazel', async () => { + const { binCliPath } = constants + + cmdit( + ['manifest', 'bazel', FLAG_DRY_RUN, FLAG_CONFIG, '{}'], + 'should exit 0 with dry-run (no bazel on PATH)', + async cmd => { + const { code } = await spawnSocketCli(binCliPath, cmd) + expect(code, 'dry-run should exit with code 0').toBe(0) + }, + ) +}) diff --git a/src/commands/manifest/bazel/extract_bazel_to_maven.mts b/src/commands/manifest/bazel/extract_bazel_to_maven.mts new file mode 100644 index 000000000..0ecd804d3 --- /dev/null +++ b/src/commands/manifest/bazel/extract_bazel_to_maven.mts @@ -0,0 
+1,467 @@ +import { + existsSync, + promises as fs, + mkdirSync, + readFileSync, + realpathSync, +} from 'node:fs' +import path from 'node:path' + +import { logger } from '@socketsecurity/registry/lib/logger' + +import { resolveBazelBinary } from './bazel-bin-detect.mts' +import { + parseBazelBuildOutput, + parseUnsortedDepsJson, +} from './bazel-build-parser.mts' +import { ensureJavaOnPath } from './bazel-java-shim.mts' +import { validateOutputBase } from './bazel-output-base-check.mts' +import { provisionPythonShim } from './bazel-python-shim.mts' +import { + buildProbeFor, + runBazelModShowVisibleRepos, +} from './bazel-query-runner.mts' +import { + discoverMavenRepos, + parseVisibleRepoCandidates, +} from './bazel-repo-discovery.mts' +import { + detectWorkspaceMode, + getBazelInvocationFlags, +} from './bazel-workspace-detect.mts' + +import type { ExtractedArtifact } from './bazel-build-parser.mts' +import type { BazelQueryOptions } from './bazel-query-runner.mts' + +export type ExtractBazelOptions = { + bazelFlags: string | undefined + bazelOutputBase: string | undefined + bazelRc: string | undefined + bin: string | undefined + cwd: string + // Optional env override used for python-shim PATH augmentation. + env?: NodeJS.ProcessEnv + out: string + // Use the auto-manifest sibling directory instead of writing directly to `out`. + outLayout?: 'flat' + verbose: boolean +} + +export type ExtractBazelResult = { + artifactCount: number + manifestPath?: string | undefined + ok: boolean +} + +type CoordPair = { groupArtifact: string; version: string } + +// Splits "g:a:v" -> { groupArtifact: "g:a", version: "v" }. +// Returns null on malformed input. 
+function splitCoord(c: string): CoordPair | null { + const lastColon = c.lastIndexOf(':') + if (lastColon < 1) { + return null + } + return { + groupArtifact: c.slice(0, lastColon), + version: c.slice(lastColon + 1), + } +} + +type MavenInstallJsonCurrent = { + artifacts: Record + dependencies: Record + repositories?: Record +} + +type LabelCoordIndex = { + fullLabels: Map + suffixToCoords: Map> +} + +// Builds a lookup from rule label suffix (e.g. ":com_google_guava_guava") to canonical coord. +function buildLabelToCoordMap(artifacts: ExtractedArtifact[]): LabelCoordIndex { + const fullLabels = new Map() + const suffixToCoords = new Map>() + for (const a of artifacts) { + // The rule name (e.g. "com_google_guava_guava") becomes the path under @//:. + // We record by ":" suffix so we can look up regardless of repo name. + const suffix = `:${a.ruleName}` + const coords = suffixToCoords.get(suffix) ?? new Set() + coords.add(a.mavenCoordinates) + suffixToCoords.set(suffix, coords) + if (a.sourceRepo) { + fullLabels.set(`@${a.sourceRepo}//${suffix}`, a.mavenCoordinates) + } + } + return { fullLabels, suffixToCoords } +} + +// Converts a Bazel dep label to a Maven coordinate, using the label-to-coord map. +// Returns null when the label is not recognised. +function depLabelToCoord( + label: string, + labelToCoord: LabelCoordIndex, +): string | null { + // label may be "@maven//:com_google_guava_failureaccess". + const colon = label.lastIndexOf(':') + if (colon < 0) { + return null + } + const fullMatch = labelToCoord.fullLabels.get(label) + if (fullMatch) { + return fullMatch + } + const key = label.slice(colon) + const suffixMatches = labelToCoord.suffixToCoords.get(key) + if (!suffixMatches) { + return null + } + if (suffixMatches.size > 1) { + throw new Error( + `Ambiguous Bazel dependency label ${label} maps rule suffix ${key} to multiple Maven coordinates: ${Array.from( + suffixMatches, + ) + .sort() + .join( + ', ', + )}. 
The generated maven_install.json cannot resolve this dependency label losslessly.`, + ) + } + return Array.from(suffixMatches)[0] ?? null +} + +export function normalizeToMavenInstallJson( + artifacts: ExtractedArtifact[], +): MavenInstallJsonCurrent { + const labelToCoord = buildLabelToCoordMap(artifacts) + const out: MavenInstallJsonCurrent = { + artifacts: {}, + dependencies: {}, + } + const versionsByGroupArtifact = new Map() + const dependencySets = new Map>() + for (const a of artifacts) { + const split = splitCoord(a.mavenCoordinates) + if (!split) { + continue + } + const existingVersion = versionsByGroupArtifact.get(split.groupArtifact) + if (existingVersion && existingVersion !== split.version) { + throw new Error( + `Conflicting versions for ${split.groupArtifact}: ${existingVersion}, ${split.version}. The generated maven_install.json cannot represent multiple versions for the same group:artifact losslessly.`, + ) + } + if (!existingVersion) { + versionsByGroupArtifact.set(split.groupArtifact, split.version) + out.artifacts[split.groupArtifact] = { + shasums: a.mavenSha256 ? { jar: a.mavenSha256 } : {}, + version: split.version, + } + } else if ( + a.mavenSha256 && + !out.artifacts[split.groupArtifact]?.shasums.jar + ) { + out.artifacts[split.groupArtifact] = { + shasums: { jar: a.mavenSha256 }, + version: split.version, + } + } + // Dependency keys in maven_install.json use "g:a" (no version), + // matching the canonical rules_jvm_external lockfile shape. + // Only emit an entry when there are actual dependencies (lockfile omits + // artifacts with an empty dep list). + const depKey = split.groupArtifact + const depCoords = dependencySets.get(depKey) ?? new Set() + for (const depLabel of a.deps) { + // First try our rule-label lookup (the common case for --output=build text). + const c = depLabelToCoord(depLabel, labelToCoord) + if (c) { + // c is "g:a:v"; strip the version to produce "g:a" per lockfile shape. 
+ const cs = splitCoord(c) + depCoords.add(cs ? cs.groupArtifact : c) + } else if ( + depLabel.includes(':') && + !depLabel.startsWith('@') && + !depLabel.startsWith(':') + ) { + // unsorted_deps.json deps may be "g:a:v" in older files or + // "g:a" in v2 lock-file-shaped maps. Strip only when a version is + // present. + const parts = depLabel.split(':') + depCoords.add( + parts.length >= 3 ? parts.slice(0, -1).join(':') : depLabel, + ) + } + } + if (depCoords.size) { + dependencySets.set(depKey, depCoords) + } + } + for (const [depKey, depCoords] of dependencySets) { + out.dependencies[depKey] = Array.from(depCoords) + } + return out +} + +// Resolves the bazel `external/` dir for the given workspace. +// +// Bazel's `bazel-out/` convenience symlink points at +// `/execroot//bazel-out/`; the `external/` dir we +// want is at `/external/`. `path.join` is purely lexical and +// would collapse `bazel-out/..` to the cwd itself, which is the wrong place +// Resolve the symlink at the filesystem level and walk up to +// `` instead. +function bazelExternalDir( + cwd: string, + outputBase: string | undefined, +): string | null { + if (outputBase) { + return path.join(outputBase, 'external') + } + const bazelOutLink = path.join(cwd, 'bazel-out') + if (!existsSync(bazelOutLink)) { + return null + } + try { + // realpath follows symlinks: ...//execroot//bazel-out + const real = realpathSync(bazelOutLink) + // Walk up bazel-out -> -> execroot -> , then into external/. + return path.join(real, '..', '..', '..', 'external') + } catch { + return null + } +} + +// Tries `external//unsorted_deps.json` first; falls back to parsing the +// probe stdout the caller already captured during discovery. Discovery runs +// the same `kind("jvm_import rule|aar_import rule", @//:*)` query that +// extraction needs, so reusing its stdout skips one bazel-query invocation +// per repo on the unpinned path (where unsorted_deps.json isn't on disk). 
+async function extractFromOneRepo( + repoName: string, + queryOpts: BazelQueryOptions, + cachedProbeStdout: string, +): Promise { + const verbose = queryOpts.verbose + // unsorted_deps.json lives under the bazel external dir. + // When --output_base is set, it's under that; otherwise under the workspace's + // bazel-out symlink (resolved via realpath, NOT lexical path.join — the + // lexical form would collapse `bazel-out/..` to cwd and miss the file). + const externalDir = bazelExternalDir(queryOpts.cwd, queryOpts.bazelOutputBase) + if (verbose) { + logger.log( + `[VERBOSE] @${repoName}: external dir:`, + externalDir ?? '(unresolved — bazel-out symlink absent)', + ) + } + const candidates = externalDir + ? [path.join(externalDir, repoName, 'unsorted_deps.json')] + : [] + for (const c of candidates) { + if (existsSync(c)) { + // Bound the read to 1GB to prevent OOM on hostile content while allowing large real-world lockfiles. + // eslint-disable-next-line no-await-in-loop + const stat = await fs.stat(c) + if (stat.size > 1024 * 1024 * 1024) { + logger.warn( + `Skipping oversized ${c} (${stat.size} bytes); falling back to cached probe stdout.`, + ) + break + } + const json = readFileSync(c, 'utf8') + const parsed = parseUnsortedDepsJson(json) + if (parsed.length) { + if (verbose) { + logger.log( + `[VERBOSE] @${repoName}: source=unsorted_deps.json (${c}, ${parsed.length} artifact(s))`, + ) + } + return parsed.map(a => ({ ...a, sourceRepo: repoName })) + } + } else if (verbose) { + logger.log(`[VERBOSE] @${repoName}: unsorted_deps.json miss at`, c) + } + } + // Reuse the probe stdout that discovery already captured for this repo. + // The probe ran exactly this query during validation and only validated + // repos with code === 0 make it into the cache, so retry is unnecessary + // — if the probe was flaky, the repo wouldn't be in the map. + if (!cachedProbeStdout) { + logger.warn( + `No cached probe stdout for @${repoName}; skipping. 
(This shouldn't happen — discovery should have populated it.)`, + ) + return [] + } + if (verbose) { + logger.log( + `[VERBOSE] @${repoName}: source=cached probe stdout (${cachedProbeStdout.length} bytes)`, + ) + } + return parseBazelBuildOutput(cachedProbeStdout).map(a => ({ + ...a, + sourceRepo: repoName, + })) +} + +export async function extractBazelToMaven( + opts: ExtractBazelOptions, +): Promise { + const { cwd, out, verbose } = opts + logger.group('bazel2maven:') + logger.info(`- src dir: \`${cwd}\``) + logger.info(`- out dir: \`${out}\``) + if (!existsSync(cwd)) { + logger.warn(`Warning: cwd does not exist: ${cwd}`) + } + logger.groupEnd() + + try { + // Validate caller-provided Bazel filesystem settings before invoking Bazel. + if (opts.bazelOutputBase) { + validateOutputBase(opts.bazelOutputBase, opts.cwd) + } + // Java must be available before rules_jvm_external/Coursier runs; + // python shim follows so its augmented PATH inherits the JDK prefix. + ensureJavaOnPath() + const shim = await provisionPythonShim() + const baseEnv = shim.augmentedEnv ?? opts.env + + // Step 1: workspace detection. + const mode = detectWorkspaceMode(cwd) + logger.info( + `Workspace mode: bzlmod=${mode.bzlmod} workspace=${mode.workspace}`, + ) + const invocationFlags = getBazelInvocationFlags(mode) + + // Step 2: bazel binary resolution. + const bin = await resolveBazelBinary(opts.bin) + logger.info(`Using bazel: ${bin}`) + if (verbose) { + logger.log('[VERBOSE] resolved options:', { + bin, + bazelRc: opts.bazelRc ?? '(unset)', + bazelOutputBase: opts.bazelOutputBase ?? '(unset)', + bazelFlags: opts.bazelFlags ?? '(unset)', + invocationFlags, + }) + } + + // Step 3: build the shared query options object. + const queryOpts: BazelQueryOptions = { + bin, + cwd, + invocationFlags, + ...(opts.bazelRc ? { bazelRc: opts.bazelRc } : {}), + ...(opts.bazelFlags ? { bazelFlags: opts.bazelFlags } : {}), + ...(opts.bazelOutputBase + ? 
{ bazelOutputBase: opts.bazelOutputBase } + : {}), + ...(baseEnv ? { env: baseEnv } : {}), + verbose, + } + + // Step 4: discover validated Maven repos via the two-step recipe. + // Bzlmod has a native visible-repository surface; prefer that over static + // MODULE.bazel parsing and keep bounded parsing as the legacy/fallback path. + let nativeCandidates: string[] | undefined + if (mode.bzlmod) { + const visibleRepos = await runBazelModShowVisibleRepos(queryOpts) + if (visibleRepos.code === 0) { + nativeCandidates = parseVisibleRepoCandidates(visibleRepos.stdout) + if (verbose) { + logger.log( + '[VERBOSE] Bzlmod visible repo candidates:', + nativeCandidates, + ) + } + } else if (verbose) { + logger.log( + '[VERBOSE] bazel mod show_repo failed; falling back to static candidate parsing:', + visibleRepos.stderr, + ) + } + } + // Returns Map so extraction can reuse the probe + // output and skip running an identical bazel-query a second time. + const probe = buildProbeFor(queryOpts) + const repos = await discoverMavenRepos( + cwd, + probe, + nativeCandidates, + verbose, + ) + const repoNames = Array.from(repos.keys()) + logger.info( + `Discovered ${repos.size} Maven repo(s): ${repoNames.join(', ') || '(none)'}`, + ) + + // Step 5: extract artifacts from each repo (preferring unsorted_deps.json). + const allArtifacts: ExtractedArtifact[] = [] + for (const [repo, probeStdout] of repos) { + // eslint-disable-next-line no-await-in-loop + const artifacts = await extractFromOneRepo(repo, queryOpts, probeStdout) + allArtifacts.push(...artifacts) + logger.info(`@${repo}: ${artifacts.length} artifact(s)`) + } + + // Step 6: normalize to maven_install.json shape. + const normalized = normalizeToMavenInstallJson(allArtifacts) + + // Step 7: write outputs. 
+ // Standalone output writes directly to `out`; auto-manifest uses a sibling directory + // to avoid colliding with a repo's checked-in rules_jvm_external lockfile and + // to avoid repo-root gitignore patterns such as `/maven_install.json`. + const layout = opts.outLayout ?? 'standalone' + const manifestDir = + layout === 'flat' ? path.join(out, '.socket-auto-manifest') : out + mkdirSync(manifestDir, { recursive: true }) + const manifestPath = path.join(manifestDir, 'maven_install.json') + await fs.writeFile( + manifestPath, + JSON.stringify(normalized, null, 2), + 'utf8', + ) + + if (verbose) { + logger.log('[VERBOSE] outputs:', { + artifactCount: allArtifacts.length, + generatedManifest: path.relative(out, manifestPath), + layout, + manifest: manifestPath, + mavenRepos: repoNames, + tool: 'socket manifest bazel', + workspace: { bzlmod: mode.bzlmod, legacyWorkspace: mode.workspace }, + }) + } + + if (!allArtifacts.length) { + process.exitCode = 1 + logger.fail('No Maven artifacts extracted. See warnings above.') + return { artifactCount: 0, manifestPath, ok: false } + } + logger.success( + `Wrote ${allArtifacts.length} artifact(s) to ${path.relative(cwd, manifestPath)}.`, + ) + return { + artifactCount: allArtifacts.length, + manifestPath, + ok: true, + } + } catch (e) { + process.exitCode = 1 + // Always surface the error message; users should not have to + // re-run a multi-minute bazel build with --verbose just to see whether + // the failure was a missing dependency, permission error, or network blip. + const msg = e instanceof Error ? 
e.message : String(e) + logger.fail(`Unexpected error in bazel2maven: ${msg}`) + if (verbose) { + logger.group('[VERBOSE] error:') + logger.log(e) + logger.groupEnd() + } else { + logger.info('Re-run with --verbose for the full stack.') + } + return { artifactCount: 0, ok: false } + } +} diff --git a/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts b/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts new file mode 100644 index 000000000..33f485c9e --- /dev/null +++ b/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts @@ -0,0 +1,437 @@ +import { + existsSync, + mkdtempSync, + readFileSync, + readdirSync, + rmSync, +} from 'node:fs' +import os from 'node:os' +import path from 'node:path' + +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' + +// Mock the helpers BEFORE importing the orchestrator. +vi.mock('./bazel-workspace-detect.mts', () => ({ + detectWorkspaceMode: vi.fn(), + getBazelInvocationFlags: vi.fn(() => []), +})) +vi.mock('./bazel-bin-detect.mts', () => ({ + resolveBazelBinary: vi.fn(async () => '/usr/local/bin/bazel'), +})) +vi.mock('./bazel-repo-discovery.mts', () => ({ + discoverMavenRepos: vi.fn(), + parseVisibleRepoCandidates: vi.fn(() => []), + parseMavenRepoCandidates: vi.fn(), + validateMavenRepo: vi.fn(), +})) +const { probe } = vi.hoisted(() => ({ + probe: async () => ({ code: 0, stdout: 'maven_coordinates=' }), +})) +vi.mock('./bazel-query-runner.mts', () => ({ + buildProbeFor: vi.fn(() => probe), + runBazelModShowVisibleRepos: vi.fn(async () => ({ + code: 0, + stderr: '', + stdout: '', + })), + runBazelQuery: vi.fn(), +})) +// Mock hardening helpers so unit tests run without real fs/network side-effects. 
+vi.mock('./bazel-output-base-check.mts', () => ({ + validateOutputBase: vi.fn(), +})) +vi.mock('./bazel-python-shim.mts', () => ({ + provisionPythonShim: vi.fn(async () => ({ + augmentedEnv: undefined, + shimDir: undefined, + })), +})) +// ensureJavaOnPath now throws when java is missing; unit tests must not +// depend on the host having a JDK installed. +vi.mock('./bazel-java-shim.mts', () => ({ + ensureJavaOnPath: vi.fn(), +})) + +import { validateOutputBase } from './bazel-output-base-check.mts' +import { discoverMavenRepos } from './bazel-repo-discovery.mts' +import { detectWorkspaceMode } from './bazel-workspace-detect.mts' +import { + extractBazelToMaven, + normalizeToMavenInstallJson, +} from './extract_bazel_to_maven.mts' + +const FIXTURES = path.join( + import.meta.dirname, + '..', + '..', + '..', + '..', + 'test', + 'fixtures', + 'manifest-bazel', + 'query-output', +) + +// Walk a directory recursively and return all file paths. +function walk(dir: string): string[] { + const acc: string[] = [] + for (const e of readdirSync(dir, { withFileTypes: true })) { + const p = path.join(dir, e.name) + if (e.isDirectory()) { + acc.push(...walk(p)) + } else { + acc.push(p) + } + } + return acc +} + +describe('extractBazelToMaven', () => { + let tmp: string + + beforeEach(() => { + tmp = mkdtempSync(path.join(os.tmpdir(), 'bazel-extract-')) + vi.mocked(detectWorkspaceMode).mockReturnValue({ + bzlmod: true, + workspace: false, + }) + process.exitCode = 0 + }) + + afterEach(() => { + rmSync(tmp, { recursive: true, force: true }) + vi.resetAllMocks() + process.exitCode = 0 + }) + + it('dedupes exact duplicate coordinates without failing', () => { + const manifest = normalizeToMavenInstallJson([ + { + ruleKind: 'jvm_import', + ruleName: 'com_example_demo', + mavenCoordinates: 'com.example:demo:1.0.0', + deps: [], + }, + { + ruleKind: 'jvm_import', + ruleName: 'com_example_demo', + mavenCoordinates: 'com.example:demo:1.0.0', + deps: [], + }, + ]) + + 
expect(Object.keys(manifest.artifacts)).toEqual(['com.example:demo']) + expect(manifest.artifacts['com.example:demo']).toEqual({ + shasums: {}, + version: '1.0.0', + }) + }) + + it('fails on duplicate label suffixes when dependency resolution is ambiguous', () => { + expect(() => + normalizeToMavenInstallJson([ + { + ruleKind: 'jvm_import', + ruleName: 'root', + mavenCoordinates: 'com.example:root:1.0.0', + deps: [':shared_rule_name'], + }, + { + ruleKind: 'jvm_import', + ruleName: 'shared_rule_name', + mavenCoordinates: 'com.one:lib:1.0.0', + deps: [], + }, + { + ruleKind: 'jvm_import', + ruleName: 'shared_rule_name', + mavenCoordinates: 'com.two:lib:1.0.0', + deps: [], + }, + ]), + ).toThrow(/Ambiguous Bazel dependency label :shared_rule_name/) + }) + + it('writes maven_install.json directly under out without a summary sidecar', async () => { + const sample = readFileSync( + path.join(FIXTURES, 'jvm-import-sample.txt'), + 'utf8', + ) + vi.mocked(discoverMavenRepos).mockResolvedValue( + new Map([['maven', sample]]), + ) + + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + verbose: false, + }) + expect(result).toEqual({ + artifactCount: 2, + manifestPath: path.join(tmp, 'maven_install.json'), + ok: true, + }) + + const manifestText = readFileSync( + path.join(tmp, 'maven_install.json'), + 'utf8', + ) + const manifest = JSON.parse(manifestText) + expect(manifest.artifacts['com.google.guava:guava']).toEqual({ + shasums: { jar: expect.stringMatching(/^9408c2c4/) }, + version: '33.0.0-jre', + }) + // Per the canonical rules_jvm_external maven_install.json shape (see + // normalizeToMavenInstallJson), dependency keys and values use "g:a" + // (no version) — matching rules_jvm_external lockfile output. 
+ expect(manifest.dependencies['com.google.guava:guava']).toContain( + 'com.google.guava:failureaccess', + ) + + expect(existsSync(path.join(tmp, 'socket-bazel-summary.json'))).toBe(false) + expect(existsSync(path.join(tmp, '_whole_repo'))).toBe(false) + }) + + it('writes outputs to .socket-auto-manifest/ when outLayout is "flat"', async () => { + const sample = readFileSync( + path.join(FIXTURES, 'jvm-import-sample.txt'), + 'utf8', + ) + vi.mocked(discoverMavenRepos).mockResolvedValue( + new Map([['maven', sample]]), + ) + + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result).toEqual({ + artifactCount: 2, + manifestPath: path.join( + tmp, + '.socket-auto-manifest', + 'maven_install.json', + ), + ok: true, + }) + + // Manifest lands inside the sibling dir. + expect( + existsSync(path.join(tmp, '.socket-auto-manifest', 'maven_install.json')), + ).toBe(true) + expect( + existsSync( + path.join(tmp, '.socket-auto-manifest', 'socket-bazel-summary.json'), + ), + ).toBe(false) + // Neither output bleeds into / itself nor a _whole_repo/ wrapper. 
+ expect(existsSync(path.join(tmp, 'maven_install.json'))).toBe(false) + expect(existsSync(path.join(tmp, 'socket-bazel-summary.json'))).toBe(false) + expect(existsSync(path.join(tmp, '_whole_repo'))).toBe(false) + }) + + it('writes NO .socket.facts.json files anywhere under out', async () => { + const sample = readFileSync( + path.join(FIXTURES, 'jvm-import-sample.txt'), + 'utf8', + ) + vi.mocked(discoverMavenRepos).mockResolvedValue( + new Map([['maven', sample]]), + ) + + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + verbose: false, + }) + + const files = walk(tmp) + expect( + files.find(f => path.basename(f) === '.socket.facts.json'), + ).toBeUndefined() + expect(result.ok).toBe(true) + }) + + it('sets process.exitCode = 1 and writes empty maven_install.json when no repos discovered', async () => { + vi.mocked(discoverMavenRepos).mockResolvedValue(new Map()) + + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + verbose: false, + }) + + expect(process.exitCode).toBe(1) + expect(result).toEqual({ + artifactCount: 0, + manifestPath: path.join(tmp, 'maven_install.json'), + ok: false, + }) + // Empty manifest is still written. + const manifestText = readFileSync( + path.join(tmp, 'maven_install.json'), + 'utf8', + ) + const manifest = JSON.parse(manifestText) + expect(manifest.artifacts).toEqual({}) + }) + + it('iterates each discovered repo independently when one has no parseable rules', async () => { + const sample = readFileSync( + path.join(FIXTURES, 'jvm-import-sample.txt'), + 'utf8', + ) + // First repo's probe stdout has the canonical sample (2 artifacts). 
+ // Second repo's probe stdout has no parseable jvm_import / aar_import + // blocks, so the parser yields 0 artifacts for it — the iteration must + // still surface the first repo's results. + vi.mocked(discoverMavenRepos).mockResolvedValue( + new Map([ + ['maven', sample], + ['maven_test', '# no rules here\n'], + ]), + ) + + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + verbose: false, + }) + + const manifest = JSON.parse( + readFileSync(path.join(tmp, 'maven_install.json'), 'utf8'), + ) + // Only the successful repo's artifacts (2); maven_test was skipped. + expect(Object.keys(manifest.artifacts)).toHaveLength(2) + expect(result).toEqual({ + artifactCount: 2, + manifestPath: path.join(tmp, 'maven_install.json'), + ok: true, + }) + }) + + it('sets process.exitCode = 1 when one group:artifact has conflicting versions', async () => { + const conflictingStdout = [ + 'jvm_import(', + ' name = "com_example_demo_v1",', + ' maven_coordinates = "com.example:demo:1.0.0",', + ')', + 'jvm_import(', + ' name = "com_example_demo_v2",', + ' maven_coordinates = "com.example:demo:2.0.0",', + ')', + ].join('\n') + vi.mocked(discoverMavenRepos).mockResolvedValue( + new Map([['maven', conflictingStdout]]), + ) + + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + verbose: false, + }) + + expect(process.exitCode).toBe(1) + expect(result).toEqual({ + artifactCount: 0, + ok: false, + }) + expect(existsSync(path.join(tmp, 'maven_install.json'))).toBe(false) + }) + + it('calls validateOutputBase when bazelOutputBase is set', async () => { + vi.mocked(discoverMavenRepos).mockResolvedValue(new Map()) + await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: tmp, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + verbose: false, + 
}) + // validateOutputBase is mocked; verify it was called with the provided path. + expect(vi.mocked(validateOutputBase)).toHaveBeenCalledWith(tmp, tmp) + }) + + it('propagates verbose into discovery and emits resolved-options / outputs diagnostics', async () => { + const sample = readFileSync( + path.join(FIXTURES, 'jvm-import-sample.txt'), + 'utf8', + ) + vi.mocked(discoverMavenRepos).mockResolvedValue( + new Map([['maven', sample]]), + ) + const { logger } = await import('@socketsecurity/registry/lib/logger') + const logSpy = vi.spyOn(logger, 'log').mockImplementation(() => logger) + + try { + await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + verbose: true, + }) + + const text = logSpy.mock.calls + .map(args => + args + .map(a => (typeof a === 'string' ? a : JSON.stringify(a))) + .join(' '), + ) + .join('\n') + // Resolved-options block — names a few known load-bearing fields. + expect(text).toContain('[VERBOSE] resolved options:') + expect(text).toContain('bin') + expect(text).toContain('bazelRc') + expect(text).toContain('bazelOutputBase') + // Outputs block names manifest path and extracted summary fields. + expect(text).toContain('[VERBOSE] outputs:') + expect(text).toContain('manifest') + expect(text).toContain('artifactCount') + expect(text).toContain('generatedManifest') + expect(text).toContain('mavenRepos') + + // Discovery was called with verbose=true as the 4th positional. The + // 3rd positional reflects whatever parseVisibleRepoCandidates returned + // (an empty array in this mocked setup). 
+ expect(vi.mocked(discoverMavenRepos)).toHaveBeenCalledWith( + expect.any(String), + expect.any(Function), + expect.any(Array), + true, + ) + } finally { + logSpy.mockRestore() + } + }) +}) diff --git a/src/commands/manifest/cmd-manifest.mts b/src/commands/manifest/cmd-manifest.mts index 266352828..cd617dd79 100644 --- a/src/commands/manifest/cmd-manifest.mts +++ b/src/commands/manifest/cmd-manifest.mts @@ -1,3 +1,4 @@ +import { cmdManifestBazel } from './bazel/cmd-manifest-bazel.mts' import { cmdManifestAuto } from './cmd-manifest-auto.mts' import { cmdManifestCdxgen } from './cmd-manifest-cdxgen.mts' import { cmdManifestConda } from './cmd-manifest-conda.mts' @@ -37,8 +38,8 @@ const config: CliCommandConfig = { configurations available. See \`manifest --help\` for usage details per language. - Currently supported language: scala [beta], gradle [beta], kotlin (through - gradle) [beta]. + Currently supported language: bazel [beta], gradle [beta], kotlin (through + gradle) [beta], scala [beta]. 
Examples @@ -68,6 +69,7 @@ async function run( importMeta, subcommands: { auto: cmdManifestAuto, + bazel: cmdManifestBazel, cdxgen: cmdManifestCdxgen, conda: cmdManifestConda, gradle: cmdManifestGradle, diff --git a/src/commands/manifest/cmd-manifest.test.mts b/src/commands/manifest/cmd-manifest.test.mts index b463d5245..2973eba1e 100644 --- a/src/commands/manifest/cmd-manifest.test.mts +++ b/src/commands/manifest/cmd-manifest.test.mts @@ -24,6 +24,7 @@ describe('socket manifest', async () => { Commands auto Auto-detect build and attempt to generate manifest file + bazel [beta] Bazel JVM SBOM support \\u2014 generate manifest files (\`maven_install.json\`) for a Bazel/Maven project cdxgen Run cdxgen for SBOM generation conda [beta] Convert a Conda environment.yml file to a python requirements.txt gradle [beta] Use Gradle to generate a manifest file (\`pom.xml\`) for a Gradle/Java/Kotlin/etc project diff --git a/src/commands/manifest/detect-manifest-actions.mts b/src/commands/manifest/detect-manifest-actions.mts index 1eaff8a5c..b03309bd0 100644 --- a/src/commands/manifest/detect-manifest-actions.mts +++ b/src/commands/manifest/detect-manifest-actions.mts @@ -15,6 +15,7 @@ import { import type { SocketJson } from '../../utils/socket-json.mts' export interface GeneratableManifests { + bazel: boolean cdxgen: boolean count: number conda: boolean @@ -29,6 +30,7 @@ export async function detectManifestActions( cwd = process.cwd(), ): Promise { const output = { + bazel: false, cdxgen: false, // TODO count: 0, conda: false, @@ -36,6 +38,21 @@ export async function detectManifestActions( sbt: false, } + if (sockJson?.defaults?.manifest?.bazel?.disabled) { + debugLog( + 'notice', + `[DEBUG] - bazel auto-detection is disabled in ${SOCKET_JSON}`, + ) + } else if ( + existsSync(path.join(cwd, 'MODULE.bazel')) || + existsSync(path.join(cwd, 'WORKSPACE')) || + existsSync(path.join(cwd, 'WORKSPACE.bazel')) + ) { + debugLog('notice', '[DEBUG] - Detected a Bazel workspace') + 
output.bazel = true + output.count += 1 + } + if (sockJson?.defaults?.manifest?.sbt?.disabled) { debugLog( 'notice', diff --git a/src/commands/manifest/detect-manifest-actions.test.mts b/src/commands/manifest/detect-manifest-actions.test.mts new file mode 100644 index 000000000..116d50d70 --- /dev/null +++ b/src/commands/manifest/detect-manifest-actions.test.mts @@ -0,0 +1,75 @@ +import { mkdtempSync, rmSync, writeFileSync } from 'node:fs' +import os from 'node:os' +import path from 'node:path' + +import { afterEach, beforeEach, describe, expect, it } from 'vitest' + +import { detectManifestActions } from './detect-manifest-actions.mts' + +import type { SocketJson } from '../../utils/socket-json.mts' + +function mkTmp(): string { + return mkdtempSync(path.join(os.tmpdir(), 'detect-manifest-bazel-')) +} + +function touch(dir: string, name: string): void { + writeFileSync(path.join(dir, name), '') +} + +describe('detectManifestActions — bazel detector', () => { + let cwd: string + + beforeEach(() => { + cwd = mkTmp() + }) + + afterEach(() => { + rmSync(cwd, { recursive: true, force: true }) + }) + + it('skips bazel when defaults.manifest.bazel.disabled is true', async () => { + touch(cwd, 'MODULE.bazel') + const result = await detectManifestActions( + { + defaults: { manifest: { bazel: { disabled: true } } }, + } as SocketJson, + cwd, + ) + expect(result.bazel).toBe(false) + expect(result.count).toBe(0) + }) + + it('detects MODULE.bazel', async () => { + touch(cwd, 'MODULE.bazel') + const result = await detectManifestActions(null, cwd) + expect(result.bazel).toBe(true) + expect(result.count).toBe(1) + }) + + it('detects WORKSPACE', async () => { + touch(cwd, 'WORKSPACE') + const result = await detectManifestActions(null, cwd) + expect(result.bazel).toBe(true) + }) + + it('detects WORKSPACE.bazel', async () => { + touch(cwd, 'WORKSPACE.bazel') + const result = await detectManifestActions(null, cwd) + expect(result.bazel).toBe(true) + }) + + it('does not detect bazel 
when no marker present', async () => { + const result = await detectManifestActions(null, cwd) + expect(result.bazel).toBe(false) + expect(result.count).toBe(0) + }) + + it('co-detects bazel and gradle when both markers are present', async () => { + touch(cwd, 'MODULE.bazel') + touch(cwd, 'gradlew') + const result = await detectManifestActions(null, cwd) + expect(result.bazel).toBe(true) + expect(result.gradle).toBe(true) + expect(result.count).toBe(2) + }) +}) diff --git a/src/commands/manifest/generate_auto_manifest.mts b/src/commands/manifest/generate_auto_manifest.mts index ecb2fe19f..b2d059821 100644 --- a/src/commands/manifest/generate_auto_manifest.mts +++ b/src/commands/manifest/generate_auto_manifest.mts @@ -2,6 +2,7 @@ import path from 'node:path' import { logger } from '@socketsecurity/registry/lib/logger' +import { extractBazelToMaven } from './bazel/extract_bazel_to_maven.mts' import { convertGradleToMaven } from './convert_gradle_to_maven.mts' import { convertSbtToMaven } from './convert_sbt_to_maven.mts' import { handleManifestConda } from './handle-manifest-conda.mts' @@ -11,6 +12,10 @@ import { readOrDefaultSocketJson } from '../../utils/socket-json.mts' import type { GeneratableManifests } from './detect-manifest-actions.mts' import type { OutputKind } from '../../types.mts' +export type GenerateAutoManifestResult = { + generatedFiles: string[] +} + export async function generateAutoManifest({ cwd, detected, @@ -21,8 +26,9 @@ export async function generateAutoManifest({ cwd: string outputKind: OutputKind verbose: boolean -}) { +}): Promise { const sockJson = readOrDefaultSocketJson(cwd) + const generatedFiles: string[] = [] if (verbose) { logger.info(`Using this ${SOCKET_JSON} for defaults:`, sockJson) @@ -77,4 +83,32 @@ export async function generateAutoManifest({ verbose: Boolean(sockJson.defaults?.manifest?.conda?.verbose), }) } + + if (!sockJson?.defaults?.manifest?.bazel?.disabled && detected.bazel) { + const bazelConfig = 
sockJson?.defaults?.manifest?.bazel + logger.log( + 'Detected a Bazel workspace, extracting Maven dependencies via bazel query...', + ) + const bazelResult = await extractBazelToMaven({ + bazelFlags: bazelConfig?.bazelFlags, + bazelOutputBase: bazelConfig?.bazelOutputBase, + bazelRc: bazelConfig?.bazelRc, + bin: bazelConfig?.bazel ?? bazelConfig?.bin, + cwd, + // Auto-manifest writes into a sibling directory instead of the repo root + // so scan discovery can pick it up without colliding with a checked-in + // rules_jvm_external lockfile or repo-root gitignore patterns. + out: bazelConfig?.out ?? cwd, + outLayout: 'flat', + verbose: Boolean(bazelConfig?.verbose) || verbose, + }) + if (!bazelResult.ok) { + throw new Error('Bazel auto-manifest generation failed') + } + if (bazelResult.manifestPath) { + generatedFiles.push(bazelResult.manifestPath) + } + } + + return { generatedFiles } } diff --git a/src/commands/manifest/generate_auto_manifest.test.mts b/src/commands/manifest/generate_auto_manifest.test.mts new file mode 100644 index 000000000..7f803b9fc --- /dev/null +++ b/src/commands/manifest/generate_auto_manifest.test.mts @@ -0,0 +1,194 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +// Mock the helpers BEFORE importing generateAutoManifest. 
+vi.mock('./bazel/extract_bazel_to_maven.mts', () => ({ + extractBazelToMaven: vi.fn(async () => ({ + artifactCount: 1, + manifestPath: '/tmp/repo/.socket-auto-manifest/maven_install.json', + ok: true, + })), +})) +vi.mock('./convert_gradle_to_maven.mts', () => ({ + convertGradleToMaven: vi.fn(async () => undefined), +})) +vi.mock('./convert_sbt_to_maven.mts', () => ({ + convertSbtToMaven: vi.fn(async () => undefined), +})) +vi.mock('./handle-manifest-conda.mts', () => ({ + handleManifestConda: vi.fn(async () => undefined), +})) +vi.mock('../../utils/socket-json.mts', () => ({ + readOrDefaultSocketJson: vi.fn(() => ({})), +})) + +import { extractBazelToMaven } from './bazel/extract_bazel_to_maven.mts' +import { convertGradleToMaven } from './convert_gradle_to_maven.mts' +import { generateAutoManifest } from './generate_auto_manifest.mts' +import { readOrDefaultSocketJson } from '../../utils/socket-json.mts' + +import type { SocketJson } from '../../utils/socket-json.mts' + +const baseDetected = { + bazel: false, + cdxgen: false, + conda: false, + count: 0, + gradle: false, + sbt: false, +} + +describe('generateAutoManifest — bazel branch', () => { + beforeEach(() => { + vi.mocked(extractBazelToMaven).mockClear() + vi.mocked(convertGradleToMaven).mockClear() + vi.mocked(readOrDefaultSocketJson).mockReturnValue({} as SocketJson) + vi.mocked(extractBazelToMaven).mockResolvedValue({ + artifactCount: 1, + manifestPath: '/tmp/repo/.socket-auto-manifest/maven_install.json', + ok: true, + }) + }) + + it('calls extractBazelToMaven with outLayout: "flat" and out===cwd when bazel detected and not disabled', async () => { + await generateAutoManifest({ + cwd: '/tmp/repo', + detected: { ...baseDetected, bazel: true, count: 1 }, + outputKind: 'text', + verbose: false, + }) + expect(extractBazelToMaven).toHaveBeenCalledTimes(1) + expect(extractBazelToMaven).toHaveBeenCalledWith( + expect.objectContaining({ + cwd: '/tmp/repo', + out: '/tmp/repo', + outLayout: 'flat', + }), + ) + 
}) + + it('does NOT call extractBazelToMaven when defaults.manifest.bazel.disabled is true', async () => { + vi.mocked(readOrDefaultSocketJson).mockReturnValue({ + defaults: { manifest: { bazel: { disabled: true } } }, + } as SocketJson) + await generateAutoManifest({ + cwd: '/tmp/repo', + detected: { ...baseDetected, bazel: true, count: 1 }, + outputKind: 'text', + verbose: false, + }) + expect(extractBazelToMaven).not.toHaveBeenCalled() + }) + + it('plumbs bazel and bazelFlags from socket.json defaults', async () => { + vi.mocked(readOrDefaultSocketJson).mockReturnValue({ + defaults: { + manifest: { + bazel: { + bazel: '/usr/local/bin/bazelisk', + bazelFlags: '--config=ci', + }, + }, + }, + } as SocketJson) + await generateAutoManifest({ + cwd: '/tmp/repo', + detected: { ...baseDetected, bazel: true, count: 1 }, + outputKind: 'text', + verbose: false, + }) + expect(extractBazelToMaven).toHaveBeenCalledWith( + expect.objectContaining({ + bin: '/usr/local/bin/bazelisk', + bazelFlags: '--config=ci', + }), + ) + }) + + it('falls back to defaults.manifest.bazel.bin for compatibility', async () => { + vi.mocked(readOrDefaultSocketJson).mockReturnValue({ + defaults: { + manifest: { + bazel: { + bin: '/compat/bin/bazelisk', + }, + }, + }, + } as SocketJson) + await generateAutoManifest({ + cwd: '/tmp/repo', + detected: { ...baseDetected, bazel: true, count: 1 }, + outputKind: 'text', + verbose: false, + }) + expect(extractBazelToMaven).toHaveBeenCalledWith( + expect.objectContaining({ + bin: '/compat/bin/bazelisk', + }), + ) + }) + + it('returns generated Bazel sidecar manifests', async () => { + const result = await generateAutoManifest({ + cwd: '/tmp/repo', + detected: { ...baseDetected, bazel: true, count: 1 }, + outputKind: 'text', + verbose: false, + }) + + expect(result.generatedFiles).toEqual([ + '/tmp/repo/.socket-auto-manifest/maven_install.json', + ]) + }) + + it('throws when Bazel extraction fails', async () => { + 
vi.mocked(extractBazelToMaven).mockResolvedValueOnce({ + artifactCount: 0, + ok: false, + }) + + await expect( + generateAutoManifest({ + cwd: '/tmp/repo', + detected: { ...baseDetected, bazel: true, count: 1 }, + outputKind: 'text', + verbose: false, + }), + ).rejects.toThrow('Bazel auto-manifest generation failed') + }) + + it('runs BOTH bazel and gradle branches when both are detected', async () => { + await generateAutoManifest({ + cwd: '/tmp/repo', + detected: { + ...baseDetected, + bazel: true, + gradle: true, + count: 2, + }, + outputKind: 'text', + verbose: false, + }) + expect(extractBazelToMaven).toHaveBeenCalledTimes(1) + expect(convertGradleToMaven).toHaveBeenCalledTimes(1) + }) + + it('honors socket.json out override (user-supplied .socket-auto-manifest dir)', async () => { + vi.mocked(readOrDefaultSocketJson).mockReturnValue({ + defaults: { + manifest: { bazel: { out: './.socket-auto-manifest' } }, + }, + } as SocketJson) + await generateAutoManifest({ + cwd: '/tmp/repo', + detected: { ...baseDetected, bazel: true, count: 1 }, + outputKind: 'text', + verbose: false, + }) + expect(extractBazelToMaven).toHaveBeenCalledWith( + expect.objectContaining({ + out: './.socket-auto-manifest', + outLayout: 'flat', + }), + ) + }) +}) diff --git a/src/commands/scan/handle-create-new-scan.mts b/src/commands/scan/handle-create-new-scan.mts index aa660f58f..33d8b1df6 100644 --- a/src/commands/scan/handle-create-new-scan.mts +++ b/src/commands/scan/handle-create-new-scan.mts @@ -111,6 +111,8 @@ export async function handleCreateNewScan({ tmp, workspace, }: HandleCreateNewScanConfig): Promise { + let scanTargets = targets + debugFn( 'notice', `Creating new scan for ${orgSlug}/${workspace ? 
`${workspace}/` : ''}${repoName}`, @@ -137,12 +139,17 @@ export async function handleCreateNewScan({ const sockJson = readOrDefaultSocketJson(cwd) const detected = await detectManifestActions(sockJson, cwd) debugDir('inspect', { detected }) - await generateAutoManifest({ + const autoManifestResult = await generateAutoManifest({ detected, cwd, outputKind, verbose: false, }) + if (autoManifestResult.generatedFiles.length) { + scanTargets = Array.from( + new Set([...targets, ...autoManifestResult.generatedFiles]), + ) + } logger.info('Auto-generation finished. Proceeding with Scan creation.') } @@ -180,11 +187,15 @@ export async function handleCreateNewScan({ target: targets[0]!, }) - const packagePaths = await getPackageFilesForScan(targets, supportedFiles, { - additionalIgnores: additionalScaIgnores, - config: socketConfig, - cwd, - }) + const packagePaths = await getPackageFilesForScan( + scanTargets, + supportedFiles, + { + additionalIgnores: additionalScaIgnores, + config: socketConfig, + cwd, + }, + ) spinner.successAndStop( `Found ${packagePaths.length} ${pluralize('file', packagePaths.length)} to include in scan.`, diff --git a/src/commands/scan/handle-create-new-scan.test.mts b/src/commands/scan/handle-create-new-scan.test.mts index 29f5f9974..0998e9530 100644 --- a/src/commands/scan/handle-create-new-scan.test.mts +++ b/src/commands/scan/handle-create-new-scan.test.mts @@ -2,10 +2,13 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' import { handleCreateNewScan } from './handle-create-new-scan.mts' +import type { HandleCreateNewScanConfig } from './handle-create-new-scan.mts' + const { mockFetchCreateOrgFullScan, mockFetchSupportedScanFileNames, mockFindSocketYmlSync, + mockGenerateAutoManifest, mockGetPackageFilesForScan, mockPerformReachabilityAnalysis, mockReadOrDefaultSocketJson, @@ -13,6 +16,7 @@ const { mockFetchCreateOrgFullScan: vi.fn(), mockFetchSupportedScanFileNames: vi.fn(), mockFindSocketYmlSync: vi.fn(), + 
mockGenerateAutoManifest: vi.fn(), mockGetPackageFilesForScan: vi.fn(), mockPerformReachabilityAnalysis: vi.fn(), mockReadOrDefaultSocketJson: vi.fn(), @@ -59,9 +63,57 @@ vi.mock('../manifest/detect-manifest-actions.mts', () => ({ })) vi.mock('../manifest/generate_auto_manifest.mts', () => ({ - generateAutoManifest: vi.fn(), + generateAutoManifest: mockGenerateAutoManifest, })) +function createConfig( + overrides: Partial = {}, +): HandleCreateNewScanConfig { + return { + autoManifest: false, + branchName: 'main', + commitHash: '', + commitMessage: '', + committers: '', + cwd: '/repo', + defaultBranch: false, + interactive: false, + orgSlug: 'fakeOrg', + outputKind: 'text', + pendingHead: false, + pullRequest: 0, + reach: { + excludePaths: [], + reachAnalysisMemoryLimit: 8192, + reachAnalysisTimeout: 0, + reachConcurrency: 1, + reachContinueOnAnalysisErrors: false, + reachContinueOnInstallErrors: false, + reachContinueOnMissingLockFiles: false, + reachContinueOnNoSourceFiles: false, + reachDebug: false, + reachDetailedAnalysisLogFile: false, + reachDisableAnalytics: false, + reachDisableExternalToolChecks: false, + reachEcosystems: [], + reachEnableAnalysisSplitting: false, + reachExcludePaths: [], + reachLazyMode: false, + reachSkipCache: false, + reachUseOnlyPregeneratedSboms: false, + reachVersion: undefined, + runReachabilityAnalysis: false, + }, + readOnly: false, + repoName: 'repo', + report: false, + reportLevel: 'error', + targets: ['/repo'], + tmp: false, + ...overrides, + } +} + describe('handleCreateNewScan excludePaths', () => { beforeEach(() => { vi.clearAllMocks() @@ -78,6 +130,7 @@ describe('handleCreateNewScan excludePaths', () => { data: { parsed: { projectIgnorePaths: ['fixtures/**'] } }, ok: true, }) + mockGenerateAutoManifest.mockResolvedValue({ generatedFiles: [] }) mockGetPackageFilesForScan.mockResolvedValue(['package.json']) mockPerformReachabilityAnalysis.mockResolvedValue({ data: { @@ -89,6 +142,43 @@ describe('handleCreateNewScan 
excludePaths', () => { mockReadOrDefaultSocketJson.mockReturnValue({}) }) + it('includes generated auto-manifest files in SCA discovery targets', async () => { + mockGenerateAutoManifest.mockResolvedValueOnce({ + generatedFiles: ['/repo/.socket-auto-manifest/maven_install.json'], + }) + + await handleCreateNewScan( + createConfig({ + autoManifest: true, + targets: ['/repo/apps/api'], + }), + ) + + expect(mockGetPackageFilesForScan).toHaveBeenCalledWith( + ['/repo/apps/api', '/repo/.socket-auto-manifest/maven_install.json'], + { size: 1 }, + { + additionalIgnores: [], + config: { projectIgnorePaths: ['fixtures/**'] }, + cwd: '/repo', + }, + ) + expect(mockFetchCreateOrgFullScan).toHaveBeenCalled() + }) + + it('aborts before scan creation when auto-manifest generation fails', async () => { + mockGenerateAutoManifest.mockRejectedValueOnce( + new Error('Bazel auto-manifest generation failed'), + ) + + await expect( + handleCreateNewScan(createConfig({ autoManifest: true })), + ).rejects.toThrow('Bazel auto-manifest generation failed') + + expect(mockFetchSupportedScanFileNames).not.toHaveBeenCalled() + expect(mockFetchCreateOrgFullScan).not.toHaveBeenCalled() + }) + it('adds excludePaths to manifest discovery and reachability excludes', async () => { await handleCreateNewScan({ autoManifest: false, diff --git a/src/utils/socket-json.mts b/src/utils/socket-json.mts index c3bde9e87..331c0be05 100644 --- a/src/utils/socket-json.mts +++ b/src/utils/socket-json.mts @@ -39,6 +39,16 @@ export interface SocketJson { defaults?: { manifest?: { + bazel?: { + bazel?: string | undefined + bazelFlags?: string | undefined + bazelOutputBase?: string | undefined + bazelRc?: string | undefined + bin?: string | undefined + disabled?: boolean | undefined + out?: string | undefined + verbose?: boolean | undefined + } conda?: { disabled?: boolean | undefined infile?: string | undefined diff --git a/test/fixtures/manifest-bazel/bzlmod-only/MODULE.bazel 
b/test/fixtures/manifest-bazel/bzlmod-only/MODULE.bazel new file mode 100644 index 000000000..1907cd3ff --- /dev/null +++ b/test/fixtures/manifest-bazel/bzlmod-only/MODULE.bazel @@ -0,0 +1,12 @@ +module(name = "bzlmod_only_fixture", version = "0.0.0") + +bazel_dep(name = "rules_jvm_external", version = "6.0") + +maven = use_extension("@rules_jvm_external//:extensions.bzl", "maven") +maven.install( + artifacts = [ + "com.google.guava:guava:33.0.0-jre", + "junit:junit:4.13.2", + ], +) +use_repo(maven, "maven") diff --git a/test/fixtures/manifest-bazel/custom-name-bzlmod/MODULE.bazel b/test/fixtures/manifest-bazel/custom-name-bzlmod/MODULE.bazel new file mode 100644 index 000000000..0bb370f3c --- /dev/null +++ b/test/fixtures/manifest-bazel/custom-name-bzlmod/MODULE.bazel @@ -0,0 +1,10 @@ +module(name = "custom_name_fixture", version = "0.0.0") + +bazel_dep(name = "rules_jvm_external", version = "6.0") + +maven = use_extension("@rules_jvm_external//:extensions.bzl", "maven") +maven.install( + name = "maven_rules_kotlin_example", + artifacts = ["com.google.dagger:dagger:2.51.1"], +) +use_repo(maven, "maven_rules_kotlin_example") diff --git a/test/fixtures/manifest-bazel/legacy-only/WORKSPACE b/test/fixtures/manifest-bazel/legacy-only/WORKSPACE new file mode 100644 index 000000000..77b100dcf --- /dev/null +++ b/test/fixtures/manifest-bazel/legacy-only/WORKSPACE @@ -0,0 +1,22 @@ +workspace(name = "legacy_only_fixture") + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +http_archive( + name = "rules_jvm_external", + sha256 = "0000000000000000000000000000000000000000000000000000000000000000", + strip_prefix = "rules_jvm_external-6.0", + url = "https://example.invalid/rules_jvm_external-6.0.tar.gz", +) + +load("@rules_jvm_external//:defs.bzl", "maven_install") + +maven_install( + name = "maven", + artifacts = [ + "com.google.guava:guava:33.0.0-jre", + ], + repositories = [ + "https://repo1.maven.org/maven2", + ], +) diff --git 
a/test/fixtures/manifest-bazel/legacy-with-load/WORKSPACE b/test/fixtures/manifest-bazel/legacy-with-load/WORKSPACE new file mode 100644 index 000000000..ce4db6905 --- /dev/null +++ b/test/fixtures/manifest-bazel/legacy-with-load/WORKSPACE @@ -0,0 +1,13 @@ +workspace(name = "legacy_with_load_fixture") + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +http_archive( + name = "rules_jvm_external", + sha256 = "0000000000000000000000000000000000000000000000000000000000000000", + strip_prefix = "rules_jvm_external-6.0", + url = "https://example.invalid/rules_jvm_external-6.0.tar.gz", +) + +load("//:maven_deps.bzl", "load_maven_deps") +load_maven_deps() diff --git a/test/fixtures/manifest-bazel/legacy-with-load/maven_deps.bzl b/test/fixtures/manifest-bazel/legacy-with-load/maven_deps.bzl new file mode 100644 index 000000000..9214d8678 --- /dev/null +++ b/test/fixtures/manifest-bazel/legacy-with-load/maven_deps.bzl @@ -0,0 +1,11 @@ +load("@rules_jvm_external//:defs.bzl", "maven_install") + +def load_maven_deps(): + maven_install( + name = "maven_legacy_app", + artifacts = [ + "com.google.guava:guava:33.0.0-jre", + "junit:junit:4.13.2", + ], + repositories = ["https://repo1.maven.org/maven2"], + ) diff --git a/test/fixtures/manifest-bazel/migration/MODULE.bazel b/test/fixtures/manifest-bazel/migration/MODULE.bazel new file mode 100644 index 000000000..027f04c4b --- /dev/null +++ b/test/fixtures/manifest-bazel/migration/MODULE.bazel @@ -0,0 +1,9 @@ +module(name = "migration_fixture", version = "0.0.0") + +bazel_dep(name = "rules_jvm_external", version = "6.0") + +maven = use_extension("@rules_jvm_external//:extensions.bzl", "maven") +maven.install( + artifacts = ["com.google.guava:guava:33.0.0-jre"], +) +use_repo(maven, "maven") diff --git a/test/fixtures/manifest-bazel/migration/WORKSPACE b/test/fixtures/manifest-bazel/migration/WORKSPACE new file mode 100644 index 000000000..fd700b261 --- /dev/null +++ 
b/test/fixtures/manifest-bazel/migration/WORKSPACE @@ -0,0 +1,2 @@ +workspace(name = "migration_fixture") +# Intentionally minimal stub: this fixture simulates a repo mid-migration to Bzlmod, so MODULE.bazel is the authoritative dependency source. diff --git a/test/fixtures/manifest-bazel/multi-repo-bzlmod/MODULE.bazel b/test/fixtures/manifest-bazel/multi-repo-bzlmod/MODULE.bazel new file mode 100644 index 000000000..87d98e891 --- /dev/null +++ b/test/fixtures/manifest-bazel/multi-repo-bzlmod/MODULE.bazel @@ -0,0 +1,14 @@ +module(name = "multi_repo_fixture", version = "0.0.0") + +bazel_dep(name = "rules_jvm_external", version = "6.0") + +maven = use_extension("@rules_jvm_external//:extensions.bzl", "maven") +maven.install( + name = "maven", + artifacts = ["com.google.guava:guava:33.0.0-jre"], +) +maven.install( + name = "maven_test", + artifacts = ["junit:junit:4.13.2"], +) +use_repo(maven, "maven", "maven_test") diff --git a/test/fixtures/manifest-bazel/query-output/aar-import-sample.txt b/test/fixtures/manifest-bazel/query-output/aar-import-sample.txt new file mode 100644 index 000000000..dbe9c0a84 --- /dev/null +++ b/test/fixtures/manifest-bazel/query-output/aar-import-sample.txt @@ -0,0 +1,8 @@ +aar_import( + name = "androidx_annotation_annotation", + aar = "@maven//:v1/https/maven.google.com/androidx/annotation/annotation/1.7.0/annotation-1.7.0.aar", + maven_coordinates = "androidx.annotation:annotation:1.7.0", + maven_url = "https://maven.google.com/androidx/annotation/annotation/1.7.0/annotation-1.7.0.aar", + maven_sha256 = "0000000000000000000000000000000000000000000000000000000000000002", + deps = [], +) diff --git a/test/fixtures/manifest-bazel/query-output/jvm-import-sample.txt b/test/fixtures/manifest-bazel/query-output/jvm-import-sample.txt new file mode 100644 index 000000000..aacc91fc0 --- /dev/null +++ b/test/fixtures/manifest-bazel/query-output/jvm-import-sample.txt @@ -0,0 +1,24 @@ +# /private/var/tmp/_bazel_user/abc/external/maven/BUILD.bazel:1:11 +jvm_import( + name = "com_google_guava_guava", + jars = 
["@maven//:v1/https/repo1.maven.org/maven2/com/google/guava/guava/33.0.0-jre/guava-33.0.0-jre.jar"], + maven_coordinates = "com.google.guava:guava:33.0.0-jre", + maven_url = "https://repo1.maven.org/maven2/com/google/guava/guava/33.0.0-jre/guava-33.0.0-jre.jar", + maven_sha256 = "9408c2c43dad97e2e3a86f5c6caf9a7c7fe83f44ce11b34d9d40b9f2eee3e3a4", + deps = ["@maven//:com_google_guava_failureaccess", "@maven//:org_checkerframework_checker_qual"], +) +# /private/var/tmp/_bazel_user/abc/external/maven/BUILD.bazel:11:11 +jvm_import( + name = "com_google_guava_failureaccess", + jars = ["@maven//:v1/https/repo1.maven.org/maven2/com/google/guava/failureaccess/1.0.2/failureaccess-1.0.2.jar"], + maven_coordinates = "com.google.guava:failureaccess:1.0.2", + maven_url = "https://repo1.maven.org/maven2/com/google/guava/failureaccess/1.0.2/failureaccess-1.0.2.jar", + maven_sha256 = "0000000000000000000000000000000000000000000000000000000000000001", + deps = [], +) +# /private/var/tmp/_bazel_user/abc/external/maven/BUILD.bazel:21:11 +jvm_import( + name = "no_coords_rule", + jars = ["@maven//:something.jar"], + deps = [], +) diff --git a/test/fixtures/manifest-bazel/query-output/unsorted-deps-sample.json b/test/fixtures/manifest-bazel/query-output/unsorted-deps-sample.json new file mode 100644 index 000000000..94234e87b --- /dev/null +++ b/test/fixtures/manifest-bazel/query-output/unsorted-deps-sample.json @@ -0,0 +1,19 @@ +{ + "artifacts": [ + { + "coordinates": "com.google.guava:guava:33.0.0-jre", + "url": "https://repo1.maven.org/maven2/com/google/guava/guava/33.0.0-jre/guava-33.0.0-jre.jar", + "sha256": "9408c2c43dad97e2e3a86f5c6caf9a7c7fe83f44ce11b34d9d40b9f2eee3e3a4", + "deps": [ + "com.google.guava:failureaccess:1.0.2", + "org.checkerframework:checker-qual:3.41.0" + ] + }, + { + "coordinates": "com.google.guava:failureaccess:1.0.2", + "url": "https://repo1.maven.org/maven2/com/google/guava/failureaccess/1.0.2/failureaccess-1.0.2.jar", + "sha256": 
"0000000000000000000000000000000000000000000000000000000000000001", + "deps": [] + } + ] +} diff --git a/vitest.config.mts b/vitest.config.mts index 7e330f00c..fc26f18cf 100644 --- a/vitest.config.mts +++ b/vitest.config.mts @@ -14,6 +14,10 @@ export default defineConfig({ '**/{karma,rollup,webpack,vite,vitest,jest,ava,babel,nyc,cypress,tsup,build,eslint,prettier}.config.*', // Exclude E2E tests from regular test runs. '**/*.e2e.test.mts', + // Exclude test fixtures — directories like test/fixtures/commands/optimize/pnpm{8,9}/ + // contain full repo copies populated by `socket optimize` runs; their .test.mts + // files would otherwise be globbed and run as if they were the real suite. + 'test/fixtures/**', ], coverage: { exclude: [