diff --git a/.github/workflows/_test-units.yml b/.github/workflows/_test-units.yml index f93c6a4f8..aa866154d 100644 --- a/.github/workflows/_test-units.yml +++ b/.github/workflows/_test-units.yml @@ -42,6 +42,9 @@ jobs: - name: Test code run: npm run test + - name: Test code v2 (incl. CLI) + run: npm run test-v2 + run-tests-without-optional-dependencies: name: Tests Without Optional Dependencies strategy: @@ -74,3 +77,6 @@ jobs: - name: Test code run: npm run test-light + + - name: Test code v2 (incl. CLI) + run: npm run test-v2 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fd59d72b..8cd10d1f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,20 @@ # Mindee Node.js API Library Changelog +## Unreleased +### Changes +* :sparkles: :boom: unify CLI into a single `mindee` binary (replaces + `mindeeV1` / `mindeeV2`); V2 product commands are top-level + (`extraction`, `classification`, `crop`, `ocr`, `split`) and V1 product + commands live under the `v1` group, mirroring the canonical .NET CLI shape +* :sparkles: add `--output {summary,full,raw}`, `--rag/-g`, `--raw-text/-r`, + `--confidence/-c`, `--polygon/-p`, `--text-context/-t`, `--alias/-a` + options to V2 extraction; add `--full-text/-f`, `--async`, `--output` + options to V1 commands +* :sparkles: add `Client.searchModels()` to list models available to the + current API key, with optional `name` / `modelType` filters; add the + matching `search-models` CLI command + + ## v5.4.0 - 2026-06-22 ### Fixes * :bug: :boom: harmonize Crop and Split extraction (now ready for public use) diff --git a/README.md b/README.md index 2d39446af..6464389f6 100644 --- a/README.md +++ b/README.md @@ -39,3 +39,69 @@ Consult the Copyright © Mindee Available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT). 
+ +## Command Line Interface + +A `mindee` command line interface is shipped with the library: + +```bash +npm install -g mindee +mindee -h +``` + +Or, from a checkout: + +```bash +node ./dist/bin/mindee.js -h +``` + +### Authentication + +Provide an API key via `--api-key`: + +- V2: `mindee --verbose <command> --api-key <api-key> ...` +- V1: `mindee v1 <product> --api-key <api-key> ...` + +### V2 commands (top-level) + +``` +mindee <command> --api-key <api-key> --model-id <model-id> [options] <path> +``` + +Available commands: `extraction`, `classification`, `crop`, `ocr`, `split`. + +Extraction-only options: `--rag/-g`, `--raw-text/-r`, `--confidence/-c`, +`--polygon/-p`, `--text-context/-t`. +Common options: `--output/-o {summary,full,raw}`, `--alias/-a`. + +### Search models + +List models available to the current API key: + +``` +mindee search-models --api-key <api-key> [--name <name>] [--model-type <type>] [--raw-json] +``` + +Filter by partial name match (case-insensitive) and / or by exact model +type (one of `extraction`, `crop`, `classification`, `ocr`, `split`). + +### V1 commands (under `v1`) + +``` +mindee v1 <product> --api-key <api-key> [options] +``` + +Where `<product>` is one of `barcode-reader`, `cropper`, `driver-license`, +`financial-document`, `fr-bank-account-details`, `fr-carte-grise`, +`fr-carte-nationale-d-identite`, `generated`, `international-id`, `invoice`, +`invoice-splitter`, `multi-receipts-detector`, `passport`, `receipt`, +`resume`, `us-bank-check`. + +Common options: `--output/-o {summary,full,raw}`. Depending on the product: +`--all-words/-w`, `--full-text/-f`, `--async`. + +### Output modes + +- `summary` (default): brief prediction summary. +- `full`: detailed result including raw text / OCR sections when applicable. +- `raw`: full JSON response. 
diff --git a/bin/mindee.ts b/bin/mindee.ts new file mode 100644 index 000000000..d3ba9e42b --- /dev/null +++ b/bin/mindee.ts @@ -0,0 +1,5 @@ +#!/usr/bin/env node + +import { cli } from "@/cli/index.js"; + +cli(); diff --git a/bin/mindeeV1.ts b/bin/mindeeV1.ts deleted file mode 100755 index 12fa8168a..000000000 --- a/bin/mindeeV1.ts +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env node - -import { cli } from "@/v1/cli.js"; - -cli(); diff --git a/bin/mindeeV2.ts b/bin/mindeeV2.ts deleted file mode 100755 index ad38cd00d..000000000 --- a/bin/mindeeV2.ts +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env node - -import { cli } from "@/v2/cli.js"; - -cli(); diff --git a/docs/code_samples/v2_search_models.txt b/docs/code_samples/v2_search_models.txt new file mode 100644 index 000000000..cc1345ac3 --- /dev/null +++ b/docs/code_samples/v2_search_models.txt @@ -0,0 +1,28 @@ +import * as mindee from "mindee"; +// If you're on CommonJS: +// const mindee = require("mindee"); + +const apiKey = "MY_API_KEY"; + +// Init a new client +const mindeeClient = new mindee.Client( + { apiKey: apiKey } +); + +// Search models, optionally filtered by name partial match +// (case-insensitive) and / or by exact model type +// ("extraction", "crop", "classification", "ocr", "split"). 
+const response = await mindeeClient.searchModels( + // name partial match (optional) + undefined, + // model type (optional) + undefined, +); + +// print a string summary +console.log(response.toString()); + +// Access individual models +for (const model of response.models) { + console.log(`${model.id} - ${model.name}`); +} diff --git a/package.json b/package.json index 988c15232..c6d1130ce 100644 --- a/package.json +++ b/package.json @@ -19,8 +19,7 @@ "type": "module", "main": "src/index.js", "bin": { - "mindeeV1": "bin/mindeeV1.js", - "mindeeV2": "bin/mindeeV2.js" + "mindee": "bin/mindee.js" }, "scripts": { "build": "tsc --build && tsc-alias", @@ -30,7 +29,7 @@ "test-light": "mocha --grep '#OptionalDepsRequired' --invert 'tests/**/*.spec.ts'", "test-integration": "mocha --grep '#OptionalDepsRemoved' --invert 'tests/**/*.integration.ts'", "test-integration-light": "mocha --grep '#OptionalDepsRequired' --invert 'tests/**/*.integration.ts'", - "test-v2": "tsc --noEmit && node --import tsx --test 'tests/v2/**/*.spec.ts'", + "test-v2": "tsc --noEmit && node --import tsx --test 'tests/v2/**/*.spec.ts' 'tests/cli/**/*.spec.ts'", "lint": "tsc --noEmit && eslint --report-unused-disable-directives './src/**/*.ts' './tests/**/*.ts'", "lint-fix": "tsc --noEmit && eslint --fix --report-unused-disable-directives './src/**/*.ts' './tests/**/*.ts'", "docs": "typedoc --out docs/_build ./src/index.ts", diff --git a/src/cli/index.ts b/src/cli/index.ts new file mode 100644 index 000000000..ad1a6273b --- /dev/null +++ b/src/cli/index.ts @@ -0,0 +1,51 @@ +import { buildV1Command } from "@/cli/v1/buildV1Command.js"; +import { Command } from "commander"; +import { ExtractionCommand } from "./v2/extractionCommand.js"; +import { ClassificationCommand } from "./v2/classificationCommand.js"; +import { CropCommand } from "./v2/cropCommand.js"; +import { OcrCommand } from "./v2/ocrCommand.js"; +import { SplitCommand } from "./v2/splitCommand.js"; +import { SearchModelsCommand } from 
"./v2/searchModelsCommand.js"; + +/** + * Build the root `mindee` command line. + * + * The shape mirrors the canonical `.NET` CLI + * (`mindee-api-dotnet/src/Mindee.Cli`): + * - V2 product commands are top-level (`extraction`, `classification`, + * `crop`, `ocr`, `split`), plus the `search-models` tool. + * - V1 product commands live under the `v1` sub-command. + */ +export function buildCli(): Command { + const program = new Command(); + program + .name("mindee") + .description("Command line interface for Mindee products.") + .option("--verbose", "Enables diagnostics output."); + + // V2 top-level commands + program.addCommand(new ExtractionCommand()); + program.addCommand(new ClassificationCommand()); + program.addCommand(new CropCommand()); + program.addCommand(new OcrCommand()); + program.addCommand(new SplitCommand()); + program.addCommand(new SearchModelsCommand()); + + // V1 commands grouped under `v1` + const v1 = buildV1Command(); + program.addCommand(v1); + + return program; +} + +/** + * Entry point for the `mindee` binary. + * + * Parses `process.argv` and dispatches to the matching command. + * + * @param argv command-line arguments to parse (defaults to `process.argv`). + * @returns a Promise resolving to the root command once parsing completes. + */ +export function cli(argv: string[] = process.argv): Promise<Command> { + return buildCli().parseAsync(argv); +} diff --git a/src/cli/output.ts b/src/cli/output.ts new file mode 100644 index 000000000..7987d5411 --- /dev/null +++ b/src/cli/output.ts @@ -0,0 +1,35 @@ +/** + * How to output a CLI response. + */ +export enum OutputType { + /** Document-level summary, in rST format. (Default) */ + summary = "summary", + /** Complete response in rST format. */ + full = "full", + /** Raw JSON. 
*/ + raw = "raw", +} + +export const OUTPUT_CHOICES: readonly OutputType[] = [ + OutputType.summary, + OutputType.full, + OutputType.raw, +] as const; + +export const OUTPUT_DESCRIPTION = + "Specify how to output the data:\n" + + "- summary: a basic summary (default)\n" + + "- full: detailed extraction results, including options\n" + + "- raw: full JSON response"; + +export function parseOutput(value: string): OutputType { + const normalized = value.toLowerCase(); + for (const choice of OUTPUT_CHOICES) { + if (choice === normalized) { + return choice; + } + } + throw new Error( + `Invalid output type '${value}'. Valid values are: ${OUTPUT_CHOICES.join(", ")}.` + ); +} diff --git a/src/v1/cli.ts b/src/cli/v1/buildV1Command.ts similarity index 94% rename from src/v1/cli.ts rename to src/cli/v1/buildV1Command.ts index 6a628dcd4..51665508c 100644 --- a/src/v1/cli.ts +++ b/src/cli/v1/buildV1Command.ts @@ -6,17 +6,15 @@ import { } from "@/v1/parsing/common/index.js"; import { Client, PredictOptions, -} from "./client.js"; +} from "@/v1/client.js"; import { PageOptions, PageOptionsOperation, PathInput, } from "@/input/index.js"; import * as console from "console"; import { CLI_COMMAND_CONFIG, COMMAND_GENERATED, ProductConfig, -} from "./product/cliProducts.js"; -import { Endpoint } from "./http/index.js"; - -const program = new Command(); +} from "./cliProducts.js"; +import { Endpoint } from "@/v1/http/index.js"; // @@ -209,6 +207,7 @@ function addPredictAction(prog: Command) { command: Command ) { const allOptions = { + ...prog.parent?.parent?.parent?.opts(), ...prog.parent?.parent?.opts(), ...prog.parent?.opts(), ...prog.opts(), @@ -233,13 +232,13 @@ function addPredictAction(prog: Command) { } } -export function cli() { - program.name("mindee") - .description("Command line interface for Mindee products.") +export function buildV1Command(): Command { + const v1Program = new Command("v1") + .description("Mindee V1 product commands.") .option("-d, --debug", "high 
verbosity mode"); CLI_COMMAND_CONFIG.forEach((info, name) => { - const productCmd: Command = program.command(name) + const productCmd: Command = v1Program.command(name) .description(info.displayName); if (info.async) { @@ -251,7 +250,7 @@ export function cli() { await callGetDocument( docClass, documentId, - { ...options, ...productCmd.opts(), ...program.opts() } + { ...options, ...productCmd.opts(), ...v1Program.opts() } ); }); addMainOptions(getDocProductCmd); @@ -279,5 +278,5 @@ export function cli() { addPostOptions(predictProductCmd, info); addPredictAction(predictProductCmd); }); - program.parse(process.argv); + return v1Program; } diff --git a/src/v1/product/cliProducts.ts b/src/cli/v1/cliProducts.ts similarity index 100% rename from src/v1/product/cliProducts.ts rename to src/cli/v1/cliProducts.ts diff --git a/src/cli/v2/classificationCommand.ts b/src/cli/v2/classificationCommand.ts new file mode 100644 index 000000000..61b69481c --- /dev/null +++ b/src/cli/v2/classificationCommand.ts @@ -0,0 +1,26 @@ +import { OptionValues } from "commander"; +import { InferenceCommand } from "./inferenceCommand.js"; +import { Classification } from "@/v2/product/classification/classification.js"; +import { BaseProduct } from "@/v2/product/baseProduct.js"; + +/** CLI command for the V2 `classification` product. 
*/ +export class ClassificationCommand extends InferenceCommand { + constructor() { + super("classification", "Classification utility."); + } + + protected get productSlug(): string { + return Classification.slug; + } + + protected get productClass(): typeof BaseProduct { + return Classification; + } + + protected buildParameters(options: OptionValues) { + return { + modelId: options.modelId as string, + alias: options.alias as string | undefined, + }; + } +} diff --git a/src/cli/v2/cropCommand.ts b/src/cli/v2/cropCommand.ts new file mode 100644 index 000000000..1a04155b8 --- /dev/null +++ b/src/cli/v2/cropCommand.ts @@ -0,0 +1,26 @@ +import { OptionValues } from "commander"; +import { InferenceCommand } from "./inferenceCommand.js"; +import { Crop } from "@/v2/product/crop/index.js"; +import { BaseProduct } from "@/v2/product/baseProduct.js"; + +/** CLI command for the V2 `crop` product. */ +export class CropCommand extends InferenceCommand { + constructor() { + super("crop", "Crop utility."); + } + + protected get productSlug(): string { + return Crop.slug; + } + + protected get productClass(): typeof BaseProduct { + return Crop; + } + + protected buildParameters(options: OptionValues) { + return { + modelId: options.modelId as string, + alias: options.alias as string | undefined, + }; + } +} diff --git a/src/cli/v2/extractionCommand.ts b/src/cli/v2/extractionCommand.ts new file mode 100644 index 000000000..bddb9384e --- /dev/null +++ b/src/cli/v2/extractionCommand.ts @@ -0,0 +1,85 @@ +import { OptionValues } from "commander"; +import { InferenceCommand } from "./inferenceCommand.js"; +import { Extraction } from "@/v2/product/extraction/extraction.js"; +import { BaseProduct } from "@/v2/product/baseProduct.js"; +import { BaseInference } from "@/v2/parsing/inference/index.js"; +import { ExtractionInference } from "@/v2/product/extraction/extractionInference.js"; + +/** + * CLI command for the V2 `extraction` product. 
+ * + * Owns all extraction-specific flags (`--rag`, `--raw-text`, + * `--confidence`, `--polygon`, `--text-context`). These are + * deliberately *not* shared with other inference commands. + */ +export class ExtractionCommand extends InferenceCommand { + constructor() { + super("extraction", "Generic all-purpose extraction."); + } + + protected get productSlug(): string { + return Extraction.slug; + } + + protected get productClass(): typeof BaseProduct { + return Extraction; + } + + protected addProductOptions(): void { + this.option( + "-g, --rag", + "Enable Retrieval-Augmented Generation context. Extraction only.", + false + ); + this.option( + "-r, --raw-text", + "Extract all the words in the document.", + false + ); + this.option( + "-c, --confidence", + "Retrieve confidence scores from the extraction.", + false + ); + this.option( + "-p, --polygon", + "Retrieve bounding-box polygons from the extraction.", + false + ); + this.option( + "-t, --text-context <text>", + "Add text context to the API call.", + undefined as unknown as string + ); + } + + protected buildParameters(options: OptionValues) { + return { + modelId: options.modelId as string, + alias: options.alias as string | undefined, + rag: options.rag as boolean, + rawText: options.rawText as boolean, + confidence: options.confidence as boolean, + polygon: options.polygon as boolean, + textContext: options.textContext as string | undefined, + }; + } + + protected printFull(inference: BaseInference): void { + const extraction = inference as ExtractionInference; + if (extraction.activeOptions?.rawText && extraction.result?.rawText) { + console.log("#############\nRaw Text\n#############\n::\n"); + const rawText = extraction.result.rawText.toString().replace(/\n/g, "\n "); + console.log(` ${rawText}\n`); + } + if (extraction.activeOptions?.rag && extraction.result?.rag) { + console.log("#############\nRetrieval-Augmented Generation\n#############\n::\n"); + const ragString = typeof 
extraction.result.rag.toString === "function" + && extraction.result.rag.toString !== Object.prototype.toString + ? extraction.result.rag.toString() + : JSON.stringify(extraction.result.rag, null, 2); + console.log(` ${ragString.replace(/\n/g, "\n ")}\n`); + } + console.log(`\n${extraction.toString()}`); + } +} diff --git a/src/cli/v2/inferenceCommand.ts b/src/cli/v2/inferenceCommand.ts new file mode 100644 index 000000000..5a131094d --- /dev/null +++ b/src/cli/v2/inferenceCommand.ts @@ -0,0 +1,156 @@ +import { Command, Option, OptionValues } from "commander"; +import { Client as ClientV2 } from "@/v2/client.js"; +import { PathInput, UrlInput, InputSource } from "@/input/index.js"; +import { BaseProduct } from "@/v2/product/baseProduct.js"; +import { BaseResponse } from "@/v2/parsing/baseResponse.js"; +import { BaseInference } from "@/v2/parsing/inference/index.js"; +import { OUTPUT_DESCRIPTION, OutputType, parseOutput } from "../output.js"; + +/** + * Abstract base class for all V2 inference commands. + * + * Each product (extraction, classification, crop, ocr, split) extends this + * class and adds its own product-specific options through `addProductOptions` + * and turns parsed CLI options into the corresponding product parameters + * via `buildParameters`. + * + * This deliberately avoids using a shared configuration object for options + * such as `rag`, `rawText`, `polygon`, `confidence`, `textContext`; each + * product owns its CLI surface. 
+ */ +export abstract class InferenceCommand extends Command { + protected constructor(name: string, description: string) { + super(name); + this.description(description); + this.addBaseOptions(); + this.addProductOptions(); + this.addOutputOptions(); + this.argument("<path>", "Full path or URL to the file to parse"); + + this.action(async (inputPath: string, options: OptionValues, command: Command) => { + await this.run(inputPath, this.mergeOptions(command, options)); + }); + } + + /** + * Slug of the product the command targets (must match `BaseProduct.slug`). + */ + protected abstract get productSlug(): string; + + /** + * The Product class registered in `@/v2/product`. Used to obtain the + * `parametersClass` and `responseClass` constructors. + */ + protected abstract get productClass(): typeof BaseProduct; + + /** + * Add product-specific options (e.g. `--rag`, `--raw-text`). + * + * By default, no extra option is added. Subclasses override this to add + * the flags they support. + */ + protected addProductOptions(): void { + // No-op by default. Subclasses override. + } + + /** + * Build the product parameters from parsed CLI options. + * + * Each subclass returns the typed parameter constructor argument for + * its product, so we avoid a shared options bag. 
+ */ + protected abstract buildParameters( + options: OptionValues + ): ConstructorParameters<ReturnType<() => typeof BaseProduct>["parametersClass"]>[0]; + + private addBaseOptions(): void { + this.requiredOption("-m, --model-id <id>", "ID of the model to use"); + this.option("-k, --api-key <key>", "Mindee V2 API key"); + this.option("-a, --alias <alias>", "Alias for the file"); + } + + private addOutputOptions(): void { + this.addOption( + new Option("-o, --output <type>", OUTPUT_DESCRIPTION) + .argParser(parseOutput) + .default(OutputType.summary, "summary") + ); + } + + private mergeOptions(command: Command, options: OptionValues): OptionValues { + let merged: OptionValues = { ...options }; + let cursor: Command | null = command.parent; + while (cursor) { + merged = { ...cursor.opts(), ...merged }; + cursor = cursor.parent; + } + return merged; + } + + private async run(inputPath: string, options: OptionValues): Promise<void> { + const client = new ClientV2({ + apiKey: options.apiKey, + debug: options.verbose, + }); + + const inputSource: InputSource = inputPath.startsWith("https://") + ? 
new UrlInput({ url: inputPath }) + : new PathInput({ inputPath: inputPath }); + + const product = this.productClass; + const params = this.buildParameters(options); + + const response = await client.enqueueAndGetResult(product, inputSource, params, { + initialDelaySec: 2, + delaySec: 1.5, + maxRetries: 80, + }); + + if (!response.inference) { + throw new Error("Inference could not be retrieved"); + } + + this.printResponse(response, options.output as OutputType); + } + + protected printResponse(response: BaseResponse, output: OutputType): void { + const inference = (response as unknown as { inference: BaseInference }).inference; + switch (output) { + case OutputType.full: + this.printFull(inference); + break; + case OutputType.summary: + this.printSummary(inference); + break; + case OutputType.raw: + console.log(JSON.stringify(response.getRawHttp(), null, 2)); + break; + default: + throw new Error(`Unknown output type: ${output}.`); + } + } + + /** + * Print the full representation of the inference (used for `--output full`). + * + * By default this prints the inference's own `toString`. Subclasses + * override to add product-specific sections (such as raw text or RAG). + */ + protected printFull(inference: BaseInference): void { + console.log(`\n${inference.toString()}`); + } + + /** + * Print the summary representation of the inference (used for `--output summary`). + * + * Subclasses override when their result type has a `toString` worth printing. 
+ */ + protected printSummary(inference: BaseInference): void { + const result = (inference as unknown as { result?: { toString(): string } }).result; + if (result && typeof result.toString === "function") { + console.log(`\n${result.toString()}`); + } else { + console.log(`\n${inference.toString()}`); + } + } +} diff --git a/src/cli/v2/ocrCommand.ts b/src/cli/v2/ocrCommand.ts new file mode 100644 index 000000000..46d1ff71e --- /dev/null +++ b/src/cli/v2/ocrCommand.ts @@ -0,0 +1,26 @@ +import { OptionValues } from "commander"; +import { InferenceCommand } from "./inferenceCommand.js"; +import { Ocr } from "@/v2/product/ocr/index.js"; +import { BaseProduct } from "@/v2/product/baseProduct.js"; + +/** CLI command for the V2 `ocr` product. */ +export class OcrCommand extends InferenceCommand { + constructor() { + super("ocr", "OCR utility."); + } + + protected get productSlug(): string { + return Ocr.slug; + } + + protected get productClass(): typeof BaseProduct { + return Ocr; + } + + protected buildParameters(options: OptionValues) { + return { + modelId: options.modelId as string, + alias: options.alias as string | undefined, + }; + } +} diff --git a/src/cli/v2/searchModelsCommand.ts b/src/cli/v2/searchModelsCommand.ts new file mode 100644 index 000000000..e1e083d5d --- /dev/null +++ b/src/cli/v2/searchModelsCommand.ts @@ -0,0 +1,66 @@ +import { Command, Option, OptionValues } from "commander"; +import { Client as ClientV2 } from "@/v2/client.js"; +import { SearchResponse } from "@/v2/parsing/index.js"; + +const MODEL_TYPES = ["extraction", "crop", "classification", "ocr", "split"] as const; + +/** + * CLI command for the `v2/search/models` endpoint. + * + * Mirrors the canonical `.NET` `SearchModelsCommand`. Exposes the same + * flag set: `--name/-n`, `--model-type/-m`, `--raw-json/-r`, + * `--api-key/-k`. 
+ */ +export class SearchModelsCommand extends Command { + constructor() { + super("search-models"); + this.description("Search available models."); + this.option("-k, --api-key <key>", "Mindee V2 API key"); + this.option( + "-n, --name <name>", + "Filter by model name partial match (case insensitive)." + ); + this.addOption( + new Option( + "-m, --model-type <type>", + "Filter by exact model type (case sensitive). " + + `Available options: ${MODEL_TYPES.join(", ")}.` + ) + ); + this.option( + "-r, --raw-json", + "Output the raw JSON response.", + false + ); + + this.action(async (options: OptionValues, command: Command) => { + await this.run(this.mergeOptions(command, options)); + }); + } + + private mergeOptions(command: Command, options: OptionValues): OptionValues { + let merged: OptionValues = { ...options }; + let cursor: Command | null = command.parent; + while (cursor) { + merged = { ...cursor.opts(), ...merged }; + cursor = cursor.parent; + } + return merged; + } + + private async run(options: OptionValues): Promise<void> { + const client = new ClientV2({ + apiKey: options.apiKey, + debug: options.verbose, + }); + const response: SearchResponse = await client.searchModels( + options.name as string | undefined, + options.modelType as string | undefined + ); + if (options.rawJson) { + console.log(JSON.stringify(response.getRawHttp(), null, 2)); + } else { + console.log(response.toString()); + } + } +} diff --git a/src/cli/v2/splitCommand.ts b/src/cli/v2/splitCommand.ts new file mode 100644 index 000000000..9bea0f01e --- /dev/null +++ b/src/cli/v2/splitCommand.ts @@ -0,0 +1,26 @@ +import { OptionValues } from "commander"; +import { InferenceCommand } from "./inferenceCommand.js"; +import { Split } from "@/v2/product/split/index.js"; +import { BaseProduct } from "@/v2/product/baseProduct.js"; + +/** CLI command for the V2 `split` product. 
*/ +export class SplitCommand extends InferenceCommand { + constructor() { + super("split", "Split utility."); + } + + protected get productSlug(): string { + return Split.slug; + } + + protected get productClass(): typeof BaseProduct { + return Split; + } + + protected buildParameters(options: OptionValues) { + return { + modelId: options.modelId as string, + alias: options.alias as string | undefined, + }; + } +} diff --git a/src/v2/cli.ts b/src/v2/cli.ts deleted file mode 100644 index 8135f2d7b..000000000 --- a/src/v2/cli.ts +++ /dev/null @@ -1,111 +0,0 @@ -import { Command, OptionValues } from "commander"; -import * as console from "console"; -import { Client, InputSource, PathInput, UrlInput } from "@/index.js"; -import { BaseInference } from "@/v2/parsing/inference/index.js"; -import { BaseProduct } from "@/v2/product/baseProduct.js"; -import { - Classification, - Crop, - Extraction, - Ocr, - Split, -} from "@/v2/product/index.js"; - -interface CliProduct { - name: string; - description: string; - productClass: typeof BaseProduct; -} - -const program = new Command(); - -// -// EXECUTE THE COMMANDS -// - -function initClient(options: OptionValues): Client { - return new Client({ - apiKey: options.apiKey, - debug: options.debug, - }); -} - -async function enqueueAndGetInference( - product: typeof BaseProduct, - inputPath: string, - options: OptionValues -): Promise { - const mindeeClient = initClient(options); - let inputSource: InputSource; - if (inputPath.startsWith("https://")) { - inputSource = new UrlInput({ url: inputPath }); - } else { - inputSource = new PathInput({ inputPath: inputPath }); - } - const response = await mindeeClient.enqueueAndGetResult( - product, - inputSource, - { modelId: options.model }, - { - initialDelaySec: 2, - delaySec: 1.5, - maxRetries: 80, - } - ); - if (!response.inference) { - throw Error("Inference could not be retrieved"); - } - printResponse(response.inference); -} - -function printResponse( - document: BaseInference, 
-): void { - if (document) { - console.log(`\n${document}`); - } -} - -// -// BUILD THE COMMANDS -// - -function addMainOptions(prog: Command) { - prog.requiredOption( - "-m, --model ", - "Model ID (required)" - ); - prog.argument("", "full path or URL to the file"); -} - -export function cli() { - program.name("mindee") - .description("Command line interface for Mindee V2 products.") - .option("-d, --debug", "high verbosity mode") - .option("-k, --api-key ", "your Mindee API key"); - - const inferenceTypes: CliProduct[] = [ - { name: "extraction", description: "Extract data from a document.", productClass: Extraction }, - { name: "crop", description: "Crop a document.", productClass: Crop }, - { name: "split", description: "Split a document into pages.", productClass: Split }, - { name: "ocr", description: "Read text from a document.", productClass: Ocr }, - { name: "classification", description: "Classify a document.", productClass: Classification }, - ]; - - for (const inference of inferenceTypes) { - const inferenceCmd: Command = program.command(inference.name) - .description(inference.description); - - addMainOptions(inferenceCmd); - - inferenceCmd.action(function ( - inputPath: string, - options: OptionValues, - ) { - const allOptions = { ...program.opts(), ...options }; - return enqueueAndGetInference(inference.productClass, inputPath, allOptions); - }); - } - - program.parse(process.argv); -} diff --git a/src/v2/client.ts b/src/v2/client.ts index 32cf06100..0eeec4af5 100644 --- a/src/v2/client.ts +++ b/src/v2/client.ts @@ -4,7 +4,7 @@ import { InputSource } from "@/input/index.js"; import { MindeeError } from "@/errors/index.js"; import { errorHandler } from "@/errors/handler.js"; import { LOG_LEVELS, logger } from "@/logger.js"; -import { ErrorResponse, JobResponse } from "./parsing/index.js"; +import { ErrorResponse, JobResponse, SearchResponse } from "./parsing/index.js"; import { MindeeApiV2 } from "./http/mindeeApiV2.js"; import { MindeeHttpErrorV2 } 
from "./http/errors.js"; import { PollingOptions, PollingOptionsConstructor } from "./clientOptions/index.js"; @@ -129,6 +129,18 @@ export class Client { return await this.mindeeApi.getJob(jobId); } + /** + * Search models available to the current API key. + * + * @param name optional model name partial match (case insensitive). + * @param modelType optional model type exact match (case sensitive). + * One of `extraction`, `crop`, `classification`, `ocr`, `split`. + * @returns a `Promise` containing a `SearchResponse`. + */ + async searchModels(name?: string, modelType?: string): Promise<SearchResponse> { + return await this.mindeeApi.searchModels(name, modelType); + } + /** * Enqueue a request and poll the server until the result is sent or * until the maximum number of tries is reached. diff --git a/src/v2/http/mindeeApiV2.ts b/src/v2/http/mindeeApiV2.ts index 30d7013a3..e453ae48d 100644 --- a/src/v2/http/mindeeApiV2.ts +++ b/src/v2/http/mindeeApiV2.ts @@ -6,6 +6,7 @@ import { ErrorResponse, ResponseConstructor, JobResponse, + SearchResponse, } from "@/v2/parsing/index.js"; import { sendRequestAndReadResponse, @@ -62,6 +63,18 @@ export class MindeeApiV2 { return this.#processResponse(response, JobResponse); } + /** + * Search for models available to the current API key. + * Throws an error if the server's response contains an error. + * @param name Optional name partial-match filter (case insensitive). + * @param modelType Optional model-type exact-match filter (case sensitive). + * @returns a `Promise` containing the search response. + */ + async searchModels(name?: string, modelType?: string): Promise<SearchResponse> { + const response = await this.#reqGetSearchModels(name, modelType); + return this.#processResponse(response, SearchResponse); + } + /** * Get the result of a queued document from the API. * Throws an error if the server's response contains an error. 
@@ -166,6 +179,25 @@ export class MindeeApiV2 { return await sendRequestAndReadResponse(this.settings.dispatcher, options); } + async #reqGetSearchModels(name?: string, modelType?: string): Promise { + const query = new URLSearchParams(); + if (name && name.length > 0) { + query.set("name", name); + } + if (modelType && modelType.length > 0) { + query.set("model_type", modelType); + } + const qs = query.toString(); + const options: RequestOptions = { + method: "GET", + headers: this.settings.baseHeaders, + hostname: this.settings.hostname, + path: `/v2/search/models${qs.length > 0 ? `?${qs}` : ""}`, + timeoutSecs: this.settings.timeoutSecs, + }; + return await sendRequestAndReadResponse(this.settings.dispatcher, options); + } + /** * Make a request to GET the status of a document in the queue. * @param url URL path to the result. diff --git a/src/v2/index.ts b/src/v2/index.ts index d58f6d47c..4e3b62949 100644 --- a/src/v2/index.ts +++ b/src/v2/index.ts @@ -6,6 +6,10 @@ export { JobResponse, ErrorResponse, LocalResponse, + SearchResponse, + SearchModel, + ModelWebhook, + PaginationMetadata, } from "./parsing/index.js"; export type { BaseParameters, TimerOptions } from "./clientOptions/index.js"; export { PollingOptions } from "./clientOptions/index.js"; diff --git a/src/v2/parsing/index.ts b/src/v2/parsing/index.ts index b449079d2..692c7e679 100644 --- a/src/v2/parsing/index.ts +++ b/src/v2/parsing/index.ts @@ -17,3 +17,10 @@ export { LocalResponse } from "./localResponse.js"; export { BaseResponse } from "./baseResponse.js"; export type { ResponseConstructor } from "./baseResponse.js"; export * as field from "./inference/field/index.js"; +export * as search from "./search/index.js"; +export { + ModelWebhook, + SearchModel, + PaginationMetadata, + SearchResponse, +} from "./search/index.js"; diff --git a/src/v2/parsing/search/index.ts b/src/v2/parsing/search/index.ts new file mode 100644 index 000000000..7fba7531e --- /dev/null +++ b/src/v2/parsing/search/index.ts 
@@ -0,0 +1,4 @@
+export { ModelWebhook } from "./modelWebhook.js";
+export { SearchModel } from "./searchModel.js";
+export { PaginationMetadata } from "./paginationMetadata.js";
+export { SearchResponse } from "./searchResponse.js";
diff --git a/src/v2/parsing/search/modelWebhook.ts b/src/v2/parsing/search/modelWebhook.ts
new file mode 100644
index 000000000..ad2777925
--- /dev/null
+++ b/src/v2/parsing/search/modelWebhook.ts
@@ -0,0 +1,26 @@
+import { StringDict } from "@/parsing/stringDict.js";
+
+/**
+ * Information about a model's webhook, copied as-is from the `id`, `name` and `url` keys of the payload.
+ */
+export class ModelWebhook {
+  /** ID of the webhook. */
+  public id: string;
+  /** Name of the webhook. */
+  public name: string;
+  /** URL of the webhook. */
+  public url: string;
+
+  constructor(serverResponse: StringDict) { // no validation or defaults are applied to the three keys
+    this.id = serverResponse["id"];
+    this.name = serverResponse["name"];
+    this.url = serverResponse["url"];
+  }
+
+  /**
+   * String representation of the webhook: name, ID and URL on separate lines, no trailing newline.
+   */
+  toString(): string {
+    return `:Name: ${this.name}\n:ID: ${this.id}\n:URL: ${this.url}`;
+  }
+}
diff --git a/src/v2/parsing/search/paginationMetadata.ts b/src/v2/parsing/search/paginationMetadata.ts
new file mode 100644
index 000000000..197096d20
--- /dev/null
+++ b/src/v2/parsing/search/paginationMetadata.ts
@@ -0,0 +1,32 @@
+import { StringDict } from "@/parsing/stringDict.js";
+
+/**
+ * Pagination metadata associated with model search.
+ */
+export class PaginationMetadata {
+  /** Number of items per page. */
+  public perPage: number;
+  /** 1-indexed page number. */
+  public page: number;
+  /** Total items returned by the query. */
+  public totalItems: number;
+  /** Total number of pages. */
+  public totalPages: number;
+
+  constructor(serverResponse: StringDict) { // the payload itself may be missing, hence `?.`; absent or null counters fall back to 0
+    this.perPage = serverResponse?.["per_page"] ?? 0;
+    this.page = serverResponse?.["page"] ?? 0;
+    this.totalItems = serverResponse?.["total_items"] ?? 0;
+    this.totalPages = serverResponse?.["total_pages"] ?? 0;
+  }
+
+  /**
+   * String representation of the pagination metadata: one `:Label: value` line per field, each ending in a newline.
+   */
+  toString(): string {
+    return `:Per Page: ${this.perPage}\n`
+      + `:Page: ${this.page}\n`
+      + `:Total Items: ${this.totalItems}\n`
+      + `:Total Pages: ${this.totalPages}\n`;
+  }
+}
diff --git a/src/v2/parsing/search/searchModel.ts b/src/v2/parsing/search/searchModel.ts
new file mode 100644
index 000000000..78995323c
--- /dev/null
+++ b/src/v2/parsing/search/searchModel.ts
@@ -0,0 +1,32 @@
+import { StringDict } from "@/parsing/stringDict.js";
+import { ModelWebhook } from "./modelWebhook.js";
+
+/**
+ * Individual model information returned by the `search/models` endpoint.
+ */
+export class SearchModel {
+  /** ID of the model. */
+  public id: string;
+  /** Name of the model. */
+  public name: string;
+  /** Type of the model. */
+  public modelType: string;
+  /** List of webhooks associated with the model. */
+  public webhooks: ModelWebhook[];
+
+  constructor(serverResponse: StringDict) {
+    this.id = serverResponse["id"];
+    this.name = serverResponse["name"];
+    this.modelType = serverResponse["model_type"];
+    this.webhooks = Array.isArray(serverResponse["webhooks"]) // a missing or non-array "webhooks" value yields []
+      ? serverResponse["webhooks"].map((w: StringDict) => new ModelWebhook(w))
+      : [];
+  }
+
+  /**
+   * String representation of the model: name, ID and model type. Webhooks are not rendered.
+   */
+  toString(): string {
+    return `:Name: ${this.name}\n:ID: ${this.id}\n:Model Type: ${this.modelType}`;
+  }
+}
diff --git a/src/v2/parsing/search/searchResponse.ts b/src/v2/parsing/search/searchResponse.ts
new file mode 100644
index 000000000..2d994e5f4
--- /dev/null
+++ b/src/v2/parsing/search/searchResponse.ts
@@ -0,0 +1,38 @@
+import { StringDict } from "@/parsing/stringDict.js";
+import { BaseResponse } from "@/v2/parsing/baseResponse.js";
+import { SearchModel } from "./searchModel.js";
+import { PaginationMetadata } from "./paginationMetadata.js";
+
+/**
+ * Response of the `v2/search/models` endpoint.
+ */
+export class SearchResponse extends BaseResponse {
+  /** List of all models matching the search query. */
+  public models: SearchModel[];
+  /** Pagination metadata. */
+  public pagination: PaginationMetadata;
+
+  constructor(serverResponse: StringDict) {
+    super(serverResponse);
+    this.models = Array.isArray(serverResponse["models"]) // a missing or non-array "models" value yields []
+      ? serverResponse["models"].map((m: StringDict) => new SearchModel(m))
+      : [];
+    this.pagination = new PaginationMetadata(serverResponse["pagination"]); // the "pagination" key is passed through unchecked
+  }
+
+  /**
+   * String representation of the response: a "Models" section followed by a "Pagination Metadata" section.
+   */
+  toString(): string {
+    const modelsBlock = this.models.length === 0
+      ? "\n" // no models: the section body is just an empty line
+      : this.models.map(m =>
+        `* :Name: ${m.name}\n :ID: ${m.id}\n :Model Type: ${m.modelType}\n`
+      ).join("");
+    return "Models\n######\n"
+      + modelsBlock + "\n"
+      + "Pagination Metadata\n"
+      + "###################\n"
+      + this.pagination.toString();
+  }
+}
diff --git a/src/v2/product/classification/classificationClassifier.ts b/src/v2/product/classification/classificationClassifier.ts
index 3344a1b39..77f59785b 100644
--- a/src/v2/product/classification/classificationClassifier.ts
+++ b/src/v2/product/classification/classificationClassifier.ts
@@ -1,5 +1,5 @@
 import { StringDict } from "@/parsing/index.js";
-import { ExtractionResponse } from "@/v2/product/index.js";
+import { ExtractionResponse } from "@/v2/product/extraction/index.js";
 
 /**
  * Document level classification.
diff --git a/tests/cli/cli.spec.ts b/tests/cli/cli.spec.ts
new file mode 100644
index 000000000..20a62510a
--- /dev/null
+++ b/tests/cli/cli.spec.ts
@@ -0,0 +1,123 @@
+import path from "path";
+import assert from "node:assert/strict";
+import { describe, it, before, after, beforeEach, afterEach } from "node:test";
+import { MockAgent, setGlobalDispatcher } from "undici";
+import fs from "node:fs/promises";
+
+import { buildCli } from "@/cli/index.js";
+import { V2_RESOURCE_PATH } from "../index.js";
+
+const mockAgent = new MockAgent();
+setGlobalDispatcher(mockAgent); // process-wide: undici requests made without an explicit dispatcher now go to the mock
+const mockPool = mockAgent.get("https://v2-cli-host");
+
+interface CapturedConsole {
+  stdout: string[];
+  restore(): void;
+  text(): string;
+}
+
+function captureConsole(): CapturedConsole { // records console.log calls until restore() puts the original back
+  const stdout: string[] = [];
+  const orig = console.log;
+  console.log = (...args: unknown[]) => {
+    stdout.push(args.map(String).join(" "));
+  };
+  return {
+    stdout,
+    restore() {
+      console.log = orig;
+    },
+    text() {
+      return stdout.join("\n");
+    },
+  };
+}
+
+describe("Mindee CLI - end-to-end with mocked HTTP", () => {
+  before(() => {
+    process.env.MINDEE_V2_API_KEY = "dummy";
+    process.env.MINDEE_V2_API_HOST = "v2-cli-host";
+    process.env.MINDEE_API_KEY = "dummy-v1";
+  });
+
+  after(() => { // deletes the variables outright; values that were set before the suite ran are not restored
+    delete process.env.MINDEE_V2_API_KEY;
+    delete process.env.MINDEE_V2_API_HOST;
+    delete process.env.MINDEE_API_KEY;
+  });
+
+  let capture: CapturedConsole;
+  beforeEach(() => {
+    capture = captureConsole();
+  });
+  afterEach(() => {
+    capture.restore();
+  });
+
+  describe("search-models", () => {
+    it("renders the human summary by default", async () => {
+      mockPool
+        .intercept({ path: /\/v2\/search\/models/, method: "GET" })
+        .reply(
+          200,
+          await fs.readFile(path.join(V2_RESOURCE_PATH, "search/models.json"), { encoding: "utf-8" })
+        );
+
+      const program = buildCli();
+      await program.parseAsync(
+        ["search-models", "--api-key", "dummy"],
+        { from: "user" }
+      );
+
+      const output = capture.text();
+      assert.match(output, /Models\n######\n/);
+      assert.match(output, /:Name: Extraction With Webhooks/);
+      assert.match(output, /Pagination Metadata\n###################\n/);
+    });
+
+    it("renders raw JSON when --raw-json is passed", async () => {
+      mockPool
+        .intercept({ path: /\/v2\/search\/models/, method: "GET" })
+        .reply(
+          200,
+          await fs.readFile(path.join(V2_RESOURCE_PATH, "search/models.json"), { encoding: "utf-8" })
+        );
+
+      const program = buildCli();
+      await program.parseAsync(
+        ["search-models", "--api-key", "dummy", "--raw-json"],
+        { from: "user" }
+      );
+
+      const output = capture.text();
+      const parsed = JSON.parse(output);
+      assert.ok(Array.isArray(parsed.models));
+      assert.strictEqual(parsed.models.length, 5);
+    });
+
+    it("forwards --name and --model-type as query parameters", async () => {
+      mockPool
+        .intercept({
+          path: "/v2/search/models?name=invoice&model_type=extraction",
+          method: "GET",
+        })
+        .reply(
+          200,
+          JSON.parse('{"models":[],"pagination":{"per_page":50,"page":1,"total_items":0,"total_pages":0}}')
+        );
+
+      const program = buildCli();
+      await program.parseAsync(
+        ["search-models",
+          "--api-key", "dummy",
+          "--name", "invoice",
+          "--model-type", "extraction"],
+        { from: "user" }
+      );
+
+      const output = capture.text();
+      assert.match(output, /Pagination Metadata/);
+    });
+  });
+});
diff --git a/tests/cli/commandTree.spec.ts b/tests/cli/commandTree.spec.ts
new file mode 100644
index 000000000..71f3c58bc
--- /dev/null
+++ b/tests/cli/commandTree.spec.ts
@@ -0,0 +1,167 @@
+import assert from "node:assert/strict";
+import { describe, it } from "node:test";
+
+import { buildCli } from "@/cli/index.js";
+
+/**
+ * Structural / help-text checks for the unified `mindee` CLI.
+ *
+ * These tests exercise the command tree built by `buildCli()` without
+ * actually invoking the network — they validate the CLI surface alone.
+ */
+describe("Mindee CLI - command tree", () => {
+  it("registers all V2 top-level commands plus the `v1` group", () => {
+    const program = buildCli();
+    const names = program.commands.map(c => c.name()).sort();
+    assert.deepStrictEqual(names, [
+      "classification",
+      "crop",
+      "extraction",
+      "ocr",
+      "search-models",
+      "split",
+      "v1",
+    ]);
+  });
+
+  it("registers all V1 product subcommands under `v1`", () => {
+    const program = buildCli();
+    const v1 = program.commands.find(c => c.name() === "v1");
+    assert.ok(v1, "v1 group should exist");
+    const v1Names = v1.commands.map(c => c.name()).sort();
+    assert.deepStrictEqual(v1Names, [
+      "barcode-reader",
+      "cropper",
+      "driver-license",
+      "financial-document",
+      "fr-bank-account-details",
+      "fr-carte-grise",
+      "fr-carte-nationale-d-identite",
+      "generated",
+      "international-id",
+      "invoice",
+      "invoice-splitter",
+      "multi-receipts-detector",
+      "passport",
+      "receipt",
+      "resume",
+      "us-bank-check",
+    ]);
+  });
+
+  it("exposes a top-level `--verbose` option", () => {
+    const program = buildCli();
+    const optionNames = program.options.map(o => o.long);
+    assert.ok(optionNames.includes("--verbose"));
+  });
+
+  describe("extraction command", () => {
+    it("exposes the canonical V2 inference options", () => {
+      const program = buildCli();
+      const extraction = program.commands.find(c => c.name() === "extraction");
+      assert.ok(extraction);
+      const longs = extraction.options.map(o => o.long);
+      for (const expected of [
+        "--api-key",
+        "--model-id",
+        "--alias",
+        "--rag",
+        "--raw-text",
+        "--confidence",
+        "--polygon",
+        "--text-context",
+        "--output",
+      ]) {
+        assert.ok(longs.includes(expected), `extraction must expose ${expected}`);
+      }
+    });
+
+    it("requires `--model-id`", () => {
+      const program = buildCli();
+      const extraction = program.commands.find(c => c.name() === "extraction");
+      assert.ok(extraction);
+      const modelId = extraction.options.find(o => o.long === "--model-id");
+      assert.ok(modelId);
+      assert.strictEqual(modelId.required, true); // NOTE(review): if these are commander Options, `required` means "takes a <value>"; "must be supplied" is `mandatory` — confirm which one this test means
+    });
+  });
+
+  describe("non-extraction V2 commands", () => {
+    for (const name of ["classification", "crop", "ocr", "split"]) {
+      it(`${name} does not expose extraction-only flags`, () => {
+        const program = buildCli();
+        const cmd = program.commands.find(c => c.name() === name);
+        assert.ok(cmd);
+        const longs = cmd.options.map(o => o.long);
+        for (const flag of ["--rag", "--raw-text", "--confidence", "--polygon", "--text-context"]) {
+          assert.ok(!longs.includes(flag), `${name} must NOT expose ${flag}`);
+        }
+        assert.ok(longs.includes("--model-id"));
+        assert.ok(longs.includes("--output"));
+      });
+    }
+  });
+
+  describe("search-models command", () => {
+    it("exposes the canonical flags", () => {
+      const program = buildCli();
+      const search = program.commands.find(c => c.name() === "search-models");
+      assert.ok(search);
+      const longs = search.options.map(o => o.long).sort();
+      assert.deepStrictEqual(longs, [
+        "--api-key",
+        "--model-type",
+        "--name",
+        "--raw-json",
+      ]);
+    });
+
+    it("does not take a path argument", () => {
+      const program = buildCli();
+      const search = program.commands.find(c => c.name() === "search-models");
+      assert.ok(search);
+      assert.strictEqual(search.registeredArguments.length, 0);
+    });
+  });
+
+  describe("V1 commands", () => {
+    function v1(name: string) { // builds a fresh CLI and returns the named subcommand of the `v1` group
+      const program = buildCli();
+      const group = program.commands.find(c => c.name() === "v1");
+      assert.ok(group);
+      const cmd = group.commands.find(c => c.name() === name);
+      assert.ok(cmd, `v1 ${name} should exist`);
+      return cmd;
+    }
+
+    it("invoice exposes all-words, full-text and async flags", () => {
+      const longs = v1("invoice").options.map(o => o.long);
+      for (const flag of ["--api-key", "--all-words", "--full-text", "--async", "--output"]) {
+        assert.ok(longs.includes(flag), `invoice must expose ${flag}`);
+      }
+    });
+
+    it("barcode-reader is sync-only and does not expose --async or --all-words", () => {
+      const longs = v1("barcode-reader").options.map(o => o.long);
+      assert.ok(!longs.includes("--async"));
+      assert.ok(!longs.includes("--all-words"));
+      assert.ok(!longs.includes("--full-text")); // also checked, although the test title does not mention it
+      assert.ok(longs.includes("--output"));
+    });
+
+    it("driver-license is async-only", () => {
+      const longs = v1("driver-license").options.map(o => o.long);
+      assert.ok(!longs.includes("--async")); // only proves the flag is absent; nothing here shows the command runs asynchronously
+    });
+
+    it("generated requires endpoint and account", () => {
+      const cmd = v1("generated");
+      const endpoint = cmd.options.find(o => o.long === "--endpoint");
+      const account = cmd.options.find(o => o.long === "--account");
+      assert.ok(endpoint);
+      assert.ok(account);
+      assert.strictEqual(endpoint.required, true); // NOTE(review): same `required` vs `mandatory` question as in the `--model-id` test
+      assert.strictEqual(account.required, true);
+    });
+  });
+});
diff --git a/tests/dependency/missingDependencies.spec.ts b/tests/dependency/missingDependencies.spec.ts
index 9073daeea..dd6de3232 100644
--- a/tests/dependency/missingDependencies.spec.ts
+++ b/tests/dependency/missingDependencies.spec.ts
@@ -1,5 +1,5 @@
 import assert from "node:assert/strict";
-import { describe, it } from "node:test";
+import { describe, it } from "mocha"; // NOTE(review): the other specs in this patch import from "node:test" — confirm this suite really runs under mocha
 
 describe("MindeeV1 - Optional Dependencies #OptionalDepsRemoved", function () {
 
diff --git a/tests/test_v2_cli.sh b/tests/test_v2_cli.sh
index 6a07b1d8c..c5043e8b7 100755
--- a/tests/test_v2_cli.sh
+++ b/tests/test_v2_cli.sh
@@ -14,27 +14,32 @@ do
 
   if echo "${f}" | grep -q "v2_classification.txt"
   then
-    node ./dist/bin/mindeeV2.js -d classification -m "${MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID}" "${TEST_FILE}"
+    node ./dist/bin/mindee.js --verbose classification -m "${MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID}" "${TEST_FILE}"
   fi
 
   if echo "${f}" | grep -q "v2_crop.txt"
   then
-    node ./dist/bin/mindeeV2.js -d crop -m "${MINDEE_V2_SE_TESTS_CROP_MODEL_ID}" "${TEST_FILE}"
+    node ./dist/bin/mindee.js --verbose crop -m "${MINDEE_V2_SE_TESTS_CROP_MODEL_ID}" "${TEST_FILE}"
   fi
 
   if echo "${f}" | grep -q "v2_extraction.txt"
   then
-    node ./dist/bin/mindeeV2.js -d extraction -m "${MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID}" "${TEST_FILE}"
+    node ./dist/bin/mindee.js --verbose extraction -m "${MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID}" "${TEST_FILE}"
   fi
 
   if echo "${f}" | grep -q "v2_ocr.txt"
   then
-    node ./dist/bin/mindeeV2.js -d ocr -m "${MINDEE_V2_SE_TESTS_OCR_MODEL_ID}" "${TEST_FILE}"
+    node ./dist/bin/mindee.js --verbose ocr -m "${MINDEE_V2_SE_TESTS_OCR_MODEL_ID}" "${TEST_FILE}"
   fi
 
   if echo "${f}" | grep -q "v2_split.txt"
   then
-    node ./dist/bin/mindeeV2.js -d split -m "${MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID}" "${TEST_FILE}"
+    node ./dist/bin/mindee.js --verbose split -m "${MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID}" "${TEST_FILE}"
+  fi
+
+  if echo "${f}" | grep -q "v2_search_models.txt"
+  then
+    node ./dist/bin/mindee.js --verbose search-models
   fi
 
   sleep 0.5 # avoid too many request errors
diff --git a/tests/v2/client/searchModels.spec.ts b/tests/v2/client/searchModels.spec.ts
new file mode 100644
index 000000000..4d1415c57
--- /dev/null
+++ b/tests/v2/client/searchModels.spec.ts
@@ -0,0 +1,70 @@
+import path from "path";
+import assert from "node:assert/strict";
+import { after, before, beforeEach, describe, it } from "node:test";
+import { MockAgent, setGlobalDispatcher } from "undici";
+import fs from "node:fs/promises";
+
+import { Client } from "@/index.js";
+import { MindeeHttpErrorV2 } from "@/v2/http/index.js";
+import { V2_RESOURCE_PATH } from "../../index.js";
+
+/**
+ * Standalone test file (separate from `client.spec.ts`) to avoid
+ * leftover catch-all interceptors registered there leaking into these
+ * tests. We use our own MockAgent + host so the registered interceptors
+ * are matched deterministically.
+ */
+const mockAgent = new MockAgent();
+setGlobalDispatcher(mockAgent);
+const mockPool = mockAgent.get("https://v2-search-host");
+
+describe("MindeeV2 - Client.searchModels", () => {
+  let client: Client;
+
+  before(() => {
+    process.env.MINDEE_V2_API_KEY = "dummy";
+    process.env.MINDEE_V2_API_HOST = "v2-search-host";
+  });
+
+  after(() => {
+    delete process.env.MINDEE_V2_API_KEY;
+    delete process.env.MINDEE_V2_API_HOST;
+  });
+
+  beforeEach(() => {
+    client = new Client({ apiKey: "dummy", dispatcher: mockAgent }); // the mock is also passed explicitly, on top of the global dispatcher set above
+  });
+
+  it("returns a fully-formed SearchResponse", async () => {
+    mockPool
+      .intercept({ path: /\/v2\/search\/models/, method: "GET" })
+      .reply(
+        200,
+        await fs.readFile(path.join(V2_RESOURCE_PATH, "search/models.json"), { encoding: "utf-8" })
+      );
+
+    const resp = await client.searchModels("extraction", "extraction");
+    assert.strictEqual(resp.models.length, 5);
+    assert.strictEqual(resp.models[0].name, "Extraction With Webhooks");
+    assert.strictEqual(resp.models[0].webhooks.length, 2);
+    assert.strictEqual(resp.pagination.totalItems, 5);
+  });
+
+  it("rejects with MindeeHttpErrorV2 on 401", async () => {
+    mockPool
+      .intercept({ path: /\/v2\/search\/models/, method: "GET" })
+      .reply(
+        401,
+        { status: 401, detail: "unauthorized", title: "Unauthorized", code: "401-001" }
+      );
+
+    await assert.rejects(
+      client.searchModels(),
+      (error: unknown) => {
+        assert.ok(error instanceof MindeeHttpErrorV2);
+        assert.strictEqual(error.status, 401);
+        return true;
+      }
+    );
+  });
+});
diff --git a/tests/v2/parsing/search.spec.ts b/tests/v2/parsing/search.spec.ts
new file mode 100644
index 000000000..61aaa2665
--- /dev/null
+++ b/tests/v2/parsing/search.spec.ts
@@ -0,0 +1,80 @@
+import path from "path";
+import assert from "node:assert/strict";
+import { describe, it } from "node:test";
+
+import { LocalResponse, SearchResponse } from "@/v2/index.js";
+import { V2_RESOURCE_PATH } from "../../index.js";
+
+const searchPath = path.join(V2_RESOURCE_PATH, "search");
+
+async function loadSearchResponse(resourcePath: string): Promise<SearchResponse> { // reads a fixture through LocalResponse and deserializes it as a SearchResponse
+  const localResponse = new LocalResponse(resourcePath);
+  await localResponse.init();
+  return localResponse.deserializeResponse(SearchResponse);
+}
+
+describe("MindeeV2 - Search Models Response", async () => {
+  it("should load a search response with models and pagination", async () => {
+    const response = await loadSearchResponse(
+      path.join(searchPath, "models.json")
+    );
+
+    assert.ok(response);
+    assert.strictEqual(response.models.length, 5);
+    assert.strictEqual(response.pagination.totalItems, 5);
+    assert.strictEqual(response.pagination.page, 1);
+    assert.strictEqual(response.pagination.perPage, 50);
+    assert.strictEqual(response.pagination.totalPages, 1);
+
+    const first = response.models[0];
+    assert.strictEqual(first.name, "Extraction With Webhooks");
+    assert.strictEqual(first.id, "afde5151-aa11-aa11-9289-fa04e50ca3b9");
+    assert.strictEqual(first.modelType, "extraction");
+    assert.strictEqual(first.webhooks.length, 2);
+    assert.strictEqual(first.webhooks[0].id, "a2286ed9-aa11-aa11-bdc5-2f8496c5641a");
+    assert.strictEqual(first.webhooks[0].name, "FAILURE");
+    assert.strictEqual(first.webhooks[0].url, "https://failure.mindee.com");
+
+    const last = response.models[response.models.length - 1];
+    assert.strictEqual(last.name, "Extraction Without Webhooks Key");
+    assert.strictEqual(last.id, "e14e0923-ee55-ee55-a335-8d2110917d7b");
+    assert.deepStrictEqual(last.webhooks, []);
+  });
+
+  it("should render a human-readable summary", async () => {
+    const response = await loadSearchResponse(
+      path.join(searchPath, "models.json")
+    );
+    const rendered = response.toString();
+
+    assert.match(rendered, /^Models\n######\n/);
+    assert.match(rendered, /\* :Name: Extraction With Webhooks/);
+    assert.match(rendered, /:ID: afde5151-aa11-aa11-9289-fa04e50ca3b9/);
+    assert.match(rendered, /:Model Type: extraction/);
+    assert.match(rendered, /Pagination Metadata\n###################\n/);
+    assert.match(rendered, /:Per Page: 50/);
+    assert.match(rendered, /:Page: 1/);
+    assert.match(rendered, /:Total Items: 5/);
+    assert.match(rendered, /:Total Pages: 1/);
+  });
+
+  it("should expose the raw HTTP payload", async () => {
+    const response = await loadSearchResponse(
+      path.join(searchPath, "models.json")
+    );
+    const raw = response.getRawHttp();
+    assert.ok(Array.isArray(raw["models"]));
+    assert.strictEqual(raw["models"].length, 5);
+  });
+
+  it("should handle empty models gracefully via direct instantiation", () => {
+    const response = new SearchResponse(
+      JSON.parse(
+        '{"models":[],"pagination":{"per_page":50,"page":1,"total_items":0,"total_pages":0}}'
+      )
+    );
+    assert.strictEqual(response.models.length, 0);
+    assert.strictEqual(response.pagination.totalItems, 0);
+    assert.match(response.toString(), /Models\n######\n\n/);
+  });
+});