Press n or j to go to the next uncovered block, b, p or k for the previous block.
import {
  TestCase,
  TestSuite,
  TestResult,
  TestRunResult,
  AgentLabConfig,
} from "./types";
import { createEvaluator } from "./evaluator";
import { parseTestFile } from "./parser";
import * as glob from "glob";
import * as path from "path";

/**
 * Test runner for executing test suites.
 *
 * Discovers test files, parses each one into a suite, calls the configured
 * agent for every test case and hands the agent's response to the evaluator
 * built from the same configuration.
 */
export class TestRunner {
  private readonly config: AgentLabConfig;
  private readonly evaluator: ReturnType<typeof createEvaluator>;

  /**
   * @param config Runner configuration. This class reads `testMatch`,
   *   `timeout` and `agent` from it, and passes the whole object to
   *   `createEvaluator`.
   */
  constructor(config: AgentLabConfig) {
    this.config = config;
    this.evaluator = createEvaluator(config);
  }

  /**
   * Find all test files matching the configured patterns.
   *
   * Uses `config.testMatch` when set, otherwise `**\/*.test.yaml` and
   * `**\/*.test.yml`. Patterns are resolved against the current working
   * directory; anything under `node_modules`, `dist` or `build` is ignored.
   *
   * @returns Absolute file paths with duplicates removed.
   */
  async findTestFiles(): Promise<string[]> {
    const patterns = this.config.testMatch || [
      "**/*.test.yaml",
      "**/*.test.yml",
    ];

    const files: string[] = [];

    for (const pattern of patterns) {
      const matches = glob.sync(pattern, {
        cwd: process.cwd(),
        absolute: true,
        ignore: ["**/node_modules/**", "**/dist/**", "**/build/**"],
      });
      files.push(...matches);
    }

    // Several patterns can match the same file; keep each path once.
    return [...new Set(files)];
  }

  /**
   * Run every discovered test file, one after another.
   *
   * @returns One result per test file, in discovery order.
   * @throws Error with message "No test files found" when discovery returns
   *   nothing.
   */
  async runAll(): Promise<TestRunResult[]> {
    const testFiles = await this.findTestFiles();

    if (testFiles.length === 0) {
      throw new Error("No test files found");
    }

    const results: TestRunResult[] = [];

    for (const file of testFiles) {
      const result = await this.runFile(file);
      results.push(result);
    }

    return results;
  }

  /**
   * Parse a specific test file and run the suite it describes.
   *
   * @param filePath Path handed to `parseTestFile`.
   * @returns The result of running the parsed suite.
   */
  async runFile(filePath: string): Promise<TestRunResult> {
    const suite = parseTestFile(filePath);
    return this.runSuite(suite);
  }

  /**
   * Run the tests of a suite sequentially and aggregate the outcome.
   *
   * @param suite Suite whose `tests` are executed in order.
   * @returns Suite name, pass/fail counts, wall-clock duration in
   *   milliseconds and the individual test results.
   */
  async runSuite(suite: TestSuite): Promise<TestRunResult> {
    const startTime = Date.now();
    const results: TestResult[] = [];

    for (const test of suite.tests) {
      const result = await this.runTest(test);
      results.push(result);
    }

    const duration = Date.now() - startTime;
    const passed = results.filter((r) => r.passed).length;
    const failed = results.length - passed;

    return {
      suiteName: suite.suite,
      totalTests: results.length,
      passed,
      failed,
      duration,
      results,
    };
  }

  /**
   * Run a single test case.
   *
   * Calls the configured agent with the test input, races it against a
   * timeout, then asks the evaluator whether the response is acceptable.
   * Any failure along the way (agent error, timeout, evaluator error) is
   * reported as a failed result rather than thrown.
   *
   * @param testCase Test case to execute.
   * @returns The test result; `error` is set when the run failed before an
   *   evaluation was produced.
   */
  async runTest(testCase: TestCase): Promise<TestResult> {
    const startTime = Date.now();

    try {
      // `||` is deliberate: a timeout of 0 counts as "not set" and falls
      // through to the next source, ending at 30 seconds.
      const timeout = testCase.timeout || this.config.timeout || 30000;

      // Call the user's agent
      const agentPromise = this.config.agent(testCase.input);

      let timer: ReturnType<typeof setTimeout> | undefined;
      const timeoutPromise = new Promise<never>((_, reject) => {
        timer = setTimeout(() => reject(new Error("Test timeout")), timeout);
      });

      let actualResponse: string | Record<string, any>;
      try {
        actualResponse = (await Promise.race([
          agentPromise,
          timeoutPromise,
        ])) as string | Record<string, any>;
      } finally {
        // Without this the pending timer outlives the test and keeps the
        // Node event loop alive until it fires.
        clearTimeout(timer);
      }

      // Grade the response with the configured evaluator. This step is not
      // covered by the timeout above.
      const evaluation = await this.evaluator.evaluate(
        testCase.input,
        actualResponse,
        testCase.expectedBehavior,
        testCase.exampleResponses
      );

      const duration = Date.now() - startTime;

      return {
        testName: testCase.name,
        passed: evaluation.passed,
        actualResponse,
        evaluationReasoning: evaluation.reasoning,
        duration,
      };
    } catch (error: unknown) {
      const duration = Date.now() - startTime;

      // Agents may throw non-Error values (strings, plain objects, null);
      // always surface some textual reason instead of `undefined`.
      const message = error instanceof Error ? error.message : String(error);

      return {
        testName: testCase.name,
        passed: false,
        actualResponse: "",
        error: message,
        duration,
      };
    }
  }
}