Comprehensive testing approaches for AI agents with governance monitoring
// test-setup.js
import { AgentMonitor } from '@agent-governance/node';
/**
 * Build an AgentMonitor configured for tests.
 *
 * The API key, organization id and endpoint are read from TEST_* environment
 * variables and fall back to local placeholders when those are unset or empty.
 * Keys present in `overrides` replace the matching defaults.
 *
 * @param {object} [overrides={}] - Config values that take precedence over the test defaults.
 * @returns {AgentMonitor} A newly constructed monitor.
 */
const createTestMonitor = (overrides = {}) => {
  const { env } = process;
  const testDefaults = {
    apiKey: env.TEST_AGENT_GOVERNANCE_API_KEY || 'test-api-key',
    organizationId: env.TEST_AGENT_GOVERNANCE_ORG_ID || 'test-org',
    endpoint: env.TEST_ENDPOINT || 'http://localhost:3001',
    environment: 'testing',
    enableComplianceChecks: true,
    enableLogging: false, // keep test output quiet
    batchSize: 1, // flush immediately so test behavior is predictable
    flushInterval: 100,
    retryAttempts: 1, // fail fast in tests
    retryDelay: 10,
  };
  return new AgentMonitor({ ...testDefaults, ...overrides });
};
/**
 * Build an agent definition for tests.
 *
 * @param {object} [overrides={}] - Fields that replace the defaults below
 *   (shallow merge: passing `complianceSettings` replaces the whole object).
 * @returns {object} Agent definition with a unique `id` unless one is supplied.
 */
const createTestAgent = (overrides = {}) => ({
  // Date.now() alone repeats when several agents are created within the same
  // millisecond, so append a random suffix (same scheme as generateTestSession).
  id: `test-agent-${Date.now()}-${Math.random().toString(36).slice(2)}`,
  name: 'Test Banking Agent',
  category: 'tool_calling',
  specialty: 'personal_banking',
  version: '1.0.0',
  llmProvider: 'anthropic',
  model: 'claude-3-5-sonnet-20241022',
  description: 'Test agent for banking operations',
  complianceSettings: {
    sr11_7_enabled: true,
    fair_lending_monitoring: true,
    bsa_aml_checks: true,
  },
  ...overrides,
});
// Test data generators
/**
 * Produce a session id of the form `test-session-<ms timestamp>-<base-36 random>`.
 *
 * @returns {string} The generated session id.
 */
const generateTestSession = () => {
  const timestamp = Date.now();
  const randomSuffix = Math.random().toString(36).slice(2);
  return ['test-session', timestamp, randomSuffix].join('-');
};
/**
 * Produce a user id of the form `test-user-<ms timestamp>-<base-36 random>`.
 *
 * The random suffix keeps ids distinct when several users are generated within
 * the same millisecond (Date.now() alone would repeat).
 *
 * @returns {string} The generated user id.
 */
const generateTestUser = () => `test-user-${Date.now()}-${Math.random().toString(36).slice(2)}`;
// Export the shared test helpers defined above.
// NOTE(review): this module uses an ESM `import` at the top but CommonJS
// `module.exports` here — confirm the test transform supports mixing the two.
module.exports = {
createTestMonitor,
createTestAgent,
generateTestSession,
generateTestUser
};
// test-database.js
/**
 * In-memory stand-in for a database, used by tests.
 *
 * Records live in plain arrays (`events`, `violations`, `agents`). Saved records
 * are shallow copies whose `id` is set to the 1-based position at insert time.
 */
class TestDatabase {
  constructor() {
    this.events = [];
    this.violations = [];
    this.agents = [];
  }

  /**
   * Store a copy of `event`, overwriting any `id` it carries.
   * @param {object} event
   * @returns {Promise<void>}
   */
  async saveEvent(event) {
    const record = { ...event };
    record.id = this.events.length + 1;
    this.events.push(record);
  }

  /**
   * Store a copy of `violation`, overwriting any `id` it carries.
   * @param {object} violation
   * @returns {Promise<void>}
   */
  async saveViolation(violation) {
    const record = { ...violation };
    record.id = this.violations.length + 1;
    this.violations.push(record);
  }

  /**
   * @param {*} sessionId
   * @returns {Promise<object[]>} Events whose `sessionId` strictly equals the argument.
   */
  async getEventsForSession(sessionId) {
    const inSession = (event) => event.sessionId === sessionId;
    return this.events.filter(inSession);
  }

  /**
   * @param {*} agentId
   * @returns {Promise<object[]>} Violations whose `agentId` strictly equals the argument.
   */
  async getViolationsForAgent(agentId) {
    const fromAgent = (violation) => violation.agentId === agentId;
    return this.violations.filter(fromAgent);
  }

  /**
   * Replace all three collections with fresh empty arrays.
   * @returns {Promise<void>}
   */
  async clear() {
    Object.assign(this, { events: [], violations: [], agents: [] });
  }

  /** @returns {number} Number of stored events. */
  getEventCount() {
    const { length } = this.events;
    return length;
  }

  /** @returns {number} Number of stored violations. */
  getViolationCount() {
    const { length } = this.violations;
    return length;
  }
}
// Export the in-memory database used by tests (CommonJS export).
module.exports = { TestDatabase };
// tests/unit/agent-monitor.test.js
import { describe, it, beforeEach, afterEach, expect } from '@jest/globals';
import { createTestMonitor, createTestAgent, generateTestSession } from '../test-setup';
// Unit tests for AgentMonitor: event tracking and configuration validation.
describe('AgentMonitor', () => {
  let monitor;
  let agent;
  let sessionId;

  // Each test gets a fresh monitor with one registered agent and a new session id.
  beforeEach(async () => {
    monitor = createTestMonitor();
    agent = createTestAgent();
    sessionId = generateTestSession();
    await monitor.registerAgent(agent);
  });

  // Shut the monitor down after every test.
  afterEach(async () => {
    if (monitor) {
      await monitor.shutdown();
    }
  });

  // NOTE(review): getEventsForSession is not imported in this test file — wire it
  // to your test database (e.g. a TestDatabase instance) before running these tests.
  describe('Event Tracking', () => {
    it('should track conversation lifecycle events', async () => {
      // Track conversation start
      monitor.trackConversationStart(agent.id, sessionId);
      // Track user message
      monitor.trackUserMessage(agent.id, sessionId, 'Hello, I need help with my account');
      // Track agent response
      monitor.trackAgentResponse(agent.id, sessionId, 'I can help you with your account. What do you need?');
      // Track conversation end
      monitor.trackConversationEnd(agent.id, sessionId, {
        duration: 30000,
        messageCount: 2,
        resolutionStatus: 'resolved'
      });

      // Verify events were tracked
      await monitor.flush();
      // This would integrate with your test database
      const events = await getEventsForSession(sessionId);
      expect(events).toHaveLength(4);
      expect(events.map(e => e.interactionType)).toEqual([
        'conversation_start',
        'user_message',
        'agent_response',
        'conversation_end'
      ]);
    });

    it('should track tool calls with results', async () => {
      const toolParams = { accountId: 'test-account-123' };
      const toolResult = { balance: 1250.50, currency: 'USD' };
      const executionTime = 150;

      monitor.trackToolCall(
        agent.id,
        sessionId,
        'get_account_balance',
        toolParams,
        toolResult,
        executionTime
      );
      await monitor.flush();

      // The assertions expect a single trackToolCall to yield both a call and a result event.
      const events = await getEventsForSession(sessionId);
      const toolCallEvent = events.find(e => e.interactionType === 'tool_call');
      const toolResultEvent = events.find(e => e.interactionType === 'tool_result');
      expect(toolCallEvent).toBeDefined();
      expect(toolCallEvent.toolName).toBe('get_account_balance');
      expect(toolCallEvent.toolParameters).toEqual(toolParams);
      expect(toolResultEvent).toBeDefined();
      expect(toolResultEvent.toolResult).toEqual(toolResult);
      expect(toolResultEvent.executionTime).toBe(executionTime);
    });

    it('should handle error tracking', async () => {
      const error = new Error('Database connection failed');
      const errorMetadata = {
        errorType: 'DatabaseError',
        severity: 'high',
        recoverable: false
      };

      monitor.trackError(agent.id, sessionId, error, errorMetadata);
      await monitor.flush();

      const events = await getEventsForSession(sessionId);
      const errorEvent = events.find(e => e.interactionType === 'error');
      expect(errorEvent).toBeDefined();
      expect(errorEvent.errorMessage).toBe('Database connection failed');
      expect(errorEvent.errorType).toBe('DatabaseError');
      expect(errorEvent.severity).toBe('high');
    });
  });

  describe('Configuration Validation', () => {
    // NOTE(review): AgentMonitor is not among this test file's imports — confirm it
    // is imported from '@agent-governance/node' alongside the test-setup helpers.
    it('should validate required configuration fields', () => {
      expect(() => {
        new AgentMonitor({
          // Missing required fields
          apiKey: '',
          organizationId: ''
        });
      }).toThrow('apiKey is required');
    });

    it('should apply default configuration values', () => {
      // Reuse the monitor built by beforeEach (createTestMonitor() with no
      // overrides). Creating a second, shadowing instance here would never be
      // shut down by afterEach.
      expect(monitor.config.batchSize).toBeDefined();
      expect(monitor.config.flushInterval).toBeDefined();
      expect(monitor.config.environment).toBeDefined();
    });

    it('should validate batch size limits', () => {
      expect(() => {
        createTestMonitor({ batchSize: 0 });
      }).toThrow('Number must be greater than or equal to 1');
      expect(() => {
        createTestMonitor({ batchSize: 2000 });
      }).toThrow('Number must be less than or equal to 1000');
    });
  });
});
// tests/unit/compliance-engine.test.js
import { describe, it, beforeEach, expect } from '@jest/globals';
import { ComplianceEngine } from '@agent-governance/node';
// Unit tests for ComplianceEngine: PII detection, fair-lending keywords, custom rules.
describe('ComplianceEngine', () => {
  let engine;
  let mockLogger;

  // Interaction context shared by every test; only the agent response varies.
  const contextFor = (agentResponse) => ({
    agentId: 'test-agent',
    agentCategory: 'tool_calling',
    sessionId: 'test-session',
    timestamp: Date.now(),
    agentResponse
  });

  // Evaluate a single agent response against the engine under test.
  const evaluate = (agentResponse) => engine.evaluateInteraction(contextFor(agentResponse));

  // Fresh engine with a silent mock logger for each test.
  beforeEach(() => {
    mockLogger = {
      debug: jest.fn(),
      info: jest.fn(),
      warn: jest.fn(),
      error: jest.fn()
    };
    engine = new ComplianceEngine({
      organizationId: 'test-org',
      logger: mockLogger
    });
  });

  describe('PII Detection', () => {
    it('should detect SSN patterns', () => {
      const outcome = evaluate('Your Social Security Number is 123-45-6789.');
      expect(outcome.isCompliant).toBe(false);
      expect(outcome.violations).toHaveLength(1);
      const [firstViolation] = outcome.violations;
      expect(firstViolation.rule).toBe('ssn-validation');
      expect(firstViolation.severity).toBe('violation');
      expect(outcome.riskScore).toBeGreaterThan(0);
    });

    it('should detect email addresses', () => {
      const outcome = evaluate('Contact us at hello@aiagentshouse.com for assistance.');
      expect(outcome.isCompliant).toBe(false);
      const mentionsEmail = outcome.violations.some(v => v.description.includes('Email'));
      expect(mentionsEmail).toBe(true);
    });

    it('should detect phone numbers', () => {
      const outcome = evaluate('Please call us at (555) 123-4567.');
      expect(outcome.isCompliant).toBe(false);
      const mentionsPhone = outcome.violations.some(v => v.description.includes('phone'));
      expect(mentionsPhone).toBe(true);
    });

    it('should not flag non-PII numbers', () => {
      const outcome = evaluate('Your account balance is $1,234.56 and your account number ends in 5678.');
      expect(outcome.isCompliant).toBe(true);
      expect(outcome.violations).toHaveLength(0);
    });
  });

  describe('Fair Lending Detection', () => {
    it('should detect discriminatory language', () => {
      const outcome = evaluate('People like you typically have trouble getting approved.');
      expect(outcome.isCompliant).toBe(false);
      const hitKeywordRule = outcome.violations.some(v => v.rule === 'fair-lending-keywords');
      expect(hitKeywordRule).toBe(true);
    });

    it('should allow appropriate banking language', () => {
      const outcome = evaluate('Based on your credit score and income, you qualify for our premium rates.');
      expect(outcome.isCompliant).toBe(true);
    });
  });

  describe('Custom Rules', () => {
    it('should allow adding custom compliance rules', () => {
      // Rule that flags any response containing the literal 'test-keyword'.
      const customRule = {
        id: 'test-custom-rule',
        name: 'Test Custom Rule',
        description: 'Test rule for validation',
        category: 'custom',
        severity: 'warning',
        isActive: true,
        ruleFunction: (context) => {
          const flagged = context.agentResponse?.includes('test-keyword');
          return {
            isCompliant: !flagged,
            violations: flagged ? [{
              rule: 'test-custom-rule',
              severity: 'warning',
              description: 'Test keyword detected'
            }] : [],
            riskScore: flagged ? 25 : 0,
            requiresReview: false
          };
        }
      };
      engine.addRule(customRule);

      const outcome = evaluate('This contains test-keyword in the response.');
      const hitCustomRule = outcome.violations.some(v => v.rule === 'test-custom-rule');
      expect(hitCustomRule).toBe(true);
    });

    it('should allow deactivating rules', () => {
      const ruleId = 'privacy-pii-detection';
      // Deactivate the rule before evaluating
      engine.setRuleActive(ruleId, false);

      const outcome = evaluate('Contact john@example.com for assistance.');
      // The inactive rule must not contribute violations
      const hitInactiveRule = outcome.violations.some(v => v.rule === ruleId);
      expect(hitInactiveRule).toBe(false);
    });
  });
});
// tests/integration/anthropic-integration.test.js
import { describe, it, beforeEach, afterEach, expect } from '@jest/globals';
import { AnthropicAgentMonitor } from '@agent-governance/node';
import { createTestAgent } from '../test-setup';
// Mock the Anthropic SDK module for these tests
jest.mock('@anthropic-ai/sdk');

// Integration tests for AnthropicAgentMonitor wrapping a (mocked) Anthropic client.
describe('Anthropic Integration', () => {
  const MODEL_ID = 'claude-3-5-sonnet-20241022';
  let monitor;
  let mockAnthropic;
  let agent;

  // Wrap the hand-rolled mock client for the registered agent.
  const wrapClient = () => monitor.wrapAnthropic(mockAnthropic, agent.id);

  // Build a single-user-message request carrying a session id.
  const requestFor = (content, sessionId) => ({
    model: MODEL_ID,
    messages: [{ role: 'user', content }],
    sessionId
  });

  beforeEach(async () => {
    monitor = new AnthropicAgentMonitor({
      apiKey: 'test-api-key',
      organizationId: 'test-org',
      environment: 'testing'
    });
    agent = createTestAgent();
    await monitor.registerAgent(agent);
    // Minimal client exposing only messages.create
    mockAnthropic = {
      messages: {
        create: jest.fn()
      }
    };
  });

  afterEach(async () => {
    if (monitor) {
      await monitor.shutdown();
    }
  });

  it('should wrap Anthropic client and track interactions', async () => {
    const cannedReply = {
      content: [{ type: 'text', text: 'Hello! How can I help you today?' }],
      usage: { input_tokens: 10, output_tokens: 8 },
      model: MODEL_ID
    };
    mockAnthropic.messages.create.mockResolvedValue(cannedReply);

    const client = wrapClient();
    const reply = await client.messages.create(requestFor('Hello', 'test-session-123'));

    expect(reply).toEqual(cannedReply);
    // The underlying client is expected to be called without the sessionId field
    expect(mockAnthropic.messages.create).toHaveBeenCalledWith({
      model: MODEL_ID,
      messages: [{ role: 'user', content: 'Hello' }]
    });
    // Verify tracking occurred
    await monitor.flush();
    // Additional assertions would verify events were tracked
  });

  it('should handle tool use in responses', async () => {
    const cannedReply = {
      content: [
        { type: 'text', text: 'I need to check your account balance.' },
        {
          type: 'tool_use',
          id: 'tool_123',
          name: 'get_account_balance',
          input: { accountId: 'acc_456' }
        }
      ],
      usage: { input_tokens: 15, output_tokens: 25 }
    };
    mockAnthropic.messages.create.mockResolvedValue(cannedReply);

    const client = wrapClient();
    await client.messages.create(requestFor('What is my balance?', 'test-session-456'));
    await monitor.flush();
    // Verify tool call was tracked
    // Additional assertions would verify tool call event
  });

  it('should track errors during API calls', async () => {
    const failure = new Error('API timeout');
    mockAnthropic.messages.create.mockRejectedValue(failure);

    const client = wrapClient();
    const pendingCall = client.messages.create(requestFor('Hello', 'test-session-error'));
    await expect(pendingCall).rejects.toThrow('API timeout');
    await monitor.flush();
    // Verify error was tracked
    // Additional assertions would verify error event
  });
});
// tests/integration/banking-workflow.test.js
import { describe, it, beforeEach, afterEach, expect } from '@jest/globals';
import { createTestMonitor, createTestAgent, generateTestSession } from '../test-setup';
// End-to-end banking conversations tracked through the monitor with compliance checks on.
// NOTE(review): getEventsForSession and getViolationsForSession are not imported in
// this test file — wire them to your test database before running these tests.
describe('Banking Workflow Integration', () => {
  let monitor;
  let agent;

  // Fresh monitor and a personal-banking agent with all compliance settings enabled.
  beforeEach(async () => {
    monitor = createTestMonitor({
      enableComplianceChecks: true,
      batchSize: 1 // Immediate flushing for tests
    });
    agent = createTestAgent({
      specialty: 'personal_banking',
      complianceSettings: {
        sr11_7_enabled: true,
        fair_lending_monitoring: true,
        bsa_aml_checks: true
      }
    });
    await monitor.registerAgent(agent);
  });

  afterEach(async () => {
    if (monitor) {
      await monitor.shutdown();
    }
  });

  it('should complete account balance inquiry workflow', async () => {
    const sessionId = generateTestSession();
    const userId = 'test-customer-123';

    // Start conversation
    monitor.trackConversationStart(agent.id, sessionId, userId, {
      channel: 'web_chat',
      customerTier: 'premium'
    });
    // User asks for balance
    monitor.trackUserMessage(
      agent.id,
      sessionId,
      'Can you tell me my checking account balance?',
      userId
    );
    // Agent checks balance using tool
    monitor.trackToolCall(
      agent.id,
      sessionId,
      'get_account_balance',
      { accountId: 'acc_789', accountType: 'checking' },
      { balance: 2547.83, currency: 'USD', lastUpdated: new Date().toISOString() },
      250
    );
    // Agent responds with balance
    monitor.trackAgentResponse(
      agent.id,
      sessionId,
      'Your current checking account balance is $2,547.83.',
      {
        llmLatency: 650,
        tokensUsed: { input: 85, output: 23, total: 108 },
        responseQuality: 95
      }
    );
    // End conversation
    monitor.trackConversationEnd(agent.id, sessionId, {
      duration: 45000,
      messageCount: 3,
      userSatisfaction: 9,
      resolutionStatus: 'resolved'
    });
    await monitor.flush();

    // Verify workflow completion.
    // Six events: start, user_msg, tool_call, tool_result, agent_response, end.
    // (The previous expectation of 5 contradicted the six distinct types asserted below.)
    const events = await getEventsForSession(sessionId);
    expect(events).toHaveLength(6);
    const eventTypes = events.map(e => e.interactionType);
    expect(eventTypes).toContain('conversation_start');
    expect(eventTypes).toContain('user_message');
    expect(eventTypes).toContain('tool_call');
    expect(eventTypes).toContain('tool_result');
    expect(eventTypes).toContain('agent_response');
    expect(eventTypes).toContain('conversation_end');
  });

  it('should detect and handle compliance violations', async () => {
    const sessionId = generateTestSession();
    const userId = 'test-customer-456';

    monitor.trackConversationStart(agent.id, sessionId, userId);
    // User message with PII
    monitor.trackUserMessage(
      agent.id,
      sessionId,
      'My SSN is 123-45-6789 and I need help with my account.',
      userId
    );
    // Agent response that should trigger fair lending violation
    monitor.trackAgentResponse(
      agent.id,
      sessionId,
      'People like you typically need to provide additional documentation for loans.',
      {
        llmLatency: 750,
        tokensUsed: { input: 95, output: 35, total: 130 }
      }
    );
    await monitor.flush();

    // Verify compliance violations were detected
    const violations = await getViolationsForSession(sessionId);
    expect(violations.length).toBeGreaterThan(0);
    const violationTypes = violations.map(v => v.category);
    expect(violationTypes).toContain('privacy'); // For SSN in user message
    expect(violationTypes).toContain('fair_lending'); // For discriminatory language
  });

  it('should handle high-risk transaction scenarios', async () => {
    const sessionId = generateTestSession();
    const userId = 'test-customer-789';

    monitor.trackConversationStart(agent.id, sessionId, userId);
    // User asks about large cash deposit
    monitor.trackUserMessage(
      agent.id,
      sessionId,
      'I want to deposit $9,500 in cash. Can you help me avoid any reporting requirements?',
      userId
    );
    // Track fraud detection check
    monitor.trackToolCall(
      agent.id,
      sessionId,
      'check_suspicious_activity',
      {
        amount: 9500,
        paymentMethod: 'cash',
        customerRequest: 'avoid reporting',
        customerId: userId
      },
      {
        riskScore: 85,
        flags: ['large_cash_amount', 'avoid_reporting_request', 'structuring_potential'],
        recommendation: 'escalate_to_compliance'
      },
      1200
    );
    // Agent responds appropriately
    monitor.trackAgentResponse(
      agent.id,
      sessionId,
      'I need to inform you that cash deposits over certain amounts require reporting as required by federal law. Let me connect you with our compliance team.',
      {
        llmLatency: 890,
        riskScore: 85,
        requiresReview: true,
        escalated: true
      }
    );
    await monitor.flush();

    // Verify BSA/AML violation was detected
    const violations = await getViolationsForSession(sessionId);
    const bsaViolations = violations.filter(v => v.category === 'bsa_aml');
    expect(bsaViolations.length).toBeGreaterThan(0);
  });
});
// tests/performance/load-test.js
import { describe, it, expect } from '@jest/globals';
import { createTestMonitor, createTestAgent } from '../test-setup';
// Throughput, compliance-evaluation and memory smoke tests for the monitor.
// Each test owns its monitor and shuts it down in a `finally` block so a failed
// assertion cannot leave a monitor running.
describe('Performance Tests', () => {
  describe('High Volume Event Tracking', () => {
    it('should handle 1000 events efficiently', async () => {
      const monitor = createTestMonitor({
        batchSize: 100,
        flushInterval: 1000
      });
      try {
        const agent = createTestAgent();
        await monitor.registerAgent(agent);
        const startTime = Date.now();
        const eventCount = 1000;
        const sessionIds = [];
        // Generate multiple sessions
        for (let i = 0; i < 10; i++) {
          sessionIds.push(`load-test-session-${i}`);
        }
        // Track events rapidly, cycling through the four event kinds
        for (let i = 0; i < eventCount; i++) {
          const sessionId = sessionIds[i % sessionIds.length];
          if (i % 4 === 0) {
            monitor.trackConversationStart(agent.id, sessionId);
          } else if (i % 4 === 1) {
            monitor.trackUserMessage(agent.id, sessionId, `Message ${i}`);
          } else if (i % 4 === 2) {
            monitor.trackAgentResponse(agent.id, sessionId, `Response ${i}`);
          } else {
            monitor.trackConversationEnd(agent.id, sessionId);
          }
        }
        // Force flush and measure time
        await monitor.flush();
        const endTime = Date.now();
        const duration = endTime - startTime;
        console.log(`Processed ${eventCount} events in ${duration}ms`);
        console.log(`Average: ${(duration / eventCount).toFixed(2)}ms per event`);
        // Performance assertions
        expect(duration).toBeLessThan(5000); // Should complete within 5 seconds
        expect(duration / eventCount).toBeLessThan(5); // Less than 5ms per event
      } finally {
        await monitor.shutdown();
      }
    });

    it('should handle concurrent tracking from multiple agents', async () => {
      const monitor = createTestMonitor({
        batchSize: 50,
        flushInterval: 500
      });
      try {
        const agentCount = 5;
        const eventsPerAgent = 100;
        const agents = [];
        // Register multiple agents
        for (let i = 0; i < agentCount; i++) {
          const agent = createTestAgent({
            id: `load-test-agent-${i}`,
            name: `Load Test Agent ${i}`
          });
          await monitor.registerAgent(agent);
          agents.push(agent);
        }
        const startTime = Date.now();
        // Track events concurrently from all agents
        const trackingPromises = agents.map(async (agent, agentIndex) => {
          for (let i = 0; i < eventsPerAgent; i++) {
            const sessionId = `agent-${agentIndex}-session-${Math.floor(i / 10)}`;
            monitor.trackUserMessage(
              agent.id,
              sessionId,
              `Concurrent message ${i} from agent ${agentIndex}`
            );
          }
        });
        await Promise.all(trackingPromises);
        await monitor.flush();
        const endTime = Date.now();
        const duration = endTime - startTime;
        const totalEvents = agentCount * eventsPerAgent;
        console.log(`Processed ${totalEvents} events from ${agentCount} agents in ${duration}ms`);
        // Performance assertions
        expect(duration).toBeLessThan(10000); // Should complete within 10 seconds
      } finally {
        await monitor.shutdown();
      }
    });
  });

  describe('Compliance Engine Performance', () => {
    it('should evaluate compliance rules efficiently under load', async () => {
      const monitor = createTestMonitor({
        enableComplianceChecks: true,
        batchSize: 1 // Immediate processing
      });
      try {
        const agent = createTestAgent();
        await monitor.registerAgent(agent);
        const testMessages = [
          'Hello, how can I help you today?',
          'Your account balance is $1,234.56',
          'Please call us at (555) 123-4567', // Should trigger PII detection
          'People like you typically qualify for our basic rates', // Should trigger fair lending
          'You can avoid reporting by splitting the deposit' // Should trigger BSA/AML
        ];
        const startTime = Date.now();
        const iterations = 200;
        for (let i = 0; i < iterations; i++) {
          const sessionId = `compliance-load-test-${i}`;
          const message = testMessages[i % testMessages.length];
          monitor.trackAgentResponse(agent.id, sessionId, message);
        }
        await monitor.flush();
        const endTime = Date.now();
        const duration = endTime - startTime;
        console.log(`Compliance evaluation: ${iterations} messages in ${duration}ms`);
        console.log(`Average: ${(duration / iterations).toFixed(2)}ms per evaluation`);
        // Performance assertions for compliance checking
        expect(duration / iterations).toBeLessThan(10); // Less than 10ms per compliance check
      } finally {
        await monitor.shutdown();
      }
    });
  });

  describe('Memory Usage', () => {
    it('should maintain reasonable memory usage under load', async () => {
      const monitor = createTestMonitor({
        batchSize: 1000, // Large batch to test memory
        flushInterval: 30000 // Long interval to accumulate events
      });
      try {
        const agent = createTestAgent();
        await monitor.registerAgent(agent);
        const initialMemory = process.memoryUsage();
        console.log('Initial memory:', formatMemory(initialMemory));
        // Generate large number of events
        for (let i = 0; i < 5000; i++) {
          const sessionId = `memory-test-session-${Math.floor(i / 100)}`;
          monitor.trackUserMessage(agent.id, sessionId, `Test message ${i}`);
          // Check memory every 1000 events
          if (i % 1000 === 0) {
            const currentMemory = process.memoryUsage();
            console.log(`Memory at ${i} events:`, formatMemory(currentMemory));
          }
        }
        // Sampled once after the loop, before flushing
        const peakMemory = process.memoryUsage();
        console.log('Peak memory:', formatMemory(peakMemory));
        // Flush and check memory after cleanup
        await monitor.flush();
        // Force garbage collection if available
        if (global.gc) {
          global.gc();
        }
        const finalMemory = process.memoryUsage();
        console.log('Final memory:', formatMemory(finalMemory));
        // Memory assertions
        const memoryIncrease = peakMemory.heapUsed - initialMemory.heapUsed;
        const memoryPerEvent = memoryIncrease / 5000;
        console.log(`Memory increase: ${formatBytes(memoryIncrease)}`);
        console.log(`Memory per event: ${formatBytes(memoryPerEvent)}`);
        // Should not use excessive memory per event
        expect(memoryPerEvent).toBeLessThan(1024); // Less than 1KB per event
      } finally {
        await monitor.shutdown();
      }
    });
  });
});
// Utility functions
/**
 * Render the size fields of a memory-usage object as human-readable strings.
 *
 * @param {{rss: number, heapTotal: number, heapUsed: number, external: number}} memoryUsage
 * @returns {{rss: string, heapTotal: string, heapUsed: string, external: string}}
 *   Each field passed through formatBytes.
 */
function formatMemory(memoryUsage) {
  const fields = ['rss', 'heapTotal', 'heapUsed', 'external'];
  const formatted = fields.map((field) => [field, formatBytes(memoryUsage[field])]);
  return Object.fromEntries(formatted);
}
/**
 * Format a byte count using binary (1024-based) units up to GB.
 *
 * Values are rounded to at most two decimals. Unlike a log-based unit lookup,
 * the unit index is clamped, so fractional values stay in "Bytes", values of
 * 1024^4 and above are reported in GB, and negative values (e.g. a heap delta
 * after garbage collection) keep their sign instead of yielding "NaN undefined".
 *
 * @param {number} bytes - Byte count; may be fractional or negative.
 * @returns {string} For example '0 Bytes', '1.5 KB', '-2 KB'.
 */
function formatBytes(bytes) {
  const value = Number(bytes);
  if (value === 0) return '0 Bytes';
  // NaN / ±Infinity cannot be scaled; report them as-is rather than looping.
  if (!Number.isFinite(value)) return `${value} Bytes`;

  const step = 1024;
  const units = ['Bytes', 'KB', 'MB', 'GB'];
  let magnitude = Math.abs(value);
  let unitIndex = 0;
  // Repeated division avoids floating-point error from Math.log at exact powers
  // of 1024, and the bound keeps the index inside `units`.
  while (magnitude >= step && unitIndex < units.length - 1) {
    magnitude /= step;
    unitIndex += 1;
  }

  const sign = value < 0 ? '-' : '';
  return `${sign}${Number(magnitude.toFixed(2))} ${units[unitIndex]}`;
}
// tests/compliance/compliance-test-suite.js
import { describe, it, beforeEach, afterEach, expect } from '@jest/globals';
import { createTestMonitor, createTestAgent } from '../test-setup';
// Table-driven compliance checks: PII, fair lending and BSA/AML.
// NOTE(review): nothing visible in this file writes to the store read by
// getViolationsForSession — confirm violations are recorded there before relying
// on the positive cases below.
describe('Compliance Test Suite', () => {
  let monitor;
  let agent;
  // Counter appended to session ids: Date.now() alone can repeat when consecutive
  // cases run within the same millisecond, which would mix their violations.
  let sessionCounter = 0;

  /**
   * Track one agent response under a fresh, unique session and return the
   * violations recorded for that session.
   * @param {string} prefix - Session id prefix identifying the test group.
   * @param {string} agentResponse - Response text to evaluate.
   * @returns {Promise<object[]>}
   */
  const violationsFor = async (prefix, agentResponse) => {
    sessionCounter += 1;
    const sessionId = `${prefix}-${Date.now()}-${sessionCounter}`;
    monitor.trackAgentResponse(agent.id, sessionId, agentResponse);
    await monitor.flush();
    return getViolationsForSession(sessionId);
  };

  beforeEach(async () => {
    monitor = createTestMonitor({
      enableComplianceChecks: true,
      batchSize: 1
    });
    agent = createTestAgent({
      complianceSettings: {
        sr11_7_enabled: true,
        fair_lending_monitoring: true,
        bsa_aml_checks: true
      }
    });
    await monitor.registerAgent(agent);
  });

  afterEach(async () => {
    if (monitor) {
      await monitor.shutdown();
    }
  });

  describe('PII Detection Test Cases', () => {
    const piiTestCases = [
      {
        name: 'SSN Detection',
        input: 'Your Social Security Number is 123-45-6789',
        expectedViolations: ['ssn-validation'],
        severity: 'violation'
      },
      {
        name: 'Email Detection',
        input: 'Contact john.doe@example.com for assistance',
        expectedViolations: ['privacy-pii-detection'],
        severity: 'violation'
      },
      {
        name: 'Phone Number Detection',
        input: 'Call us at (555) 123-4567 or 555.987.6543',
        expectedViolations: ['privacy-pii-detection'],
        severity: 'violation'
      },
      {
        name: 'Multiple PII Types',
        input: 'John Doe, SSN 987-65-4321, email john@test.com, phone (555) 111-2222',
        expectedViolations: ['ssn-validation', 'privacy-pii-detection'],
        severity: 'violation'
      },
      {
        name: 'Safe Content',
        input: 'Your account balance is $1,234.56 and ends in 5678',
        expectedViolations: [],
        severity: null
      }
    ];

    piiTestCases.forEach(testCase => {
      it(`should handle ${testCase.name}`, async () => {
        const violations = await violationsFor('pii-test', testCase.input);
        if (testCase.expectedViolations.length === 0) {
          // Safe content: no violation-severity findings
          expect(violations.filter(v => v.severity === 'violation')).toHaveLength(0);
        } else {
          // Every expected rule must appear among the recorded violations
          const violationRules = violations.map(v => v.rule);
          testCase.expectedViolations.forEach(expectedRule => {
            expect(violationRules).toContain(expectedRule);
          });
        }
      });
    });
  });

  describe('Fair Lending Test Cases', () => {
    const fairLendingTestCases = [
      {
        name: 'Explicit Discrimination',
        input: 'People like you typically get denied for loans',
        shouldViolate: true
      },
      {
        name: 'Background-Based Discrimination',
        input: 'Based on your background, you might want subprime options',
        shouldViolate: true
      },
      {
        name: 'Legitimate Credit Assessment',
        input: 'Based on your credit score of 720, you qualify for our best rates',
        shouldViolate: false
      },
      {
        name: 'Income-Based Assessment',
        input: 'With your income level, you qualify for loans up to $300,000',
        shouldViolate: false
      }
    ];

    fairLendingTestCases.forEach(testCase => {
      it(`should handle ${testCase.name}`, async () => {
        const violations = await violationsFor('fair-lending-test', testCase.input);
        const fairLendingViolations = violations.filter(v => v.category === 'fair_lending');
        if (testCase.shouldViolate) {
          expect(fairLendingViolations.length).toBeGreaterThan(0);
          expect(fairLendingViolations[0].severity).toBe('violation');
        } else {
          expect(fairLendingViolations).toHaveLength(0);
        }
      });
    });
  });

  describe('BSA/AML Test Cases', () => {
    const bsaAmlTestCases = [
      {
        name: 'Structuring Advice',
        input: 'You could split that into smaller amounts to avoid reporting',
        shouldViolate: true,
        indicators: ['structuring_advice']
      },
      {
        name: 'Avoid Reporting Language',
        input: 'Let me help you avoid those reporting requirements',
        shouldViolate: true,
        indicators: ['avoid_reporting']
      },
      {
        name: 'Large Cash Mention',
        input: 'That large cash deposit will need special handling',
        shouldViolate: false // Just mentioning cash isn't violation
      },
      {
        name: 'Legitimate Compliance Info',
        input: 'Deposits over $10,000 require CTR filing as per federal law',
        shouldViolate: false
      }
    ];

    bsaAmlTestCases.forEach(testCase => {
      it(`should handle ${testCase.name}`, async () => {
        const violations = await violationsFor('bsa-aml-test', testCase.input);
        const bsaViolations = violations.filter(v => v.category === 'bsa_aml');
        if (testCase.shouldViolate) {
          expect(bsaViolations.length).toBeGreaterThan(0);
        } else {
          expect(bsaViolations).toHaveLength(0);
        }
      });
    });
  });
});
// In-memory violation store standing in for a database in tests
let mockViolations = [];

/**
 * Look up stored violations for one session.
 *
 * @param {*} sessionId - Compared with strict equality against each record's `sessionId`.
 * @returns {Promise<object[]>} New array of matching records from `mockViolations`.
 */
async function getViolationsForSession(sessionId) {
  const belongsToSession = (violation) => violation.sessionId === sessionId;
  return mockViolations.filter(belongsToSession);
}
/**
 * Replace `monitor.on` with a Jest mock that captures the
 * 'complianceViolation' handler on `monitor._violationHandler`, so a test can
 * trigger it manually. Registrations for other event names are ignored.
 *
 * @param {object} monitor - Monitor whose `on` method is overwritten.
 */
function setupViolationTracking(monitor) {
  const captureHandler = (event, handler) => {
    if (event !== 'complianceViolation') {
      return;
    }
    // Keep the handler so tests can invoke it directly
    monitor._violationHandler = handler;
  };
  monitor.on = jest.fn(captureHandler);
}
// tests/compliance/regression-tests.js
import { describe, it, beforeEach, afterEach, expect } from '@jest/globals';
import { createTestMonitor, createTestAgent } from '../test-setup';
// Regression cases drawn from previously observed false positives and negatives.
// NOTE(review): getViolationsForSession is not imported in this test file — wire it
// to your violation store before running these tests.
describe('Compliance Regression Tests', () => {
  let monitor;
  let agent;
  // Counter appended to session ids: Date.now() alone can repeat when tests run
  // within the same millisecond, which would mix violations between cases.
  let sessionCounter = 0;

  /**
   * Track one agent response under a fresh, unique session and return the
   * violations recorded for that session.
   * @param {string} agentResponse - Response text to evaluate.
   * @returns {Promise<object[]>}
   */
  const violationsFor = async (agentResponse) => {
    sessionCounter += 1;
    const sessionId = `regression-test-${Date.now()}-${sessionCounter}`;
    monitor.trackAgentResponse(agent.id, sessionId, agentResponse);
    await monitor.flush();
    return getViolationsForSession(sessionId);
  };

  beforeEach(async () => {
    monitor = createTestMonitor({ enableComplianceChecks: true });
    agent = createTestAgent();
    await monitor.registerAgent(agent);
  });

  afterEach(async () => {
    if (monitor) {
      await monitor.shutdown();
    }
  });

  // Test cases based on previous false positives/negatives
  describe('Historical False Positives', () => {
    it('should not flag account numbers as SSNs', async () => {
      // This was previously flagged as SSN but should not be
      const violations = await violationsFor(
        'Your account number ending in 123-45-6789 has been updated'
      );
      const ssnViolations = violations.filter(v => v.rule === 'ssn-validation');
      // Should not detect SSN in account number context
      expect(ssnViolations).toHaveLength(0);
    });

    it('should not flag legitimate credit terms as discriminatory', async () => {
      const violations = await violationsFor(
        'Customers with excellent credit typically receive our lowest rates'
      );
      const fairLendingViolations = violations.filter(v => v.category === 'fair_lending');
      expect(fairLendingViolations).toHaveLength(0);
    });
  });

  describe('Historical False Negatives', () => {
    it('should detect masked discriminatory language', async () => {
      // This was previously missed but should be caught
      const violations = await violationsFor(
        'Individuals from your demographic often face challenges with approval'
      );
      const fairLendingViolations = violations.filter(v => v.category === 'fair_lending');
      // Should detect this as potential discrimination
      expect(fairLendingViolations.length).toBeGreaterThan(0);
    });

    it('should detect indirect structuring suggestions', async () => {
      const violations = await violationsFor(
        'You might consider making multiple smaller deposits over several days'
      );
      const bsaViolations = violations.filter(v => v.category === 'bsa_aml');
      expect(bsaViolations.length).toBeGreaterThan(0);
    });
  });
});
// tests/utils/test-data-generator.js
/**
 * Static factory for canned and synthetic test data used by the banking-agent
 * test suites. Every method is static; the class holds no state.
 */
class TestDataGenerator {
  /**
   * Canned user messages and agent responses, grouped by scenario.
   *
   * @returns {Object<string, {userMessages: string[], agentResponses: string[]}>}
   *   A fresh object on every call, keyed by `accountInquiry`,
   *   `loanApplication` and `suspiciousActivity`.
   */
  static generateBankingScenarios() {
    return {
      accountInquiry: {
        userMessages: [
          'What is my account balance?',
          'Can you show me my checking account?',
          'I need to check my savings balance'
        ],
        agentResponses: [
          'Your checking account balance is $2,547.83',
          'Let me retrieve your account information',
          'I can help you check your balance'
        ]
      },
      loanApplication: {
        userMessages: [
          'I want to apply for a mortgage',
          'Can I get a personal loan?',
          'What are your loan rates?'
        ],
        agentResponses: [
          'I can help you with your loan application',
          'Based on your credit score, you may qualify for...',
          'Let me check what loan products are available'
        ]
      },
      suspiciousActivity: {
        userMessages: [
          'I need to deposit $9,500 in cash',
          'Can I avoid reporting requirements?',
          'How do I structure this transaction?'
        ],
        agentResponses: [
          'Large cash deposits require special handling',
          'All transactions must comply with federal regulations',
          'I need to inform you about reporting requirements'
        ]
      }
    };
  }

  /**
   * Sample strings for PII-detection tests.
   *
   * @returns {{ssns: string[], emails: string[], phones: string[], safePhrases: string[]}}
   *   SSN-, email- and phone-shaped samples, plus `safePhrases` that contain
   *   digits or identifiers without matching those shapes.
   */
  static generatePIIExamples() {
    return {
      ssns: [
        '123-45-6789',
        '987-65-4321',
        '555-00-1234'
      ],
      emails: [
        'john.doe@example.com',
        'customer@bank.com',
        'support@testbank.org'
      ],
      phones: [
        '(555) 123-4567',
        '555.987.6543',
        '555-111-2222'
      ],
      safePhrases: [
        'Your account ends in 1234',
        'Reference number 567890',
        'Transaction ID ABC123'
      ]
    };
  }

  /**
   * Sample agent phrases for compliance-rule tests.
   *
   * @returns {{fairLending: string[], bsaAml: string[], legitimate: string[]}}
   *   Phrases grouped under `fairLending` and `bsaAml`, plus a `legitimate`
   *   group of comparison phrases.
   */
  static generateComplianceViolations() {
    return {
      fairLending: [
        'People like you typically have trouble getting approved',
        'Your kind usually doesn\'t qualify for premium rates',
        'Based on your background, consider subprime options'
      ],
      bsaAml: [
        'You could split this into smaller amounts',
        'Let\'s help you avoid those reporting requirements',
        'Try structuring it as multiple transactions'
      ],
      legitimate: [
        'Based on your credit score of 750, you qualify for our best rates',
        'Your income level supports a loan up to $400,000',
        'Federal law requires reporting for deposits over $10,000'
      ]
    };
  }

  /**
   * Builds a random conversation that alternates user and agent turns,
   * starting with a user message.
   *
   * A scenario is drawn independently for every turn, so consecutive turns
   * are not guaranteed to belong to the same scenario.
   *
   * @param {number} [length=5] Number of turns to generate.
   * @returns {Array<{type: string, content: string}>} Turns with `type`
   *   `'user_message'` (even index) or `'agent_response'` (odd index).
   */
  static generateRandomBankingConversation(length = 5) {
    const scenarios = this.generateBankingScenarios();
    const scenarioKeys = Object.keys(scenarios);
    const pickRandom = (items) => items[Math.floor(Math.random() * items.length)];
    const conversation = [];

    for (let i = 0; i < length; i++) {
      const scenario = scenarios[pickRandom(scenarioKeys)];
      const isUserTurn = i % 2 === 0;
      const pool = isUserTurn ? scenario.userMessages : scenario.agentResponses;
      conversation.push({
        type: isUserTurn ? 'user_message' : 'agent_response',
        content: pickRandom(pool)
      });
    }
    return conversation;
  }

  /**
   * Generates a deterministic batch of synthetic events for load tests.
   *
   * Events are spread round-robin over roughly `eventCount / 10` sessions and
   * cycle through the three event types in order.
   *
   * @param {number} [eventCount=1000] Number of events to generate.
   * @returns {Array<{sessionId: string, eventType: string, content: string, timestamp: number}>}
   *   Generated events; empty when `eventCount` is not positive.
   */
  static generateStressTestData(eventCount = 1000) {
    const eventTypes = ['user_message', 'agent_response', 'tool_call'];
    // At least one session: for fewer than 10 events the plain division
    // yields 0, and `i % 0` is NaN, which produced "stress-test-session-NaN".
    const sessionCount = Math.max(1, Math.floor(eventCount / 10));
    // A single base timestamp keeps events exactly 1 ms apart.
    const baseTimestamp = Date.now();
    const events = [];

    for (let i = 0; i < eventCount; i++) {
      events.push({
        sessionId: `stress-test-session-${i % sessionCount}`,
        eventType: eventTypes[i % eventTypes.length],
        content: `Stress test content ${i}`,
        timestamp: baseTimestamp + i
      });
    }
    return events;
  }
}
// CommonJS export: makes the TestDataGenerator class available to requiring modules.
module.exports = { TestDataGenerator };
// tests/mocks/mock-services.js
/**
 * In-memory stand-in for a compliance service: collects violation records
 * and lets tests query them by exact-match filters.
 */
class MockComplianceService {
  constructor() {
    this.violations = [];
    this.rules = new Map();
  }

  /**
   * Records a violation. The stored copy gets a 1-based sequential `id` and
   * a `timestamp`; both override same-named fields on the input.
   *
   * @param {Object} violation Violation fields to store.
   */
  addViolation(violation) {
    const nextId = this.violations.length + 1;
    const record = { ...violation, id: nextId, timestamp: Date.now() };
    this.violations.push(record);
  }

  /**
   * Returns violations matching every truthy filter given.
   *
   * @param {Object} [filters={}] Optional `sessionId`, `agentId`, `category`.
   * @returns {Object[]} Matching violations; the internal array itself when
   *   no filter is set.
   */
  getViolations(filters = {}) {
    let matches = this.violations;
    for (const field of ['sessionId', 'agentId', 'category']) {
      const wanted = filters[field];
      if (wanted) {
        matches = matches.filter((entry) => entry[field] === wanted);
      }
    }
    return matches;
  }

  /** Discards all recorded violations by swapping in a new empty array. */
  clear() {
    this.violations = [];
  }
}
/**
 * In-memory stand-in for an event store: keeps stored events in an array and
 * lets tests query them by exact-match filters.
 */
class MockEventStore {
  constructor() {
    this.events = [];
  }

  /**
   * Stores an event. The stored copy gets a 1-based sequential `id` and a
   * `storedAt` timestamp; both override same-named fields on the input.
   *
   * @param {Object} event Event fields to store.
   */
  store(event) {
    this.events.push({
      ...event,
      id: this.events.length + 1,
      storedAt: Date.now()
    });
  }

  /**
   * Returns events matching every truthy filter given.
   *
   * The `eventType` filter is compared against an event's `interactionType`.
   * When an event has no `interactionType` it falls back to the event's
   * `eventType` field, so events that only carry `eventType` can be queried
   * as well.
   *
   * @param {Object} [filters={}] Optional `sessionId`, `agentId`, `eventType`.
   * @returns {Object[]} Matching events; the internal array itself when no
   *   filter is set.
   */
  getEvents(filters = {}) {
    // `interactionType` wins when defined; otherwise use `eventType`.
    const kindOf = (event) =>
      (event.interactionType !== undefined ? event.interactionType : event.eventType);

    let filtered = this.events;
    if (filters.sessionId) {
      filtered = filtered.filter((e) => e.sessionId === filters.sessionId);
    }
    if (filters.agentId) {
      filtered = filtered.filter((e) => e.agentId === filters.agentId);
    }
    if (filters.eventType) {
      filtered = filtered.filter((e) => kindOf(e) === filters.eventType);
    }
    return filtered;
  }

  /** Discards all stored events by swapping in a new empty array. */
  clear() {
    this.events = [];
  }
}
/**
 * Test double for an LLM provider: replays canned responses in round-robin
 * order and counts how many times `generate` was called.
 */
class MockLLMProvider {
  /**
   * @param {Array} [responses=[]] Canned responses to cycle through.
   */
  constructor(responses = []) {
    this.responses = responses;
    this.callCount = 0;
  }

  /**
   * Resolves to the next canned response. When there is none (no responses
   * configured, or the selected entry is falsy) a generated placeholder
   * numbered with the current call index is returned instead.
   *
   * @param {*} prompt Ignored by the mock.
   * @returns {Promise<*>} The canned or placeholder response.
   */
  async generate(prompt) {
    const callIndex = this.callCount;
    const available = this.responses.length;
    const canned = available > 0 ? this.responses[callIndex % available] : undefined;
    this.callCount = callIndex + 1;
    if (canned) {
      return canned;
    }
    return { text: `Mock response ${callIndex}`, usage: { tokens: 50 } };
  }

  /**
   * @returns {number} Number of `generate` calls made so far.
   */
  getCallCount() {
    return this.callCount;
  }
}
// CommonJS export: makes the three mock classes available to requiring modules.
module.exports = {
MockComplianceService,
MockEventStore,
MockLLMProvider
};
# .github/workflows/test.yml
# CI pipeline: runs the test suites on every push to main/develop and on pull
# requests targeting main, then runs a compliance audit once the tests pass.
name: Test Suite

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]

jobs:
  # Runs every suite once per Node.js version in the matrix.
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        node-version: [16.x, 18.x, 20.x]
    steps:
      # v4 of checkout/setup-node: the v3 releases run on the retired Node 16 action runtime.
      - uses: actions/checkout@v4
      - name: Use Node.js ${{ matrix.node-version }}
        uses: actions/setup-node@v4
        with:
          node-version: ${{ matrix.node-version }}
          cache: 'npm'
      - name: Install dependencies
        run: npm ci
      - name: Run unit tests
        run: npm run test:unit
        env:
          NODE_ENV: test
      # Only the integration suite receives the governance credentials.
      - name: Run integration tests
        run: npm run test:integration
        env:
          NODE_ENV: test
          TEST_AGENT_GOVERNANCE_API_KEY: ${{ secrets.TEST_API_KEY }}
          TEST_AGENT_GOVERNANCE_ORG_ID: ${{ secrets.TEST_ORG_ID }}
      - name: Run compliance tests
        run: npm run test:compliance
        env:
          NODE_ENV: test
      - name: Run performance tests
        run: npm run test:performance
        env:
          NODE_ENV: test
      - name: Generate coverage report
        run: npm run test:coverage
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage/lcov.info
      - name: Lint code
        run: npm run lint
      - name: Type check
        run: npm run type-check

  # Runs only after every matrix leg of `test` has succeeded.
  compliance-audit:
    runs-on: ubuntu-latest
    needs: test
    steps:
      - uses: actions/checkout@v4
      - name: Use Node.js 18.x
        uses: actions/setup-node@v4
        with:
          node-version: 18.x
          cache: 'npm'
      - name: Install dependencies
        run: npm ci
      - name: Run compliance audit
        run: npm run audit:compliance
        env:
          NODE_ENV: test
      - name: Generate compliance report
        run: npm run report:compliance
      # upload-artifact v3 is deprecated; v4 takes the same `name`/`path` inputs.
      - name: Upload compliance artifacts
        uses: actions/upload-artifact@v4
        with:
          name: compliance-reports
          path: reports/compliance/
// package.json test scripts
{
"scripts": {
"test": "jest",
"test:unit": "jest tests/unit",
"test:integration": "jest tests/integration",
"test:compliance": "jest tests/compliance",
"test:performance": "jest tests/performance --maxWorkers=1",
"test:coverage": "jest --coverage",
"test:watch": "jest --watch",
"test:ci": "jest --ci --coverage --watchAll=false",
"audit:compliance": "node scripts/compliance-audit.js",
"report:compliance": "node scripts/generate-compliance-report.js",
"lint": "eslint src tests",
"type-check": "tsc --noEmit"
},
"jest": {
"testEnvironment": "node",
"coverageDirectory": "coverage",
"collectCoverageFrom": [
"src/**/*.{js,ts}",
"!src/**/*.d.ts",
"!src/**/*.test.{js,ts}"
],
"coverageThreshold": {
"global": {
"branches": 80,
"functions": 80,
"lines": 80,
"statements": 80
}
},
"setupFilesAfterEnv": ["<rootDir>/tests/setup.js"],
"testTimeout": 30000
}
}
Test Organization
`describe` blocks
Test Data Management
Compliance Testing
Performance Testing
CI/CD Integration
Flaky Tests
Slow Test Execution
Test Environment Issues
Compliance Test Failures