
Guides & Best Practices

This guide provides practical advice for optimizing your Cloakr.ai implementation for cost, performance, and scalability.

Cost Optimization

Model Selection Strategy

Choose the right model for your use case to optimize costs:

// High-priority tasks: Use premium models
const premiumResponse = await client.chat({
  model: 'gpt-4o',
  prompt: 'Critical business analysis',
  maxTokens: 2000
});

// Medium-priority tasks: Use balanced models
const balancedResponse = await client.chat({
  model: 'gpt-4o-mini',
  prompt: 'General content generation',
  maxTokens: 1000
});

// Low-priority tasks: Use cost-effective models
const costEffectiveResponse = await client.chat({
  model: 'gpt-3.5-turbo',
  prompt: 'Simple text processing',
  maxTokens: 500
});

Token Management

Optimize token usage to reduce costs:

// Set appropriate max_tokens
const response = await client.chat({
  model: 'gpt-4o',
  prompt: 'Summarize this document',
  maxTokens: 200, // Limit response length
  temperature: 0.3 // More focused responses
});

// Use streaming for long responses
const stream = await client.chat({
  model: 'gpt-4o',
  prompt: 'Write a detailed report',
  stream: true,
  maxTokens: 2000
});

// Process stream to control costs
let totalTokens = 0;
for await (const chunk of stream) {
  if (totalTokens > 1000) {
    // Stop processing to control costs
    break;
  }
  totalTokens += chunk.usage?.completionTokens || 0;
  process.stdout.write(chunk.choices[0].delta?.text || '');
}
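
Note that breaking out of the loop only stops local processing; the server may keep generating (and billing) tokens. A minimal sketch of true cancellation, assuming the SDK accepts a standard AbortSignal via a signal option (an assumption; check your SDK version):

// Sketch: cancel generation server-side. The `signal` option is an
// assumption about the SDK; AbortController itself is standard JavaScript.
const controller = new AbortController();
const budgetedStream = await client.chat({
  model: 'gpt-4o',
  prompt: 'Write a detailed report',
  stream: true,
  signal: controller.signal // hypothetical option
});

let spent = 0;
for await (const chunk of budgetedStream) {
  spent += chunk.usage?.completionTokens || 0;
  if (spent > 1000) {
    controller.abort(); // stop the request, not just the loop
    break;
  }
}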

Caching Strategy

Implement intelligent caching to reduce API calls:

import { createHash } from 'node:crypto';
import { CloakrClient, CacheManager } from '@cloakrai/sdk';

const cache = new CacheManager({
  ttl: 3600, // 1 hour
  maxSize: 1000,
  strategy: 'lru'
});

const client = new CloakrClient({
  apiKey: process.env.CLOAKR_API_KEY,
  cache: cache
});

// Derive a deterministic cache key from the request parameters
function getCacheKey(model, prompt, maxTokens) {
  const content = `${model}:${prompt}:${maxTokens}`;
  return `chat:${createHash('sha256').update(content).digest('hex')}`;
}

// Check cache before making an API call
async function getCachedResponse(model, prompt, maxTokens) {
  const cacheKey = getCacheKey(model, prompt, maxTokens);

  // Check cache first
  const cached = await cache.get(cacheKey);
  if (cached) {
    return cached;
  }

  // Make the API call if not cached
  const response = await client.chat({
    model,
    prompt,
    maxTokens
  });

  // Cache the response
  await cache.set(cacheKey, response);

  return response;
}
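
For example, repeated identical calls now hit the cache instead of the API (a sketch; hit behavior depends on the CacheManager configuration above):

// First call goes to the API; the second identical call is served from cache
const first = await getCachedResponse('gpt-4o', 'Summarize this document', 200);
const second = await getCachedResponse('gpt-4o', 'Summarize this document', 200);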

Batch Processing

Process multiple requests efficiently:

async function batchProcess(prompts, batchSize = 5) {
  const results = [];

  for (let i = 0; i < prompts.length; i += batchSize) {
    const batch = prompts.slice(i, i + batchSize);

    // Process batch in parallel
    const batchPromises = batch.map(prompt =>
      client.chat({
        model: 'gpt-4o-mini', // Use cost-effective model
        prompt,
        maxTokens: 500
      })
    );

    const batchResults = await Promise.all(batchPromises);
    results.push(...batchResults);

    // Rate limiting between batches
    await new Promise(resolve => setTimeout(resolve, 1000));
  }

  return results;
}
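
Usage is a single call; batching and pacing are handled internally:

const prompts = ['Summarize A', 'Summarize B', 'Summarize C'];
const summaries = await batchProcess(prompts, 5);
console.log(`Processed ${summaries.length} prompts`);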

Performance Tuning

Connection Optimization

Optimize network connections for better performance:

import { CloakrClient, ConnectionPool } from '@cloakrai/sdk';

const pool = new ConnectionPool({
  maxConnections: 20,
  maxIdleTime: 30000,
  connectionTimeout: 5000,
  keepAlive: true
});

const client = new CloakrClient({
  apiKey: process.env.CLOAKR_API_KEY,
  connectionPool: pool,
  timeout: 30000
});

Parallel Processing

Use parallel processing for better throughput:

async function parallelProcessing(tasks) {
  const concurrency = 10; // Limit concurrent requests
  const semaphore = new Semaphore(concurrency);

  const results = await Promise.all(
    tasks.map(async (task) => {
      await semaphore.acquire();
      try {
        return await client.chat({
          model: 'gpt-4o',
          prompt: task.prompt,
          maxTokens: task.maxTokens
        });
      } finally {
        semaphore.release();
      }
    })
  );

  return results;
}

// Simple semaphore implementation
class Semaphore {
  constructor(max) {
    this.max = max;
    this.current = 0;
    this.queue = [];
  }

  async acquire() {
    if (this.current < this.max) {
      this.current++;
      return Promise.resolve();
    }

    // At capacity: wait until a slot is released
    return new Promise(resolve => {
      this.queue.push(resolve);
    });
  }

  release() {
    this.current--;
    if (this.queue.length > 0) {
      // Hand the freed slot directly to the next waiter
      this.current++;
      const resolve = this.queue.shift();
      resolve();
    }
  }
}

Response Time Optimization

Optimize for faster response times:

// Use streaming for immediate feedback
async function streamingResponse(prompt) {
  const stream = await client.chat({
    model: 'gpt-4o',
    prompt,
    stream: true
  });

  let response = '';
  for await (const chunk of stream) {
    const text = chunk.choices[0].delta?.text || '';
    response += text;

    // Process partial responses at sentence or line boundaries.
    // processPartialResponse is application-defined (e.g. render to the UI).
    if (text.includes('.') || text.includes('\n')) {
      await processPartialResponse(response);
    }
  }

  return response;
}

// Use smaller models for faster responses
async function fastResponse(prompt) {
  return await client.chat({
    model: 'gpt-3.5-turbo', // Faster than GPT-4
    prompt,
    maxTokens: 300,
    temperature: 0.3 // More deterministic
  });
}

Memory Management

Optimize memory usage for large-scale applications:

// Implement request cleanup
class RequestManager {
  constructor() {
    this.activeRequests = new Map();
    this.maxRequests = 100;
  }

  async processRequest(id, request) {
    if (this.activeRequests.size >= this.maxRequests) {
      // Drop the reference to the oldest tracked request.
      // Note: this only frees the Map entry; the request itself still runs.
      const oldestKey = this.activeRequests.keys().next().value;
      this.activeRequests.delete(oldestKey);
    }

    const promise = client.chat(request);
    this.activeRequests.set(id, promise);

    try {
      return await promise;
    } finally {
      this.activeRequests.delete(id);
    }
  }

  cleanup() {
    this.activeRequests.clear();
  }
}
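
A quick usage sketch; request IDs here are arbitrary caller-supplied keys:

const manager = new RequestManager();
const result = await manager.processRequest('req-42', {
  model: 'gpt-4o-mini',
  prompt: 'Classify this ticket',
  maxTokens: 50
});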

Scaling Strategies

Horizontal Scaling

Scale your application horizontally:

// Load balancer configuration
const loadBalancer = {
  endpoints: [
    'https://api.cloakr.ai/v1',
    'https://api2.cloakr.ai/v1',
    'https://api3.cloakr.ai/v1'
  ],
  strategy: 'round-robin',
  healthCheck: true
};

// Client with load balancing
const client = new CloakrClient({
  apiKey: process.env.CLOAKR_API_KEY,
  loadBalancer: loadBalancer
});

Database Optimization

Optimize database operations for embeddings and logs:

// Vector database optimization
const vectorConfig = {
  indexType: 'hnsw', // Hierarchical Navigable Small World
  dimensions: 1536,
  metric: 'cosine',
  efConstruction: 200, // Higher = better index quality, slower builds
  efSearch: 100 // Higher = better recall, slower queries
};

// Batch vector operations (vectorDB is your vector store client)
async function batchVectorOperations(embeddings) {
  const batchSize = 100;
  const results = [];

  for (let i = 0; i < embeddings.length; i += batchSize) {
    const batch = embeddings.slice(i, i + batchSize);
    const batchResult = await vectorDB.batchUpsert(batch);
    results.push(...batchResult);
  }

  return results;
}

Kubernetes Deployment

Deploy with Kubernetes for scalability:

# deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cloakr-app
spec:
  replicas: 3
  selector:
    matchLabels:
      app: cloakr-app
  template:
    metadata:
      labels:
        app: cloakr-app
    spec:
      containers:
        - name: cloakr-app
          image: your-app:latest
          env:
            - name: CLOAKR_API_KEY
              valueFrom:
                secretKeyRef:
                  name: cloakr-secrets
                  key: api-key
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "1Gi"
              cpu: "500m"
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /ready
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 5
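
To scale beyond a fixed replica count, you can pair the Deployment with a HorizontalPodAutoscaler. A minimal sketch; the replica bounds and CPU target here are illustrative, not recommendations:

# hpa.yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: cloakr-app
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: cloakr-app
  minReplicas: 3
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70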

Monitoring and Observability

Metrics Collection

Implement comprehensive metrics:

import { CloakrClient, MetricsCollector } from '@cloakrai/sdk';

class CustomMetricsCollector extends MetricsCollector {
  constructor() {
    super();
    this.metrics = new Map();
  }

  increment(metric, value = 1) {
    const current = this.metrics.get(metric) || 0;
    this.metrics.set(metric, current + value);
  }

  timing(metric, duration) {
    // Store durations under the metric name itself so readers
    // (e.g. the AlertManager below) can look them up directly
    const durations = this.metrics.get(metric) || [];
    durations.push(duration);
    this.metrics.set(metric, durations);
  }

  getMetrics() {
    return Object.fromEntries(this.metrics);
  }
}

const metrics = new CustomMetricsCollector();
const client = new CloakrClient({
  apiKey: process.env.CLOAKR_API_KEY,
  metrics: metrics
});

// Custom event handlers
client.on('request', (data) => {
  metrics.increment('requests.total');
  metrics.timing('requests.duration', data.duration);
});

client.on('error', (error) => {
  metrics.increment('errors.total');
  metrics.increment(`errors.${error.code}`);
});

Logging Strategy

Implement structured logging:

import winston from 'winston';

const logger = winston.createLogger({
  level: 'info',
  format: winston.format.combine(
    winston.format.timestamp(),
    winston.format.json()
  ),
  transports: [
    new winston.transports.File({ filename: 'error.log', level: 'error' }),
    new winston.transports.File({ filename: 'combined.log' })
  ]
});

// Structured logging
client.on('request', (data) => {
  logger.info('Cloakr request', {
    model: data.model,
    tokenCount: data.tokenCount,
    duration: data.duration,
    timestamp: new Date().toISOString()
  });
});

client.on('error', (error) => {
  logger.error('Cloakr error', {
    code: error.code,
    message: error.message,
    timestamp: new Date().toISOString()
  });
});
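
During development it is often useful to mirror logs to the console as well; winston supports adding a transport at runtime:

// Human-readable console output outside production
if (process.env.NODE_ENV !== 'production') {
  logger.add(new winston.transports.Console({
    format: winston.format.simple()
  }));
}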

Alerting

Set up intelligent alerting:

class AlertManager {
  constructor() {
    this.thresholds = {
      errorRate: 0.05, // 5% error rate
      responseTime: 5000, // 5 seconds
      costPerHour: 100 // $100 per hour
    };
  }

  async checkMetrics(metrics) {
    const alerts = [];

    // Check error rate (guard against division by zero)
    const totalRequests = metrics['requests.total'] || 0;
    const errorRate = totalRequests > 0 ? (metrics['errors.total'] || 0) / totalRequests : 0;
    if (errorRate > this.thresholds.errorRate) {
      alerts.push({
        type: 'error_rate_high',
        message: `Error rate ${(errorRate * 100).toFixed(2)}% exceeds threshold`,
        severity: 'high'
      });
    }

    // Check average response time
    const durations = metrics['requests.duration'] || [];
    if (durations.length > 0) {
      const avgResponseTime = durations.reduce((a, b) => a + b, 0) / durations.length;
      if (avgResponseTime > this.thresholds.responseTime) {
        alerts.push({
          type: 'response_time_high',
          message: `Average response time ${avgResponseTime.toFixed(0)}ms exceeds threshold`,
          severity: 'medium'
        });
      }
    }

    // Check hourly cost (assumes a 'cost.hourly' value is recorded elsewhere)
    const hourlyCost = metrics['cost.hourly'] || 0;
    if (hourlyCost > this.thresholds.costPerHour) {
      alerts.push({
        type: 'cost_high',
        message: `Hourly cost $${hourlyCost.toFixed(2)} exceeds threshold`,
        severity: 'high'
      });
    }

    return alerts;
  }

  async sendAlert(alert) {
    // Send to your alerting system (Slack, PagerDuty, etc.)
    console.log('Alert:', alert);
  }
}
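
A minimal wiring sketch, polling the collector from the metrics example above on a fixed interval:

const alertManager = new AlertManager();

// Evaluate thresholds once a minute against the collected metrics
setInterval(async () => {
  const alerts = await alertManager.checkMetrics(metrics.getMetrics());
  for (const alert of alerts) {
    await alertManager.sendAlert(alert);
  }
}, 60000);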

Best Practices

Error Handling

Implement robust error handling:

class RobustClient {
  constructor(apiKey) {
    this.client = new CloakrClient({ apiKey });
    this.retryAttempts = 3;
    this.retryDelay = 1000;
  }

  async chat(request) {
    for (let attempt = 1; attempt <= this.retryAttempts; attempt++) {
      try {
        return await this.client.chat(request);
      } catch (error) {
        if (attempt === this.retryAttempts) {
          throw error;
        }

        if (error.code === 'rate_limit_exceeded') {
          // Honor the server's retry-after hint, falling back to 1s
          const delay = (error.retryAfter || 1) * 1000;
          await new Promise(resolve => setTimeout(resolve, delay));
        } else {
          // Exponential backoff: 1s, 2s, 4s, ...
          const delay = this.retryDelay * Math.pow(2, attempt - 1);
          await new Promise(resolve => setTimeout(resolve, delay));
        }
      }
    }
  }
}
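
Usage mirrors the plain client, so the retry logic stays transparent to callers:

const robust = new RobustClient(process.env.CLOAKR_API_KEY);
const reply = await robust.chat({
  model: 'gpt-4o-mini',
  prompt: 'Draft a status update',
  maxTokens: 200
});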

Security Best Practices

Rotate API keys so that a revoked or compromised key fails over automatically:

// API key rotation
class SecureClient {
  constructor() {
    this.apiKeys = [
      process.env.CLOAKR_API_KEY_1,
      process.env.CLOAKR_API_KEY_2,
      process.env.CLOAKR_API_KEY_3
    ];
    this.currentKeyIndex = 0;
  }

  getCurrentApiKey() {
    return this.apiKeys[this.currentKeyIndex];
  }

  rotateApiKey() {
    this.currentKeyIndex = (this.currentKeyIndex + 1) % this.apiKeys.length;
  }

  async chat(request, attempts = 0) {
    const client = new CloakrClient({
      apiKey: this.getCurrentApiKey()
    });

    try {
      return await client.chat(request);
    } catch (error) {
      // Try the next key, but stop once every key has been attempted
      if (error.code === 'invalid_api_key' && attempts < this.apiKeys.length - 1) {
        this.rotateApiKey();
        return await this.chat(request, attempts + 1);
      }
      throw error;
    }
  }
}

Testing Strategy

Implement comprehensive testing:

// Unit tests
describe('Cloakr Integration', () => {
  let client;

  beforeEach(() => {
    client = new CloakrClient({
      apiKey: process.env.CLOAKR_TEST_API_KEY
    });
  });

  test('should handle successful requests', async () => {
    const response = await client.chat({
      model: 'gpt-4o',
      prompt: 'Hello world',
      maxTokens: 10
    });

    expect(response.choices[0].text).toBeTruthy();
  });

  test('should handle rate limiting', async () => {
    const requests = Array(100).fill().map(() =>
      client.chat({
        model: 'gpt-4o',
        prompt: 'Test',
        maxTokens: 10
      })
    );

    await expect(Promise.all(requests)).rejects.toThrow();
  });
});
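
For fast, deterministic unit tests, stub the client instead of calling the live API. A sketch using Jest's mocking; the mocked response shape follows the examples above:

test('should summarize using the mocked client', async () => {
  // Replace the network call with a canned response
  const mockChat = jest.fn().mockResolvedValue({
    choices: [{ text: 'A short summary.' }]
  });
  client.chat = mockChat;

  const response = await client.chat({
    model: 'gpt-4o',
    prompt: 'Summarize this document',
    maxTokens: 200
  });

  expect(mockChat).toHaveBeenCalledTimes(1);
  expect(response.choices[0].text).toBe('A short summary.');
});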

Next Steps