# Production Environment Configuration Template
# Copy this file to config.prod.toml for production-ready settings
#
# This template provides secure, performance-optimized settings for production:
# - Minimal logging to reduce overhead
# - Security-focused configurations
# - Production provider defaults
# - Optimized performance settings
# - Robust error handling and validation

# =============================================================================
# PRODUCTION CORE CONFIGURATION
# =============================================================================

# Identity of this configuration profile.
[core]
version = "1.0.0"
name = "provisioning-system-prod"

# =============================================================================
# PRODUCTION PATHS
# =============================================================================
# Configured for production deployment standards

[paths]
# Production base path - typically system-wide installation
# Standard production locations:
# base = "/opt/provisioning"        # Standard system location
# base = "/usr/local/provisioning"  # Alternative system location
# base = "/app/provisioning"        # Container deployment
# base = "/srv/provisioning"        # Service directory
base = "/opt/provisioning"

# Production paths follow security best practices
# All paths inherit from base for consistency
# NOTE: `{{paths.base}}` is not TOML syntax; TOML stores it as a literal
# string. It is presumably expanded by the provisioning config loader -
# TODO confirm.
kloud = "{{paths.base}}/infra"
providers = "{{paths.base}}/providers"
taskservs = "{{paths.base}}/taskservs"
# NOTE(review): the directory is singular ("cluster") while the key is plural -
# confirm this matches the on-disk layout.
clusters = "{{paths.base}}/cluster"
resources = "{{paths.base}}/resources"
templates = "{{paths.base}}/templates"
tools = "{{paths.base}}/tools"
core = "{{paths.base}}/core"

[paths.files]
# Production configuration files with secure defaults
settings = "{{paths.base}}/kcl/settings.k"
keys = "{{paths.base}}/keys/prod-keys.yaml"
requirements = "{{paths.base}}/requirements.yaml"
notify_icon = "{{paths.base}}/resources/icon.png"

# =============================================================================
# PRODUCTION SECURITY AND DEBUGGING
# =============================================================================
# Minimal debugging for security and performance

[debug]
# Disable debug mode in production for security
enabled = false

# Never show metadata in production logs
metadata = false

# Never enable check mode by default in production
check = false

# Disable remote debugging in production
remote = false

# Use warning level logging to capture only important events
# This reduces log volume while maintaining operational visibility
log_level = "warn"

# Keep terminal features enabled (no_terminal = false leaves them on)
no_terminal = false

# =============================================================================
# PRODUCTION OUTPUT CONFIGURATION
# =============================================================================

[output]
# Use less for reliable paging in production environments
file_viewer = "less"

# YAML format for human-readable production output
format = "yaml"

# =============================================================================
# PRODUCTION SOPS CONFIGURATION
# =============================================================================
# Secure secrets management for production

[sops]
# Enable SOPS for production secret management
use_sops = true

# Production SOPS configuration with strict security
config_path = "{{paths.base}}/.sops.yaml"

# Secure key search paths for production
# Only search trusted, secure locations
# NOTE(review): entries are presumably tried in the order listed - confirm
# against the loader before reordering.
key_search_paths = [
    "/etc/sops/age/keys.txt",
    "{{paths.base}}/keys/age.txt",
    "/var/lib/provisioning/keys/age.txt",
]

# =============================================================================
# PRODUCTION RUNTIME CONFIGURATION
# =============================================================================

[taskservs]
# Production runtime directory with proper permissions
# (permissions must be set on the host; this file only names the path)
run_path = "/var/lib/provisioning/taskservs"

[clusters]
# Production cluster runtime with persistence
run_path = "/var/lib/provisioning/clusters"

[generation]
# Production generation directory
dir_path = "/var/lib/provisioning/generated"
# NOTE(review): defs_file is a bare file name; presumably resolved relative to
# dir_path - TODO confirm.
defs_file = "prod-defs.toml"

# =============================================================================
# PRODUCTION PROVIDER CONFIGURATION
# =============================================================================
# Production-ready cloud provider settings

[providers]
# Default to AWS for production deployments
# Change to your primary production cloud provider
# (the value names one of the [providers.<name>] tables in this file)
default = "aws"

# AWS Production Configuration
[providers.aws]
# Use default AWS endpoints for production (left empty)
api_url = ""
# Use IAM roles/instance profiles for authentication (left empty so no
# credential is stored in this file)
auth = ""
# Use CLI interface for production stability
interface = "CLI"

# UpCloud Production Configuration
[providers.upcloud]
# Standard UpCloud API endpoint
api_url = "https://api.upcloud.com/1.3"
# Use API keys stored in environment/SOPS (left empty so no credential is
# stored in this file)
auth = ""
# Use CLI interface for production
interface = "CLI"

# Local Provider (disabled in production)
# NOTE(review): no key in this table actually disables the provider; it is
# presumably "disabled" only because providers.default points elsewhere -
# confirm.
[providers.local]
# Not typically used in production
api_url = ""
auth = ""
interface = "CLI"

# =============================================================================
# PRODUCTION ENVIRONMENT SETTINGS
# =============================================================================

# Production environment defaults
# These repeat the values set in [debug], [providers] and [output] in this
# file, keyed by the same section.key paths.
[environments.prod]
debug.enabled = false
debug.log_level = "warn"
debug.metadata = false
debug.check = false
debug.remote = false
providers.default = "aws"
output.format = "yaml"
output.file_viewer = "less"

# Development override (if needed for production debugging)
# Turns debug and check mode on and switches the default provider to "local".
[environments.dev]
debug.enabled = true
debug.log_level = "info"
debug.check = true
providers.default = "local"
output.format = "json"

# Testing environment for production validation
# Same provider and output format as prod, but with check mode on and
# info-level logging.
[environments.test]
debug.enabled = false
debug.log_level = "info"
debug.check = true
providers.default = "aws"
output.format = "yaml"

# =============================================================================
# PRODUCTION PERFORMANCE OPTIMIZATION
# =============================================================================

# Performance settings optimized for production workloads
[performance]
# Higher parallelism for production efficiency
parallel_operations = 8
# Longer timeouts for production reliability
timeout_seconds = 600
# Enable caching for better performance
cache_enabled = true
# Production cache directory
cache_dir = "/var/cache/provisioning"
# Cache retention for production
cache_retention_hours = 24

# =============================================================================
# PRODUCTION SECURITY CONFIGURATION
# =============================================================================

# Security settings for production environment
[security]
# Always require confirmation for destructive operations
require_confirmation = true
# Never log sensitive data in production
log_sensitive_data = false
# Enable strict validation in production
strict_validation = true
# Production backup settings
auto_backup = true
backup_dir = "/var/backups/provisioning"
# Backup retention policy
# NOTE(review): [backup] in this file uses the same directory with
# retention_days = 90; confirm which retention applies.
backup_retention_days = 30
# Encrypt backups in production
backup_encryption = true
# Audit logging for production
audit_enabled = true
audit_log_path = "/var/log/provisioning/audit.log"

# =============================================================================
# PRODUCTION MONITORING AND ALERTING
# =============================================================================

# Production monitoring configuration
[monitoring]
# Enable comprehensive monitoring
enabled = true
# Production metrics endpoint (example.com is a placeholder - replace it)
endpoint = "https://metrics.example.com/provisioning"
# Monitoring interval
interval = "60s"
# Health check configuration
health_check_enabled = true
health_check_port = 8080
# Log aggregation for production (example.com is a placeholder - replace it)
log_endpoint = "https://logs.example.com/provisioning"

# Production alerting
[alerting]
# Enable production alerting
enabled = true
# Alert channels (addresses and webhook below are placeholders - replace them)
email_enabled = true
email_recipients = ["ops@example.com", "devops@example.com"]
slack_enabled = true
# A real Slack webhook URL is a credential; keep it out of plain-text config.
slack_webhook = "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK"
# PagerDuty integration
pagerduty_enabled = true
# Placeholder value - supply the real key through SOPS, not in plain text.
pagerduty_key = "SOPS_ENCRYPTED_KEY"
# Alert thresholds
# NOTE(review): units and time window are not stated here (presumably event
# counts) - confirm, including why warning (10) is higher than error (5).
error_threshold = 5
warning_threshold = 10

# =============================================================================
# PRODUCTION BACKUP AND DISASTER RECOVERY
# =============================================================================

# Production backup configuration
[backup]
# Enable automated backups
enabled = true
# Backup schedule (production frequency), cron syntax
schedule = "0 2 * * *"  # Daily at 2 AM
# Backup retention policy
# NOTE(review): [security] in this file sets backup_retention_days = 30 for the
# same directory; confirm which retention applies.
retention_days = 90
# Backup storage location
location = "/var/backups/provisioning"
# Remote backup storage (bucket name looks like a placeholder - confirm)
remote_enabled = true
remote_location = "s3://company-backups/provisioning/"
# Backup encryption
encryption_enabled = true
# Backup verification
verification_enabled = true

# Disaster recovery settings
[disaster_recovery]
# Enable DR procedures
enabled = true
# DR site configuration
dr_site = "us-west-2"
# RTO and RPO targets (recovery time / recovery point objectives, in minutes)
rto_minutes = 60
rpo_minutes = 15
# DR testing schedule, cron syntax: 03:00 on day 1 of every month
test_schedule = "0 3 1 * *"  # Monthly DR testing

# =============================================================================
# PRODUCTION COMPLIANCE AND GOVERNANCE
# =============================================================================

# Compliance settings for production
[compliance]
# Enable compliance monitoring
enabled = true
# Compliance frameworks
frameworks = ["SOC2", "PCI-DSS", "GDPR"]
# Compliance reporting
reporting_enabled = true
report_frequency = "monthly"
# Data retention policies
data_retention_days = 2555  # 7 years (7 x 365)
# Encryption requirements
encryption_at_rest = true
encryption_in_transit = true

# Governance settings
[governance]
# Change management
change_approval_required = true
# Configuration drift detection
drift_detection_enabled = true
drift_check_interval = "24h"
# Policy enforcement
policy_enforcement_enabled = true
# Resource tagging requirements
required_tags = ["Environment", "Owner", "Project", "CostCenter"]

# =============================================================================
# PRODUCTION INTEGRATION SETTINGS
# =============================================================================

# CI/CD integration for production
[cicd]
# Enable CI/CD integration
enabled = true
# Pipeline triggers
trigger_on_config_change = true
# Deployment gates
require_approval = true
# Automated testing
run_tests = true
# NOTE(review): unit is not stated; presumably seconds (1800 = 30 min) -
# confirm.
test_timeout = 1800
# Rollback capability
auto_rollback_enabled = true

# ITSM integration
[itsm]
# ServiceNow integration (instance URL looks like a placeholder - confirm)
servicenow_enabled = true
servicenow_instance = "https://company.service-now.com"
# Change request automation
auto_create_change_requests = true
# Incident management
auto_create_incidents = true

# =============================================================================
# PRODUCTION RESOURCE MANAGEMENT
# =============================================================================

# Resource quotas and limits for production
[resources]
# CPU limits
max_cpu_cores = 32
# Memory limits
max_memory_gb = 128
# Storage limits
max_storage_gb = 1000
# Network limits
max_bandwidth_mbps = 1000
# Instance limits
max_instances = 100

# Cost management
[cost_management]
# Enable cost tracking
enabled = true
# Budget alerts
budget_alerts_enabled = true
# NOTE(review): currency is not stated here - confirm.
monthly_budget_limit = 10000
# Cost optimization (automatic optimization is off; only the review is
# scheduled)
auto_optimize = false
# Cron syntax: 04:00 every Sunday
optimization_schedule = "0 4 * * 0"  # Weekly optimization review

# =============================================================================
# PRODUCTION OPERATIONAL PROCEDURES
# =============================================================================

# Maintenance windows
[maintenance]
# Scheduled maintenance
enabled = true
# Maintenance window schedule, cron syntax
schedule = "0 3 * * 0"  # Sunday 3 AM
# Maintenance duration
duration_hours = 4
# Notification before maintenance
notification_hours = 24

# Incident response
[incident_response]
# Enable automated incident response
enabled = true
# Response team notifications (example.com addresses are placeholders)
primary_contact = "ops@example.com"
escalation_contact = "management@example.com"
# Response time targets
response_time_minutes = 15
resolution_time_hours = 4

# =============================================================================
# PRODUCTION USAGE GUIDELINES
# =============================================================================
#
# Production Deployment Checklist:
# --------------------------------
#
# 1. Security Review:
#    □ SOPS keys properly secured
#    □ IAM roles configured with least privilege
#    □ Network security groups configured
#    □ Audit logging enabled
#
# 2. Performance Validation:
#    □ Resource quotas set appropriately
#    □ Monitoring and alerting configured
#    □ Backup and DR procedures tested
#    □ Load testing completed
#
# 3. Compliance Verification:
#    □ Required tags applied to all resources
#    □ Data encryption enabled
#    □ Compliance frameworks configured
#    □ Change management processes in place
#
# 4. Operational Readiness:
#    □ Runbooks created and tested
#    □ On-call procedures established
#    □ Incident response tested
#    □ Documentation updated
#
# Production Operations Commands:
# ------------------------------
#
# 1. Health Check:
#    ./core/nulib/provisioning validate config --strict
#
# 2. Deploy Infrastructure:
#    ./core/nulib/provisioning server create --infra production
#
# 3. Monitor Operations:
#    ./core/nulib/provisioning show servers --infra production --out yaml
#
# 4. Backup Configuration:
#    ./core/nulib/provisioning backup create --infra production
#
# 5. Emergency Procedures:
#    ./core/nulib/provisioning cluster delete --infra production --emergency
#
# =============================================================================
# PRODUCTION TROUBLESHOOTING
# =============================================================================
#
# Common Production Issues:
# ------------------------
#
# 1. Authentication Failures:
#    - Check IAM roles and policies
#    - Verify SOPS key access
#    - Validate provider credentials
#
# 2. Performance Issues:
#    - Review parallel_operations setting
#    - Check timeout_seconds values
#    - Monitor resource utilization
#
# 3. Security Alerts:
#    - Review audit logs
#    - Check compliance status
#    - Validate encryption settings
#
# 4. Backup Failures:
#    - Verify backup storage access
#    - Check retention policies
#    - Test recovery procedures
#
# 5. Monitoring Gaps:
#    - Validate monitoring endpoints
#    - Check alert configurations
#    - Test notification channels