# Observability Collection Scripts for Nushell Infrastructure
# Secure collection of system metrics, logs, and telemetry data

# Collect comprehensive system metrics
#
# Reads CPU, memory, disk, network and process information (from /proc and
# `df`) and returns one record with the keys: timestamp, hostname,
# collection_version, cpu, memory, disk, network, processes.
# Each section is collected independently; a section that fails is replaced
# by `{error: "..."}` instead of aborting the whole collection.
export def collect-system-metrics []: nothing -> record {
    let timestamp = (date now)

    # CPU metrics
    let cpu_metrics = try {
        # /proc/cpuinfo pads keys with tabs ("processor\t: 0"), so the
        # separator is matched with a regex that strips surrounding whitespace.
        let cpu_info = (
            ^cat /proc/cpuinfo
            | lines
            | parse --regex '^(?P<key>[^:]+?)\s*:\s*(?P<value>.*)$'
        )
        let cpu_count = ($cpu_info | where key == "processor" | length)
        # Not every architecture reports a "model name" line.
        let cpu_model = try {
            $cpu_info | where key == "model name" | first | get value
        } catch {
            "unknown"
        }

        # Load average
        let loadavg = (^cat /proc/loadavg | str trim | split row " ")

        {
            cores: $cpu_count
            model: $cpu_model
            load_1m: ($loadavg | get 0 | into float)
            load_5m: ($loadavg | get 1 | into float)
            load_15m: ($loadavg | get 2 | into float)
        }
    } catch {
        {error: "Failed to collect CPU metrics"}
    }

    # Memory metrics
    let memory_metrics = try {
        # Values are right-aligned with spaces; capture only the digits.
        let meminfo = (
            ^cat /proc/meminfo
            | lines
            | parse --regex '^(?P<key>[^:]+):\s*(?P<value>\d+)'
        )
        let mem_kb = {|name| $meminfo | where key == $name | first | get value | into int }

        let total_mem = (do $mem_kb "MemTotal")
        let free_mem = (do $mem_kb "MemFree")
        let available_mem = (do $mem_kb "MemAvailable")
        let buffers = (do $mem_kb "Buffers")
        let cached = (do $mem_kb "Cached")

        # NOTE: "used" is total minus MemFree, so it includes buffers/cache.
        {
            total_kb: $total_mem
            free_kb: $free_mem
            available_kb: $available_mem
            buffers_kb: $buffers
            cached_kb: $cached
            used_kb: ($total_mem - $free_mem)
            usage_percent: (($total_mem - $free_mem) / $total_mem * 100 | math round --precision 2)
        }
    } catch {
        {error: "Failed to collect memory metrics"}
    }

    # Disk metrics
    let disk_metrics = try {
        # `df` aligns columns with a variable number of spaces, so split on
        # whitespace runs. -P keeps each filesystem on a single line.
        let rows = (
            ^df -kP
            | lines
            | skip 1
            | each {|line|
                let fields = ($line | str trim | split row --regex '\s+')
                {
                    filesystem: ($fields | get 0)
                    total: ($fields | get 1)
                    used: ($fields | get 2)
                    available: ($fields | get 3)
                    percent: ($fields | get 4)
                    # Mount points may contain spaces; rejoin the remainder.
                    mount: ($fields | skip 5 | str join " ")
                }
            }
        )
        $rows
    } catch {
        {error: "Failed to collect disk metrics"}
    }

    # Network metrics (basic)
    let network_metrics = try {
        # Each line is "<iface>: <8 rx counters> <8 tx counters>", padded with
        # spaces. Counter indexes: 0 rx_bytes, 1 rx_packets, 8 tx_bytes,
        # 9 tx_packets.
        let rows = (
            ^cat /proc/net/dev
            | lines
            | skip 2
            | each {|line|
                let parts = ($line | split row --number 2 ":")
                let counters = ($parts | get 1 | str trim | split row --regex '\s+')
                {
                    interface: ($parts | get 0 | str trim)
                    rx_bytes: ($counters | get 0)
                    tx_bytes: ($counters | get 8)
                    rx_packets: ($counters | get 1)
                    tx_packets: ($counters | get 9)
                }
            }
        )
        $rows
    } catch {
        {error: "Failed to collect network metrics"}
    }

    # Process count
    let process_metrics = try {
        # `ls /proc` yields full paths ("/proc/123"); compare the basename.
        let process_count = (
            ls /proc
            | get name
            | path basename
            | where $it =~ '^[0-9]+$'
            | length
        )
        {total: $process_count}
    } catch {
        {error: "Failed to collect process metrics"}
    }

    return {
        timestamp: ($timestamp | format date "%Y-%m-%d %H:%M:%S")
        hostname: ($env.HOSTNAME? | default "unknown")
        collection_version: "1.0.0"
        cpu: $cpu_metrics
        memory: $memory_metrics
        disk: $disk_metrics
        network: $network_metrics
        processes: $process_metrics
    }
}

# Collect container metrics (if running in containerized environment)
#
# Probes for docker, podman and kubectl on PATH and adds one sub-record per
# tool that is found. `container_runtime` is "docker" if docker is found,
# otherwise "podman" if podman is found, otherwise "unknown".
# Probing is best-effort: a failing tool is skipped silently.
export def collect-container-metrics []: nothing -> record {
    let timestamp = (date now)
    mut metrics = {
        timestamp: ($timestamp | format date "%Y-%m-%d %H:%M:%S")
        container_runtime: "unknown"
    }

    # Check for Docker
    try {
        if (which docker | is-not-empty) {
            let containers = (^docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Image}}" | lines | skip 1)
            $metrics = ($metrics | insert docker {
                available: true
                containers: ($containers | length)
                running: ($containers | where $it =~ "Up" | length)
            })
            # container_runtime already exists, so it must be upserted;
            # `insert` raises on an existing column.
            $metrics = ($metrics | upsert container_runtime "docker")
        }
    } catch {}

    # Check for Podman
    try {
        if (which podman | is-not-empty) {
            let containers = (^podman ps --format "table {{.Names}}\t{{.Status}}\t{{.Image}}" | lines | skip 1)
            $metrics = ($metrics | insert podman {
                available: true
                containers: ($containers | length)
                running: ($containers | where $it =~ "Up" | length)
            })
            if ($metrics.container_runtime == "unknown") {
                $metrics = ($metrics | upsert container_runtime "podman")
            }
        }
    } catch {}

    # Check for Kubernetes
    try {
        if (which kubectl | is-not-empty) {
            let pods = (^kubectl get pods --all-namespaces --no-headers | lines)
            $metrics = ($metrics | insert kubernetes {
                available: true
                pods_total: ($pods | length)
                pods_running: ($pods | where $it =~ "Running" | length)
                pods_pending: ($pods | where $it =~ "Pending" | length)
                pods_failed: ($pods | where $it =~ "Failed" | length)
            })
        }
    } catch {}

    return $metrics
}

# Collect application logs with filtering
#
# Gathers entries from the systemd journal, from `docker logs` (only when
# --service is given) and from common log files, then returns at most
# --lines entries. Journal entries are the parsed JSON records emitted by
# journalctl; docker and file entries are records with the keys timestamp,
# source, message, line_number plus container/file.
# Every source is best-effort: one that fails contributes nothing.
export def collect-logs [
    --service(-s): string      # Specific service to collect logs from
    --since: string = "1h"     # Time range (1h, 30m, etc.)
    --level: string = "error"  # Log level filter
    --lines(-l): int = 100     # Maximum lines to collect
]: nothing -> list {
    mut logs = []

    # Systemd journal logs
    try {
        # journalctl expects relative times with a leading "-" ("-1h"),
        # whereas docker accepts the bare "1h" form.
        let journal_since = if ($since =~ '^\d+\s*[a-zA-Z]+$') { $"-($since)" } else { $since }
        # Map common level spellings to journalctl priority names.
        let journal_priority = match $level {
            "error" => "err"
            "warn" => "warning"
            "critical" => "crit"
            _ => $level
        }

        mut journalctl_args = ["--output=json", "--no-pager", $"--since=($journal_since)"]

        if ($service | is-not-empty) {
            $journalctl_args = ($journalctl_args | append ["-u", $service])
        }

        if (($level | is-not-empty) and ($level != "all")) {
            $journalctl_args = ($journalctl_args | append ["-p", $journal_priority])
        }

        if ($lines | is-not-empty) {
            $journalctl_args = ($journalctl_args | append ["-n", ($lines | into string)])
        }

        # Spread the whole argument list into the external command.
        let journal_logs = (
            ^journalctl ...$journalctl_args
            | lines
            | where $it != ""
            | each {|line| $line | from json }
        )
        $logs = ($logs | append $journal_logs)
    } catch {}

    # Container logs (Docker)
    try {
        if ((which docker | is-not-empty) and ($service | is-not-empty)) {
            # `complete` captures stdout and stderr separately; only stdout is
            # kept, i.e. the container's stderr stream is discarded.
            let result = (do { ^docker logs --since $since --tail $lines $service } | complete)
            if $result.exit_code == 0 {
                let container_logs = (
                    $result.stdout
                    | lines
                    | enumerate
                    | each {|item|
                        {
                            timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
                            source: "docker"
                            container: $service
                            message: $item.item
                            line_number: $item.index
                        }
                    }
                )
                $logs = ($logs | append $container_logs)
            }
        }
    } catch {}

    # File-based logs (common locations)
    let log_files = [
        "/var/log/syslog"
        "/var/log/messages"
        "/var/log/kern.log"
        "/var/log/auth.log"
    ]

    for log_file in $log_files {
        try {
            if ($log_file | path exists) {
                let file_logs = (
                    ^tail -n $lines $log_file
                    | lines
                    | enumerate
                    | each {|item|
                        {
                            # Collection time, not the time of the log event.
                            timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
                            source: "file"
                            file: $log_file
                            message: $item.item
                            line_number: $item.index
                        }
                    }
                )
                $logs = ($logs | append $file_logs)
            }
        } catch {}
    }

    return ($logs | first $lines)
}

# Process and analyze log patterns
#
# Returns a record with the total number of entries, the number of entries
# whose `timestamp` falls within the last hour, a case-insensitive count per
# error keyword found in `message`, and the number of entries per `source`.
# Entries without `message`, `source` or a parsable `timestamp` are tolerated:
# they count as an empty message, source "unknown" and "not recent".
export def analyze-logs [logs: list]: nothing -> record {
    let total_logs = ($logs | length)

    if $total_logs == 0 {
        return {
            total: 0
            analysis: "No logs to analyze"
        }
    }

    # Extract the message text once, outside the per-pattern loop.
    let messages = ($logs | each {|row|
        try { $row.message? | default "" | into string } catch { "" }
    })

    # Error pattern analysis
    let error_patterns = ["error", "failed", "exception", "critical", "fatal"]
    mut error_counts = {}

    for pattern in $error_patterns {
        # Built by concatenation: inside $"..." the "(?i)" prefix would be
        # evaluated as an interpolated expression.
        let matcher = ("(?i)" + $pattern)
        let count = ($messages | where $it =~ $matcher | length)
        $error_counts = ($error_counts | insert $pattern $count)
    }

    # Source distribution
    let source_dist = (
        $logs
        | group-by {|row| try { $row.source? | default "unknown" | into string } catch { "unknown" } }
        | transpose key value
        | each {|item| {source: $item.key, count: ($item.value | length)} }
    )

    # Time-based analysis (last hour)
    # Timestamps are stored as formatted strings, so convert before comparing.
    let cutoff = ((date now) - 1hr)
    let recent_logs = ($logs | where {|row|
        try { ($row.timestamp | into datetime) > $cutoff } catch { false }
    })

    return {
        total: $total_logs
        recent_count: ($recent_logs | length)
        error_patterns: $error_counts
        source_distribution: $source_dist
        analysis_timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
    }
}

# Export metrics in various formats
#
# Serializes `metrics` as JSON (default), YAML or CSV. With --output the text
# is saved to that path and a confirmation is printed; without it the
# serialized text is returned.
export def export-metrics [
    metrics: record
    --format(-f): string = "json"  # json, yaml, csv
    --output(-o): string           # Output file path
]: nothing -> any {
    let formatted_data = match $format {
        "yaml" => ($metrics | to yaml)
        "csv" => {
            # Flatten metrics for CSV export
            let flattened = ($metrics | flatten | transpose key value)
            $flattened | to csv
        }
        _ => ($metrics | to json)
    }

    if ($output | is-not-empty) {
        $formatted_data | save $output
        print $"Metrics exported to ($output)"
    } else {
        $formatted_data
    }
}

# Health monitoring function
#
# Collects system and container metrics every --interval seconds until
# --duration seconds have elapsed. With --output each snapshot is appended to
# that file as a JSON document; otherwise a one-line summary is printed.
export def health-monitor [
    --interval(-i): int = 60   # Collection interval in seconds
    --duration(-d): int = 300  # Total monitoring duration in seconds
    --output(-o): string       # Output file for continuous monitoring
]: nothing -> nothing {
    let start_time = (date now)
    let end_time = ($start_time + ($duration * 1sec))

    print $"🔍 Starting health monitoring for ($duration) seconds with ($interval)s intervals"
    print $"📊 Collecting system and container metrics"

    while (date now) < $end_time {
        let current_time = (date now)
        let system_metrics = (collect-system-metrics)
        let container_metrics = (collect-container-metrics)

        let combined_metrics = {
            collection_time: ($current_time | format date "%Y-%m-%d %H:%M:%S")
            system: $system_metrics
            containers: $container_metrics
        }

        if ($output | is-not-empty) {
            # Terminate each snapshot with a newline so consecutive documents
            # in the file stay separated.
            (($combined_metrics | to json) + "\n") | save -a $output
        } else {
            print $"⏰ ($current_time | format date "%H:%M:%S") - CPU: ($system_metrics.cpu?.load_1m?)% | Memory: ($system_metrics.memory?.usage_percent?)%"
        }

        sleep ($interval * 1sec)
    }

    print "✅ Health monitoring completed"
}

# Quick system status check
#
# Collects system and container metrics once and derives an overall status:
# "warning" for a 1-minute load above 4.0 or any filesystem above 90% used,
# "critical" for memory usage above 90%. Returns the status, the list of
# alert messages, the raw metrics and a timestamp.
export def status-check []: nothing -> record {
    let system = (collect-system-metrics)
    let containers = (collect-container-metrics)

    # Determine overall health
    mut health_status = "healthy"
    mut alerts = []

    # CPU load check
    if (($system.cpu?.load_1m? | default 0) > 4.0) {
        $health_status = "warning"
        $alerts = ($alerts | append "High CPU load")
    }

    # Memory usage check
    if (($system.memory?.usage_percent? | default 0) > 90) {
        $health_status = "critical"
        $alerts = ($alerts | append "High memory usage")
    }

    # Disk usage check
    try {
        # Rows whose percentage cannot be parsed (e.g. "-") are skipped
        # instead of discarding the whole check.
        let high_disk = ($system.disk | where {|x|
            try { ($x.percent | str replace "%" "" | into float) > 90 } catch { false }
        })
        if ($high_disk | length) > 0 {
            # Never downgrade an existing "critical" status to "warning".
            if $health_status == "healthy" {
                $health_status = "warning"
            }
            $alerts = ($alerts | append "High disk usage")
        }
    } catch {}

    return {
        status: $health_status
        alerts: $alerts
        metrics: {
            system: $system
            containers: $containers
        }
        timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
    }
}