feat(taskserv): implement real-time version checking with configurable HTTP client

- Add: GitHub API integration for live version checking in taskserv management
- Add: HTTP client configuration option (http.use_curl) in config.defaults.toml
- Add: Helper function fetch_latest_version with curl/http get support
- Fix: Settings path structure for prov_data_dirpath access pattern
- Remove: Legacy simulation code for version checking
- Update: Core configuration name from "provisioning-system" to "provisioning"
- Clean: Remove obsolete example configs and infrastructure files
Jesús Pérez 2025-09-24 01:55:06 +01:00
parent 38a7470da0
commit 3c3ef47f7f
No known key found for this signature in database
GPG key ID: 9F243E355E0BC939
34 changed files with 5942 additions and 13 deletions
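The commit message references a fetch_latest_version helper gated by the new http.use_curl setting. A minimal sketch of what such a helper can look like in Nushell follows; the repository path, flag handling, and body are illustrative assumptions, not the code from this commit:

# Hedged sketch: resolve the latest release tag via the GitHub API.
# use_curl mirrors the http.use_curl toggle from config.defaults.toml;
# the repo argument is a placeholder.
def fetch_latest_version [repo: string, use_curl: bool]: nothing -> string {
let url = $"https://api.github.com/repos/($repo)/releases/latest"
let release = if $use_curl {
(^curl -sL $url | from json)
} else {
(http get $url)
}
$release | get tag_name | str replace -r '^v' ''
}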


@@ -0,0 +1,347 @@
# Observability Collection Scripts for Nushell Infrastructure
# Secure collection of system metrics, logs, and telemetry data
# Collect comprehensive system metrics
export def collect-system-metrics []: nothing -> record {
let timestamp = (date now)
mut metrics = {
timestamp: ($timestamp | format date "%Y-%m-%d %H:%M:%S")
hostname: ($env.HOSTNAME? | default "unknown")
collection_version: "1.0.0"
}
# CPU metrics
let cpu_metrics = try {
let cpu_info = (cat /proc/cpuinfo | lines | where $it =~ "processor|model name|cpu MHz" | parse "{key}: {value}" | each { |row| $row | update key { str trim } | update value { str trim } })
let cpu_count = ($cpu_info | where key == "processor" | length)
let cpu_model = ($cpu_info | where key =~ "model name" | first | get value)
# Load average
let loadavg = (cat /proc/loadavg | split row " ")
{
cores: $cpu_count
model: $cpu_model
load_1m: ($loadavg | get 0 | into float)
load_5m: ($loadavg | get 1 | into float)
load_15m: ($loadavg | get 2 | into float)
}
} catch {
{error: "Failed to collect CPU metrics"}
}
$metrics = ($metrics | insert cpu $cpu_metrics)
# Memory metrics
try {
let meminfo = (cat /proc/meminfo | lines | parse "{key}: {value} kB")
let total_mem = ($meminfo | where key == "MemTotal" | first | get value | into int)
let free_mem = ($meminfo | where key == "MemFree" | first | get value | into int)
let available_mem = ($meminfo | where key == "MemAvailable" | first | get value | into int)
let buffers = ($meminfo | where key == "Buffers" | first | get value | into int)
let cached = ($meminfo | where key == "Cached" | first | get value | into int)
$metrics = ($metrics | insert memory {
total_kb: $total_mem
free_kb: $free_mem
available_kb: $available_mem
buffers_kb: $buffers
cached_kb: $cached
used_kb: ($total_mem - $free_mem)
usage_percent: (($total_mem - $free_mem) / $total_mem * 100 | math round --precision 2)
})
} catch {
$metrics = ($metrics | insert memory {error: "Failed to collect memory metrics"})
}
# Disk metrics
try {
let disk_usage = (df -k | lines | skip 1 | each { |line| $line | str replace -ra '\s+' ' ' } | parse "{filesystem} {total} {used} {available} {percent} {mount}")
$metrics = ($metrics | insert disk ($disk_usage | select filesystem total used available percent mount))
} catch {
$metrics = ($metrics | insert disk {error: "Failed to collect disk metrics"})
}
# Network metrics (basic)
try {
let network_stats = (cat /proc/net/dev | lines | skip 2 | each { |line| $line | str trim | str replace -ra '\s+' ' ' } | parse "{interface}: {rx_bytes} {rx_packets} {rx_errs} {rx_drop} {rx_fifo} {rx_frame} {rx_compressed} {rx_multicast} {tx_bytes} {tx_packets} {tx_errs} {tx_drop} {tx_fifo} {tx_colls} {tx_carrier} {tx_compressed}")
$metrics = ($metrics | insert network ($network_stats | select interface rx_bytes tx_bytes rx_packets tx_packets))
} catch {
$metrics = ($metrics | insert network {error: "Failed to collect network metrics"})
}
# Process count
try {
let process_count = (ls /proc | where {|row| ($row.name | path basename) =~ '^[0-9]+$'} | length)
$metrics = ($metrics | insert processes {
total: $process_count
})
} catch {
$metrics = ($metrics | insert processes {error: "Failed to collect process metrics"})
}
return $metrics
}
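# Illustrative usage (added example, not part of the original commit):
# take one snapshot and report memory pressure and load.
export def demo-metrics-snapshot []: nothing -> nothing {
let metrics = (collect-system-metrics)
print $"Memory used: ($metrics.memory.usage_percent? | default 'n/a')%"
print $"Load \(1m): ($metrics.cpu.load_1m? | default 'n/a')"
}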
# Collect container metrics (if running in containerized environment)
export def collect-container-metrics []: nothing -> record {
let timestamp = (date now)
mut metrics = {
timestamp: ($timestamp | format date "%Y-%m-%d %H:%M:%S")
container_runtime: "unknown"
}
# Check for Docker
try {
if (which docker | is-not-empty) {
let containers = (docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Image}}" | lines | skip 1)
$metrics = ($metrics | insert docker {
available: true
containers: ($containers | length)
running: ($containers | where $it =~ "Up" | length)
})
$metrics = ($metrics | insert container_runtime "docker")
}
} catch {}
# Check for Podman
try {
if (which podman | is-not-empty) {
let containers = (podman ps --format "table {{.Names}}\t{{.Status}}\t{{.Image}}" | lines | skip 1)
$metrics = ($metrics | insert podman {
available: true
containers: ($containers | length)
running: ($containers | where $it =~ "Up" | length)
})
if ($metrics.container_runtime == "unknown") {
$metrics = ($metrics | insert container_runtime "podman")
}
}
} catch {}
# Check for Kubernetes
try {
if (which kubectl | is-not-empty) {
let pods = (kubectl get pods --all-namespaces --no-headers | lines)
$metrics = ($metrics | insert kubernetes {
available: true
pods_total: ($pods | length)
pods_running: ($pods | where $it =~ "Running" | length)
pods_pending: ($pods | where $it =~ "Pending" | length)
pods_failed: ($pods | where $it =~ "Failed" | length)
})
}
} catch {}
return $metrics
}
# Collect application logs with filtering
export def collect-logs [
--service(-s): string # Specific service to collect logs from
--since: string = "1h" # Time range (1h, 30m, etc.)
--level: string = "error" # Log level filter
--lines(-l): int = 100 # Maximum lines to collect
]: nothing -> list<record> {
mut logs = []
# Systemd journal logs
try {
mut journalctl_cmd = ["journalctl", "--output=json", "--no-pager", $"--since=($since)"]
if ($service | is-not-empty) {
$journalctl_cmd = ($journalctl_cmd | append ["-u", $service])
}
if (($level | is-not-empty) and ($level != "all")) {
$journalctl_cmd = ($journalctl_cmd | append ["-p", $level])
}
if ($lines | is-not-empty) {
$journalctl_cmd = ($journalctl_cmd | append ["-n", ($lines | into string)])
}
let journal_logs = (run-external ($journalctl_cmd | first) ...($journalctl_cmd | skip 1) | lines | where $it != "" | each { |line| $line | from json })
$logs = ($logs | append $journal_logs)
} catch {}
# Container logs (Docker)
try {
if ((which docker | is-not-empty) and ($service | is-not-empty)) {
let container_logs = (docker logs --since $since --tail $lines $service err> /dev/null | lines | enumerate | each { |item|
{
timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
source: "docker"
container: $service
message: $item.item
line_number: $item.index
}
})
$logs = ($logs | append $container_logs)
}
} catch {}
# File-based logs (common locations)
let log_files = [
"/var/log/syslog"
"/var/log/messages"
"/var/log/kern.log"
"/var/log/auth.log"
]
for log_file in $log_files {
try {
if ($log_file | path exists) {
let file_logs = (tail -n $lines $log_file | lines | enumerate | each { |item|
{
timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
source: "file"
file: $log_file
message: $item.item
line_number: $item.index
}
})
$logs = ($logs | append $file_logs)
}
} catch {}
}
return ($logs | first $lines)
}
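# Illustrative usage (added example): last 30 minutes of error-priority
# entries for one unit, fed straight into analyze-logs below.
export def demo-service-errors [service: string]: nothing -> record {
let logs = (collect-logs --service $service --since "30m" --level err --lines 50)
analyze-logs $logs
}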
# Process and analyze log patterns
export def analyze-logs [logs: list<record>]: nothing -> record {
let total_logs = ($logs | length)
if $total_logs == 0 {
return {
total: 0
analysis: "No logs to analyze"
}
}
# Error pattern analysis
let error_patterns = ["error", "failed", "exception", "critical", "fatal"]
mut error_counts = {}
for pattern in $error_patterns {
let count = ($logs | where message =~ $"(?i)($pattern)" | length)
$error_counts = ($error_counts | insert $pattern $count)
}
# Source distribution
let source_dist = ($logs | group-by source | transpose key value | each { |item|
{source: $item.key, count: ($item.value | length)}
})
# Time-based analysis (last hour)
let recent_logs = ($logs | where {|log| (($log.timestamp? | default "1970-01-01 00:00:00") | into datetime) > ((date now) - 1hr)})
return {
total: $total_logs
recent_count: ($recent_logs | length)
error_patterns: $error_counts
source_distribution: $source_dist
analysis_timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
}
}
# Export metrics in various formats
export def export-metrics [
metrics: record
--format(-f): string = "json" # json, yaml, csv
--output(-o): string # Output file path
]: nothing -> any {
let formatted_data = match $format {
"yaml" => ($metrics | to yaml)
"csv" => {
# Flatten metrics for CSV export
let flattened = ($metrics | flatten | transpose key value)
$flattened | to csv
}
_ => ($metrics | to json)
}
if ($output | is-not-empty) {
$formatted_data | save $output
print $"Metrics exported to ($output)"
} else {
$formatted_data
}
}
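# Illustrative usage (added example): persist one snapshot as YAML.
export def demo-export-snapshot [path: string]: nothing -> nothing {
export-metrics (collect-system-metrics) --format yaml --output $path
}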
# Health monitoring function
export def health-monitor [
--interval(-i): int = 60 # Collection interval in seconds
--duration(-d): int = 300 # Total monitoring duration in seconds
--output(-o): string # Output file for continuous monitoring
]: nothing -> nothing {
let start_time = (date now)
let end_time = ($start_time + ($duration * 1sec))
print $"🔍 Starting health monitoring for ($duration) seconds with ($interval)s intervals"
print $"📊 Collecting system and container metrics"
while (date now) < $end_time {
let current_time = (date now)
let system_metrics = (collect-system-metrics)
let container_metrics = (collect-container-metrics)
let combined_metrics = {
collection_time: ($current_time | format date "%Y-%m-%d %H:%M:%S")
system: $system_metrics
containers: $container_metrics
}
if ($output | is-not-empty) {
$combined_metrics | to json | save -a $output
} else {
print $"⏰ ($current_time | format date "%H:%M:%S") - CPU: ($system_metrics.cpu.load_1m?)% | Memory: ($system_metrics.memory.usage_percent?)%"
}
sleep ($interval * 1sec)
}
print "✅ Health monitoring completed"
}
# Quick system status check
export def status-check []: nothing -> record {
let system = (collect-system-metrics)
let containers = (collect-container-metrics)
# Determine overall health
mut health_status = "healthy"
mut alerts = []
# CPU load check
if (($system.cpu.load_1m? | default 0) > 4.0) {
$health_status = "warning"
$alerts = ($alerts | append "High CPU load")
}
# Memory usage check
if (($system.memory.usage_percent? | default 0) > 90) {
$health_status = "critical"
$alerts = ($alerts | append "High memory usage")
}
# Disk usage check
try {
let high_disk = ($system.disk | where {|x| ($x.percent | str replace "%" "" | into float) > 90})
if ($high_disk | length) > 0 {
$health_status = "warning"
$alerts = ($alerts | append "High disk usage")
}
} catch {}
return {
status: $health_status
alerts: $alerts
metrics: {
system: $system
containers: $containers
}
timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
}
}


@@ -0,0 +1,419 @@
# Log Processing and Analysis Scripts for Nushell Infrastructure
# Advanced log parsing, filtering, and transformation capabilities
# Parse structured logs from various formats
export def parse-logs [
--format(-f): string = "auto" # json, syslog, apache, nginx, auto
--filter: string # Filter expression
--transform: string # Transform expression
]: string -> list<record> {
let input_data = $in
# Auto-detect format if not specified
let detected_format = if $format == "auto" {
if ($input_data | lines | first | str starts-with "{") {
"json"
} else if ($input_data | lines | first | str contains "T") {
"syslog"
} else {
"text"
}
} else {
$format
}
# Parse based on format
mut parsed_logs = match $detected_format {
"json" => {
$input_data | lines | where $it != "" | each { |line|
try {
$line | from json
} catch {
{raw: $line, parse_error: true}
}
}
}
"syslog" => {
$input_data | lines | each { |line|
# RFC3164 syslog format: <priority>timestamp hostname tag: message
let syslog_pattern = '<(?P<priority>\d+)>(?P<timestamp>\w+\s+\d+\s+\d+:\d+:\d+)\s+(?P<hostname>\S+)\s+(?P<tag>\S+):\s*(?P<message>.*)'
try {
let matches = ($line | parse -r $syslog_pattern)
if ($matches | length) > 0 {
$matches | first
} else {
{raw: $line, format: "syslog"}
}
} catch {
{raw: $line, parse_error: true}
}
}
}
"apache" => {
$input_data | lines | each { |line|
# Apache Combined Log Format
let apache_pattern = '(?P<ip>\S+)\s+\S+\s+\S+\s+\[(?P<timestamp>[^\]]+)\]\s+"(?P<method>\S+)\s+(?P<url>\S+)\s+(?P<protocol>[^"]+)"\s+(?P<status>\d+)\s+(?P<size>\d+|-)\s+"(?P<referer>[^"]*)"\s+"(?P<user_agent>[^"]*)"'
try {
let matches = ($line | parse -r $apache_pattern)
if ($matches | length) > 0 {
$matches | first
} else {
{raw: $line, format: "apache"}
}
} catch {
{raw: $line, parse_error: true}
}
}
}
"nginx" => {
$input_data | lines | each { |line|
# Nginx default log format
let nginx_pattern = '(?P<ip>\S+)\s+-\s+-\s+\[(?P<timestamp>[^\]]+)\]\s+"(?P<method>\S+)\s+(?P<url>\S+)\s+(?P<protocol>[^"]+)"\s+(?P<status>\d+)\s+(?P<size>\d+)\s+"(?P<referer>[^"]*)"\s+"(?P<user_agent>[^"]*)"'
try {
let matches = ($line | parse -r $nginx_pattern)
if ($matches | length) > 0 {
$matches | first
} else {
{raw: $line, format: "nginx"}
}
} catch {
{raw: $line, parse_error: true}
}
}
}
_ => {
$input_data | lines | enumerate | each { |item|
{
line_number: $item.index
message: $item.item
timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
}
}
}
}
# Apply filter if specified
if ($filter | is-not-empty) {
$parsed_logs = ($parsed_logs | where { |log|
try {
# Evaluate the filter expression in a subshell against the NUON-serialized record
(nu -c $"($log | to nuon) | ($filter)" | str trim) == "true"
} catch {
false
}
})
}
# Apply transformation if specified
if ($transform | is-not-empty) {
$parsed_logs = ($parsed_logs | each { |log|
try {
# Run the transform in a subshell and re-parse the NUON result
nu -c $"($log | to nuon) | ($transform) | to nuon" | from nuon
} catch {
$log
}
})
}
return $parsed_logs
}
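# Illustrative usage (added example): parse an nginx access log from disk
# and keep only 5xx responses. The path is a placeholder.
export def demo-nginx-errors [file: string]: nothing -> list<record> {
open --raw $file | parse-logs --format nginx | where { |log|
($log | get -i status | default "0" | into int) >= 500
}
}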
# Aggregate logs by time windows
export def aggregate-by-time [
logs: list<record>
--window(-w): string = "1h" # Time window: 1m, 5m, 1h, 1d
--field(-f): string = "timestamp" # Timestamp field name
--metric(-m): string = "count" # Aggregation metric: count, sum, avg, max, min
--group(-g): string # Group by field
]: nothing -> list<record> {
# Parse time window
let window_duration = match $window {
"1m" => 60
"5m" => 300
"1h" => 3600
"1d" => 86400
_ => 3600 # Default to 1 hour
}
# Convert timestamps to epoch and create time buckets
mut processed_logs = ($logs | each { |log|
let timestamp_value = ($log | get -i $field | default (date now))
let epoch = ($timestamp_value | into datetime | format date "%s" | into int)
let bucket = (($epoch // $window_duration) * $window_duration) # floor division keeps buckets integral
$log | insert time_bucket $bucket | insert epoch $epoch
})
# Group by time bucket and optional field
let grouped = if ($group | is-not-empty) {
$processed_logs | group-by time_bucket $group
} else {
$processed_logs | group-by time_bucket
}
# Aggregate based on metric
$grouped | transpose bucket logs | each { |bucket_data|
let bucket_timestamp = ($bucket_data.bucket | into int | into datetime | format date "%Y-%m-%d %H:%M:%S")
let logs_in_bucket = $bucket_data.logs
match $metric {
"count" => {
{
timestamp: $bucket_timestamp
window: $window
count: ($logs_in_bucket | length)
}
}
"sum" => {
# Requires a numeric field to sum
{
timestamp: $bucket_timestamp
window: $window
sum: ($logs_in_bucket | get value | math sum)
}
}
"avg" => {
{
timestamp: $bucket_timestamp
window: $window
average: ($logs_in_bucket | get value | math avg)
}
}
_ => {
{
timestamp: $bucket_timestamp
window: $window
count: ($logs_in_bucket | length)
logs: $logs_in_bucket
}
}
}
} | sort-by timestamp
}
# Detect anomalies in log patterns
export def detect-anomalies [
logs: list<record>
--field(-f): string = "message" # Field to analyze
--threshold(-t): float = 2.0 # Standard deviation threshold
--window(-w): string = "1h" # Time window for baseline
]: nothing -> list<record> {
# Calculate baseline statistics
let baseline_window = match $window {
"1m" => 60
"5m" => 300
"1h" => 3600
"1d" => 86400
_ => 3600
}
let now = (date now)
let baseline_start = ($now - ($baseline_window * 1sec))
# Filter logs for baseline period
let baseline_logs = ($logs | where {|log|
let log_time = ($log | get -i timestamp | default $now | into datetime)
$log_time >= $baseline_start and $log_time <= $now
})
if ($baseline_logs | length) == 0 {
return []
}
# Count occurrences by time buckets
let time_series = (aggregate-by-time $baseline_logs --window "5m" --field timestamp --metric count)
# Calculate statistics
let counts = ($time_series | get count)
let mean = ($counts | math avg)
let std_dev = ($counts | math stddev)
# Find anomalies (values beyond threshold standard deviations)
let anomaly_threshold_high = ($mean + ($threshold * $std_dev))
let anomaly_threshold_low = ($mean - ($threshold * $std_dev))
let anomalies = ($time_series | where {|bucket|
$bucket.count > $anomaly_threshold_high or $bucket.count < $anomaly_threshold_low
})
return ($anomalies | each { |anomaly|
$anomaly | merge {
anomaly_type: (if $anomaly.count > $anomaly_threshold_high { "spike" } else { "drop" })
severity: (if (($anomaly.count - $mean) | math abs) > (3 * $std_dev) { "high" } else { "medium" })
baseline_mean: $mean
baseline_stddev: $std_dev
}
})
}
# Extract patterns and insights from logs
export def extract-patterns [
logs: list<record>
--field(-f): string = "message" # Field to analyze
--pattern-type(-t): string = "error" # error, ip, url, email, custom
--custom-regex(-r): string # Custom regex pattern
--min-frequency(-m): int = 2 # Minimum pattern frequency
]: nothing -> list<record> {
let field_values = ($logs | get -i $field | compact)
let patterns = match $pattern_type {
"error" => {
# Common error patterns
let error_regexes = [
'error:?\s*(.+)',
'exception:?\s*(.+)',
'failed:?\s*(.+)',
'timeout:?\s*(.+)',
'connection\s*refused:?\s*(.+)'
]
mut all_matches = []
for regex in $error_regexes {
let matches = ($field_values | each { |value|
try {
$value | parse -r $regex | each { |match| $match."capture0" }
} catch {
[]
}
} | flatten)
$all_matches = ($all_matches | append $matches)
}
$all_matches
}
"ip" => {
# IP address pattern (wrapped in a capture group for parse -r)
let ip_regex = '\b((?:[0-9]{1,3}\.){3}[0-9]{1,3})\b'
$field_values | each { |value|
try {
$value | parse -r $ip_regex | get capture0
} catch {
[]
}
} | flatten
}
"url" => {
# URL pattern (wrapped in a capture group)
let url_regex = '(https?://[^\s<>"]+)'
$field_values | each { |value|
try {
$value | parse -r $url_regex | get capture0
} catch {
[]
}
} | flatten
}
"email" => {
# Email pattern (wrapped in a capture group)
let email_regex = '\b([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,})\b'
$field_values | each { |value|
try {
$value | parse -r $email_regex | get capture0
} catch {
[]
}
} | flatten
}
"custom" => {
if ($custom_regex | is-not-empty) {
$field_values | each { |value|
try {
$value | parse -r $custom_regex
} catch {
[]
}
} | flatten
} else {
[]
}
}
_ => []
}
# Count pattern frequencies
let pattern_counts = ($patterns | group-by {|x| $x} | transpose pattern occurrences | each { |item|
{
pattern: $item.pattern
frequency: ($item.occurrences | length)
examples: ($item.occurrences | first 3)
}
} | where frequency >= $min_frequency | sort-by frequency -r)
return $pattern_counts
}
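# Illustrative usage (added example): top recurring error messages and
# the ten most frequent client IPs.
export def demo-top-patterns [logs: list<record>]: nothing -> record {
{
errors: (extract-patterns $logs --pattern-type error --min-frequency 3)
ips: (extract-patterns $logs --pattern-type ip | first 10)
}
}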
# Generate log summary report
export def generate-summary [
logs: list<record>
--timeframe(-t): string = "24h" # Timeframe for analysis
--include-patterns(-p) # Include pattern analysis
--include-anomalies(-a) # Include anomaly detection
]: nothing -> record {
let total_logs = ($logs | length)
let start_time = (date now | format date "%Y-%m-%d %H:%M:%S")
if $total_logs == 0 {
return {
summary: "No logs to analyze"
timestamp: $start_time
total_logs: 0
}
}
# Basic statistics
let time_range = ($logs | get -i timestamp | compact | each { |ts| $ts | into datetime })
let earliest = ($time_range | math min)
let latest = ($time_range | math max)
# Log level distribution
let level_distribution = ($logs | get -i level | compact | group-by {|x| $x} | transpose level count | each { |item|
{level: $item.level, count: ($item.count | length)}
} | sort-by count -r)
# Source distribution
let source_distribution = ($logs | get -i source | compact | group-by {|x| $x} | transpose source count | each { |item|
{source: $item.source, count: ($item.count | length)}
} | sort-by count -r)
mut summary_report = {
analysis_timestamp: $start_time
timeframe: $timeframe
total_logs: $total_logs
time_range: {
earliest: ($earliest | format date "%Y-%m-%d %H:%M:%S")
latest: ($latest | format date "%Y-%m-%d %H:%M:%S")
duration_hours: ((($latest | date to-timezone UTC) - ($earliest | date to-timezone UTC)) / 1hr | math round --precision 2)
}
distribution: {
by_level: $level_distribution
by_source: $source_distribution
}
statistics: {
logs_per_hour: (($total_logs / ((($latest | date to-timezone UTC) - ($earliest | date to-timezone UTC)) / 1hr)) | math round --precision 2)
unique_sources: ($source_distribution | length)
error_rate: (($logs | where {|log| ($log | get -i level | default "") =~ "error|critical|fatal"} | length) / $total_logs * 100 | math round --precision 2)
}
}
# Add pattern analysis if requested
if $include_patterns {
let error_patterns = (extract-patterns $logs --pattern-type error --min-frequency 2)
let ip_patterns = (extract-patterns $logs --pattern-type ip --min-frequency 3)
$summary_report = ($summary_report | insert patterns {
errors: $error_patterns
ip_addresses: ($ip_patterns | first 10)
})
}
# Add anomaly detection if requested
if $include_anomalies {
let anomalies = (detect-anomalies $logs --threshold 2.0 --window "1h")
$summary_report = ($summary_report | insert anomalies {
detected: ($anomalies | length)
high_severity: ($anomalies | where severity == "high" | length)
details: ($anomalies | first 5)
})
}
return $summary_report
}


@@ -0,0 +1,50 @@
# Simple test script for Nushell infrastructure
# Validates basic functionality without complex dependencies
export def test-basic-functionality []: nothing -> record {
{
nushell_version: (version | get version)
current_time: (date now | format date "%Y-%m-%d %H:%M:%S")
hostname: ($env.HOSTNAME? | default "unknown")
user: ($env.USER? | default "unknown")
working_directory: $env.PWD
test_status: "passed"
}
}
export def test-security-environment []: nothing -> record {
{
readonly_mode: ($env.NUSHELL_READONLY_MODE? | default "unknown")
execution_mode: ($env.NUSHELL_EXECUTION_MODE? | default "unknown")
audit_enabled: ($env.NUSHELL_AUDIT_ENABLED? | default "unknown")
session_timeout: ($env.NUSHELL_SESSION_TIMEOUT? | default "unknown")
test_status: "passed"
}
}
export def test-file-operations []: nothing -> record {
let test_results = {
can_read_proc: (try { ls /proc | length } catch { 0 })
can_read_tmp: (try { ls /tmp | length } catch { 0 })
current_processes: (try { ps | length } catch { 0 })
disk_usage: (try { df | length } catch { 0 })
test_status: "completed"
}
$test_results
}
# Main test function
export def run-all-tests []: nothing -> record {
let basic_test = (test-basic-functionality)
let security_test = (test-security-environment)
let file_test = (test-file-operations)
{
timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
basic_functionality: $basic_test
security_environment: $security_test
file_operations: $file_test
overall_status: "tests_completed"
}
}
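# Illustrative usage (added example; the module filename is a placeholder):
# use simple_test.nu *
# run-all-tests | to yaml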


@@ -0,0 +1,398 @@
# Telemetry and Monitoring Integration for Nushell Infrastructure
# Secure telemetry collection and forwarding capabilities
# Send telemetry data to configured endpoints
export def send-telemetry [
data: record
--endpoint(-e): string # Override default endpoint
--format(-f): string = "json" # json, prometheus, influx
--timeout(-t): int = 30 # Request timeout in seconds
--retry(-r): int = 3 # Number of retries
]: nothing -> record {
let telemetry_endpoint = ($endpoint | default ($env.NUSHELL_TELEMETRY_ENDPOINT? | default ""))
if ($telemetry_endpoint | is-empty) {
return {
success: false
error: "No telemetry endpoint configured"
data_sent: false
}
}
# Prepare data based on format
let formatted_data = match $format {
"prometheus" => {
# Convert to Prometheus exposition format
convert-to-prometheus $data
}
"influx" => {
# Convert to InfluxDB line protocol
convert-to-influx $data
}
_ => {
# Default JSON format
$data | to json
}
}
# Add metadata
let telemetry_payload = {
timestamp: (date now | format date "%Y-%m-%dT%H:%M:%S.%fZ")
hostname: ($env.HOSTNAME? | default "unknown")
agent: "nushell-provisioning"
version: "1.0.0"
data: $data
}
# Send data with retries
mut attempt = 1
while $attempt <= $retry {
try {
let response = (http post --max-time ($timeout * 1sec) --content-type application/json $telemetry_endpoint ($telemetry_payload | to json))
return {
success: true
endpoint: $telemetry_endpoint
response_status: 200
attempt: $attempt
data_sent: true
timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
}
} catch { |err|
if $attempt == $retry {
return {
success: false
error: ($err | get msg)
endpoint: $telemetry_endpoint
attempts: $attempt
data_sent: false
}
}
# Wait before retry (exponential backoff)
let wait_time = ($attempt * $attempt * 2)
sleep ($wait_time * 1sec)
}
$attempt = ($attempt + 1)
}
return {
success: false
error: "Max retries exceeded"
attempts: $retry
data_sent: false
}
}
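# Illustrative usage (added example): ship a one-off host snapshot.
# Assumes NUSHELL_TELEMETRY_ENDPOINT is set; returns an error record otherwise.
export def demo-send-host-info []: nothing -> record {
let payload = {host: (sys host | select hostname uptime)}
send-telemetry $payload --format json --retry 1
}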
# Convert metrics to Prometheus exposition format
def convert-to-prometheus [data: record]: nothing -> string {
mut prometheus_output = ""
# Process system metrics if available
if ($data | get -i system | is-not-empty) {
let sys = ($data | get system)
# CPU metrics
if ($sys | get -i cpu | is-not-empty) {
let cpu = ($sys | get cpu)
$prometheus_output = $prometheus_output + $"# HELP system_load_1m System load average over 1 minute\n"
$prometheus_output = $prometheus_output + $"# TYPE system_load_1m gauge\n"
$prometheus_output = $prometheus_output + $"system_load_1m{hostname=\"($env.HOSTNAME? | default 'unknown')\"} ($cpu.load_1m? | default 0)\n"
$prometheus_output = $prometheus_output + $"# HELP system_load_5m System load average over 5 minutes\n"
$prometheus_output = $prometheus_output + $"# TYPE system_load_5m gauge\n"
$prometheus_output = $prometheus_output + $"system_load_5m{hostname=\"($env.HOSTNAME? | default 'unknown')\"} ($cpu.load_5m? | default 0)\n"
}
# Memory metrics
if ($sys | get -i memory | is-not-empty) {
let mem = ($sys | get memory)
$prometheus_output = $prometheus_output + $"# HELP system_memory_usage_percent Memory usage percentage\n"
$prometheus_output = $prometheus_output + $"# TYPE system_memory_usage_percent gauge\n"
$prometheus_output = $prometheus_output + $"system_memory_usage_percent{hostname=\"($env.HOSTNAME? | default 'unknown')\"} ($mem.usage_percent? | default 0)\n"
$prometheus_output = $prometheus_output + $"# HELP system_memory_total_bytes Total memory in bytes\n"
$prometheus_output = $prometheus_output + $"# TYPE system_memory_total_bytes gauge\n"
$prometheus_output = $prometheus_output + $"system_memory_total_bytes{hostname=\"($env.HOSTNAME? | default 'unknown')\"} (($mem.total_kb? | default 0) * 1024)\n"
}
}
return $prometheus_output
}
# Convert metrics to InfluxDB line protocol
def convert-to-influx [data: record]: nothing -> string {
mut influx_lines = []
let timestamp = (date now | format date "%s%N")
let hostname = ($env.HOSTNAME? | default "unknown")
# Process system metrics
if ($data | get -i system | is-not-empty) {
let sys = ($data | get system)
# CPU metrics
if ($sys | get -i cpu | is-not-empty) {
let cpu = ($sys | get cpu)
$influx_lines = ($influx_lines | append $"system_cpu,hostname=($hostname) load_1m=($cpu.load_1m? | default 0),load_5m=($cpu.load_5m? | default 0),load_15m=($cpu.load_15m? | default 0) ($timestamp)")
}
# Memory metrics
if ($sys | get -i memory | is-not-empty) {
let mem = ($sys | get memory)
$influx_lines = ($influx_lines | append $"system_memory,hostname=($hostname) usage_percent=($mem.usage_percent? | default 0),total_kb=($mem.total_kb? | default 0),used_kb=($mem.used_kb? | default 0) ($timestamp)")
}
# Process metrics
if ($sys | get -i processes | is-not-empty) {
let proc = ($sys | get processes)
$influx_lines = ($influx_lines | append $"system_processes,hostname=($hostname) total=($proc.total? | default 0) ($timestamp)")
}
}
return ($influx_lines | str join "\n")
}
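# For reference, the line protocol built above looks like this
# (hostname and values are illustrative):
# system_cpu,hostname=web-01 load_1m=0.42,load_5m=0.35,load_15m=0.3 1727136000000000000
# system_memory,hostname=web-01 usage_percent=61.2,total_kb=16282412,used_kb=9964836 1727136000000000000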
# Create and manage telemetry batches
export def batch-telemetry [
--max-batch-size(-s): int = 100 # Maximum items per batch
--max-wait-time(-w): int = 30 # Maximum wait time in seconds
--output-file(-o): string # File to store batched data
]: nothing -> nothing {
mut batch = []
mut batch_start_time = (date now)
print $"📊 Starting telemetry batching (max size: ($max_batch_size), max wait: ($max_wait_time)s)"
# Monitor for telemetry data
while true {
# Check if we have data to batch (this would typically come from external sources)
# For demonstration, we'll create sample data
let current_time = (date now)
# Collect current metrics
try {
use ../observability/collect.nu *
let metrics = (collect-system-metrics)
# Add to batch
$batch = ($batch | append {
timestamp: ($current_time | format date "%Y-%m-%dT%H:%M:%S.%fZ")
type: "system_metrics"
data: $metrics
})
# Check batch conditions
let batch_size = ($batch | length)
let elapsed_time = (($current_time - $batch_start_time) / 1sec)
if $batch_size >= $max_batch_size or $elapsed_time >= $max_wait_time {
# Send batch
let batch_result = if ($output_file | is-not-empty) {
(send-batch $batch --output-file $output_file)
} else {
(send-batch $batch)
}
if $batch_result.success {
print $"✅ Batch sent successfully: ($batch_size) items"
} else {
print $"❌ Batch send failed: ($batch_result.error)"
}
# Reset batch
$batch = []
$batch_start_time = (date now)
}
} catch { |err|
print $"⚠️ Error collecting metrics: ($err | get msg)"
}
# Wait before next collection
sleep 10sec
}
}
# Send a batch of telemetry data
def send-batch [
batch: list<record>
--output-file(-o): string
]: nothing -> record {
if ($batch | length) == 0 {
return {success: true, message: "Empty batch, nothing to send"}
}
let batch_payload = {
batch_id: (random uuid)
batch_size: ($batch | length)
batch_timestamp: (date now | format date "%Y-%m-%dT%H:%M:%S.%fZ")
hostname: ($env.HOSTNAME? | default "unknown")
agent: "nushell-telemetry"
items: $batch
}
# Save to file if specified
if ($output_file | is-not-empty) {
try {
$batch_payload | to json | save -a $output_file
return {
success: true
message: $"Batch saved to file: ($output_file)"
batch_size: ($batch | length)
}
} catch { |err|
return {
success: false
error: $"Failed to save batch: ($err | get msg)"
}
}
}
# Send to telemetry endpoint
let endpoint = ($env.NUSHELL_TELEMETRY_ENDPOINT? | default "")
if ($endpoint | is-not-empty) {
return (send-telemetry $batch_payload --endpoint $endpoint)
} else {
return {
success: false
error: "No telemetry endpoint configured"
}
}
}
# Monitor system health and send alerts
export def health-monitoring [
--alert-threshold(-t): record = {cpu: 80, memory: 90, disk: 95} # Alert thresholds
--check-interval(-i): int = 60 # Check interval in seconds
--alert-endpoint(-e): string # Alert webhook endpoint
]: nothing -> nothing {
print $"🔍 Starting health monitoring with thresholds: ($alert_threshold | to nuon)"
while true {
try {
use ../observability/collect.nu *
let status = (status-check)
# Check for threshold violations
mut alerts = []
# CPU check
if ($status.metrics.system.cpu.load_1m? | default 0) > ($alert_threshold.cpu / 10.0) {
$alerts = ($alerts | append {
type: "cpu_high"
severity: "warning"
message: $"High CPU load: ($status.metrics.system.cpu.load_1m)"
threshold: $alert_threshold.cpu
current_value: $status.metrics.system.cpu.load_1m
})
}
# Memory check
if ($status.metrics.system.memory.usage_percent? | default 0) > $alert_threshold.memory {
$alerts = ($alerts | append {
type: "memory_high"
severity: "critical"
message: $"High memory usage: ($status.metrics.system.memory.usage_percent)%"
threshold: $alert_threshold.memory
current_value: $status.metrics.system.memory.usage_percent
})
}
# Disk check
try {
let high_disk_usage = ($status.metrics.system.disk | where {|disk|
($disk.percent | str replace "%" "" | into float) > $alert_threshold.disk
})
if ($high_disk_usage | length) > 0 {
for disk in $high_disk_usage {
$alerts = ($alerts | append {
type: "disk_high"
severity: "critical"
message: $"High disk usage on ($disk.mount): ($disk.percent)"
threshold: $alert_threshold.disk
current_value: ($disk.percent | str replace "%" "" | into float)
filesystem: $disk.filesystem
mount: $disk.mount
})
}
}
} catch {}
# Send alerts if any
if ($alerts | length) > 0 {
let alert_payload = {
timestamp: (date now | format date "%Y-%m-%dT%H:%M:%S.%fZ")
hostname: ($env.HOSTNAME? | default "unknown")
alert_count: ($alerts | length)
alerts: $alerts
system_status: $status
}
# Send to telemetry endpoint
let result = (send-telemetry $alert_payload --endpoint ($alert_endpoint | default ($env.NUSHELL_TELEMETRY_ENDPOINT? | default "")))
if $result.success {
print $"🚨 Sent ($alerts | length) alerts to monitoring system"
} else {
print $"❌ Failed to send alerts: ($result.error)"
}
# Also log alerts locally
$alerts | each { |alert|
print $"⚠️ ALERT: ($alert.type) - ($alert.message)"
}
}
# Send regular health status
let health_payload = {
type: "health_check"
timestamp: (date now | format date "%Y-%m-%dT%H:%M:%S.%fZ")
status: $status
}
send-telemetry $health_payload | ignore
} catch { |err|
print $"❌ Health monitoring error: ($err | get msg)"
}
sleep ($check_interval * 1sec)
}
}
# Initialize telemetry configuration
export def --env init-telemetry [
--endpoint(-e): string # Telemetry endpoint URL
--format(-f): string = "json" # Default format
--enable-health # Enable health monitoring (-h is reserved for --help)
--config-file(-c): string # Save configuration to file
]: nothing -> record {
let config = {
endpoint: ($endpoint | default "")
format: $format
health_monitoring: ($enable_health | default false)
created: (date now | format date "%Y-%m-%d %H:%M:%S")
version: "1.0.0"
}
# Set environment variables
$env.NUSHELL_TELEMETRY_ENDPOINT = ($endpoint | default "")
$env.NUSHELL_TELEMETRY_FORMAT = $format
$env.NUSHELL_TELEMETRY_ENABLED = "true"
# Save configuration if file specified
if ($config_file | is-not-empty) {
try {
$config | to json | save $config_file
print $"📝 Telemetry configuration saved to ($config_file)"
} catch { |err|
print $"⚠️ Failed to save configuration: ($err | get msg)"
}
}
print $"🔧 Telemetry initialized:"
print $" Endpoint: ($config.endpoint)"
print $" Format: ($config.format)"
print $" Health monitoring: ($config.health_monitoring)"
return $config
}
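# Illustrative bootstrap (added example; the endpoint URL is a placeholder):
# init-telemetry --endpoint "https://telemetry.example.com/ingest" --format json --enable-health
# health-monitoring --check-interval 120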