# provisioning/taskservs/nushell/observability/telemetry.nu

# Telemetry and Monitoring Integration for Nushell Infrastructure
# Secure telemetry collection and forwarding capabilities

# Send telemetry data to configured endpoints
export def send-telemetry [
    data: record
    --endpoint (-e): string        # Override default endpoint
    --format (-f): string = "json" # json, prometheus, influx
    --timeout (-t): int = 30       # Request timeout in seconds
    --retry (-r): int = 3          # Number of retries
]: nothing -> record {
    let telemetry_endpoint = ($endpoint | default ($env.NUSHELL_TELEMETRY_ENDPOINT? | default ""))
    if ($telemetry_endpoint | is-empty) {
        return {
            success: false
            error: "No telemetry endpoint configured"
            data_sent: false
        }
    }

    # Prepare data based on format
    let formatted_data = match $format {
        "prometheus" => (convert-to-prometheus $data) # Prometheus exposition format
        "influx" => (convert-to-influx $data)         # InfluxDB line protocol
        _ => $data                                    # Default JSON format (serialized with the payload below)
    }

    # Add metadata
    let telemetry_payload = {
        timestamp: (date now | format date "%Y-%m-%dT%H:%M:%S.%fZ")
        hostname: ($env.HOSTNAME? | default "unknown")
        agent: "nushell-provisioning"
        version: "1.0.0"
        data: $formatted_data
    }

    # Send data with retries (exponential backoff between attempts)
    mut attempt = 1
    while $attempt <= $retry {
        # Copy the loop counter into an immutable binding so the catch closure can capture it
        let current_attempt = $attempt
        try {
            # http post expects a duration for --max-time
            let response = (http post $telemetry_endpoint ($telemetry_payload | to json) --max-time ($timeout * 1sec) --headers {"Content-Type": "application/json"} --full)
            return {
                success: true
                endpoint: $telemetry_endpoint
                response_status: ($response.status? | default 200)
                attempt: $current_attempt
                data_sent: true
                timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
            }
        } catch { |err|
            if $current_attempt == $retry {
                return {
                    success: false
                    error: ($err | get msg)
                    endpoint: $telemetry_endpoint
                    attempts: $current_attempt
                    data_sent: false
                }
            }
            # Wait before retry (exponential backoff)
            let wait_time = ($current_attempt * $current_attempt * 2)
            sleep ($wait_time * 1sec)
        }
        $attempt = ($attempt + 1)
    }

    return {
        success: false
        error: "Max retries exceeded"
        attempts: $retry
        data_sent: false
    }
}
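
# Example usage (illustrative; the endpoint URL below is a placeholder, not a real
# collector endpoint):
#
#   $env.NUSHELL_TELEMETRY_ENDPOINT = "https://telemetry.example.local/ingest"
#   send-telemetry {deploy: "web-frontend", duration_s: 42, success: true}
#   send-telemetry {system: {cpu: {load_1m: 0.42}}} --format prometheus --retry 5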

# Convert metrics to Prometheus exposition format
def convert-to-prometheus [data: record]: nothing -> string {
    mut prometheus_output = ""
    let hostname = ($env.HOSTNAME? | default "unknown")

    # Process system metrics if available
    if ($data | get -i system | is-not-empty) {
        let sys = ($data | get system)

        # CPU metrics
        if ($sys | get -i cpu | is-not-empty) {
            let cpu = ($sys | get cpu)
            $prometheus_output += $"# HELP system_load_1m System load average over 1 minute\n"
            $prometheus_output += $"# TYPE system_load_1m gauge\n"
            $prometheus_output += $"system_load_1m{hostname=\"($hostname)\"} ($cpu.load_1m? | default 0)\n"
            $prometheus_output += $"# HELP system_load_5m System load average over 5 minutes\n"
            $prometheus_output += $"# TYPE system_load_5m gauge\n"
            $prometheus_output += $"system_load_5m{hostname=\"($hostname)\"} ($cpu.load_5m? | default 0)\n"
        }

        # Memory metrics
        if ($sys | get -i memory | is-not-empty) {
            let mem = ($sys | get memory)
            $prometheus_output += $"# HELP system_memory_usage_percent Memory usage percentage\n"
            $prometheus_output += $"# TYPE system_memory_usage_percent gauge\n"
            $prometheus_output += $"system_memory_usage_percent{hostname=\"($hostname)\"} ($mem.usage_percent? | default 0)\n"
            $prometheus_output += $"# HELP system_memory_total_bytes Total memory in bytes\n"
            $prometheus_output += $"# TYPE system_memory_total_bytes gauge\n"
            $prometheus_output += $"system_memory_total_bytes{hostname=\"($hostname)\"} (($mem.total_kb? | default 0) * 1024)\n"
        }
    }
    return $prometheus_output
}
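
# Illustrative output for the converter above (values are made up), assuming the input
# record contains system.cpu and system.memory sections:
#
#   # HELP system_load_1m System load average over 1 minute
#   # TYPE system_load_1m gauge
#   system_load_1m{hostname="node-01"} 0.42
#   ...
#   system_memory_usage_percent{hostname="node-01"} 63.5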

# Convert metrics to InfluxDB line protocol
def convert-to-influx [data: record]: nothing -> string {
    mut influx_lines = []
    # Epoch timestamp in nanoseconds (%s = seconds, %f = 9-digit nanosecond fraction)
    let timestamp = (date now | format date "%s%f")
    let hostname = ($env.HOSTNAME? | default "unknown")

    # Process system metrics
    if ($data | get -i system | is-not-empty) {
        let sys = ($data | get system)

        # CPU metrics
        if ($sys | get -i cpu | is-not-empty) {
            let cpu = ($sys | get cpu)
            $influx_lines = ($influx_lines | append $"system_cpu,hostname=($hostname) load_1m=($cpu.load_1m? | default 0),load_5m=($cpu.load_5m? | default 0),load_15m=($cpu.load_15m? | default 0) ($timestamp)")
        }

        # Memory metrics
        if ($sys | get -i memory | is-not-empty) {
            let mem = ($sys | get memory)
            $influx_lines = ($influx_lines | append $"system_memory,hostname=($hostname) usage_percent=($mem.usage_percent? | default 0),total_kb=($mem.total_kb? | default 0),used_kb=($mem.used_kb? | default 0) ($timestamp)")
        }

        # Process metrics
        if ($sys | get -i processes | is-not-empty) {
            let proc = ($sys | get processes)
            $influx_lines = ($influx_lines | append $"system_processes,hostname=($hostname) total=($proc.total? | default 0) ($timestamp)")
        }
    }
    return ($influx_lines | str join "\n")
}
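
# Illustrative output for the converter above (one line per measurement, values made up,
# timestamp in nanoseconds since the epoch):
#
#   system_cpu,hostname=node-01 load_1m=0.42,load_5m=0.37,load_15m=0.3 1718000000123456789
#   system_memory,hostname=node-01 usage_percent=63.5,total_kb=16309248,used_kb=10356352 1718000000123456789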

# Create and manage telemetry batches
export def batch-telemetry [
    --max-batch-size (-s): int = 100 # Maximum items per batch
    --max-wait-time (-w): int = 30   # Maximum wait time in seconds
    --output-file (-o): string       # File to store batched data
]: nothing -> nothing {
    mut batch = []
    mut batch_start_time = (date now)

    print $"📊 Starting telemetry batching \(max size: ($max_batch_size), max wait: ($max_wait_time)s\)"

    # Monitor for telemetry data
    while true {
        # Check if we have data to batch (this would typically come from external sources);
        # for demonstration, we collect the local system metrics
        let current_time = (date now)

        # Collect current metrics
        try {
            use ../observability/collect.nu *
            let metrics = (collect-system-metrics)

            # Add to batch
            $batch = ($batch | append {
                timestamp: ($current_time | format date "%Y-%m-%dT%H:%M:%S.%fZ")
                type: "system_metrics"
                data: $metrics
            })

            # Check batch conditions
            let batch_size = ($batch | length)
            let elapsed_time = (($current_time - $batch_start_time) / 1sec)
            if $batch_size >= $max_batch_size or $elapsed_time >= $max_wait_time {
                # Send batch ("" means no output file was given)
                let batch_result = (send-batch $batch --output-file ($output_file | default ""))
                if $batch_result.success {
                    print $"✅ Batch sent successfully: ($batch_size) items"
                } else {
                    print $"❌ Batch send failed: ($batch_result.error)"
                }
                # Reset batch
                $batch = []
                $batch_start_time = (date now)
            }
        } catch { |err|
            print $"⚠️ Error collecting metrics: ($err | get msg)"
        }

        # Wait before next collection
        sleep 10sec
    }
}
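
# Example usage (illustrative; runs until interrupted, and the output path is a placeholder):
#
#   batch-telemetry --max-batch-size 50 --max-wait-time 60 --output-file /var/lib/telemetry/batches.jsonl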

# Send a batch of telemetry data
def send-batch [
    batch: list<record>
    --output-file (-o): string
]: nothing -> record {
    if ($batch | length) == 0 {
        return {success: true, message: "Empty batch, nothing to send"}
    }

    let batch_payload = {
        batch_id: (random uuid)
        batch_size: ($batch | length)
        batch_timestamp: (date now | format date "%Y-%m-%dT%H:%M:%S.%fZ")
        hostname: ($env.HOSTNAME? | default "unknown")
        agent: "nushell-telemetry"
        items: $batch
    }

    # Save to file if specified (one JSON document per line, so the file stays parseable across appends)
    if ($output_file | is-not-empty) {
        try {
            $"($batch_payload | to json --raw)\n" | save --append $output_file
            return {
                success: true
                message: $"Batch saved to file: ($output_file)"
                batch_size: ($batch | length)
            }
        } catch { |err|
            return {
                success: false
                error: $"Failed to save batch: ($err | get msg)"
            }
        }
    }

    # Send to telemetry endpoint
    let endpoint = ($env.NUSHELL_TELEMETRY_ENDPOINT? | default "")
    if ($endpoint | is-not-empty) {
        return (send-telemetry $batch_payload --endpoint $endpoint)
    } else {
        return {
            success: false
            error: "No telemetry endpoint configured"
        }
    }
}
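
# Note: because batches are appended one JSON document per line, a saved batch file can be
# read back as JSON Lines (the path is a placeholder):
#
#   open --raw /var/lib/telemetry/batches.jsonl | lines | each { |line| $line | from json }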

# Monitor system health and send alerts
export def health-monitoring [
    --alert-threshold (-t): record = {cpu: 80, memory: 90, disk: 95} # Alert thresholds
    --check-interval (-i): int = 60 # Check interval in seconds
    --alert-endpoint (-e): string   # Alert webhook endpoint
]: nothing -> nothing {
    print $"🔍 Starting health monitoring with thresholds: ($alert_threshold)"

    while true {
        try {
            use ../observability/collect.nu *
            let status = (status-check)

            # Check for threshold violations
            mut alerts = []

            # CPU check: the percentage threshold is scaled down to compare against
            # the 1-minute load average reported by the collector
            if ($status.metrics.system.cpu.load_1m? | default 0) > ($alert_threshold.cpu / 10.0) {
                $alerts = ($alerts | append {
                    type: "cpu_high"
                    severity: "warning"
                    message: $"High CPU load: ($status.metrics.system.cpu.load_1m)"
                    threshold: $alert_threshold.cpu
                    current_value: $status.metrics.system.cpu.load_1m
                })
            }

            # Memory check
            if ($status.metrics.system.memory.usage_percent? | default 0) > $alert_threshold.memory {
                $alerts = ($alerts | append {
                    type: "memory_high"
                    severity: "critical"
                    message: $"High memory usage: ($status.metrics.system.memory.usage_percent)%"
                    threshold: $alert_threshold.memory
                    current_value: $status.metrics.system.memory.usage_percent
                })
            }

            # Disk check
            try {
                let high_disk_usage = ($status.metrics.system.disk | where {|disk|
                    ($disk.percent | str replace "%" "" | into float) > $alert_threshold.disk
                })
                if ($high_disk_usage | length) > 0 {
                    for disk in $high_disk_usage {
                        $alerts = ($alerts | append {
                            type: "disk_high"
                            severity: "critical"
                            message: $"High disk usage on ($disk.mount): ($disk.percent)"
                            threshold: $alert_threshold.disk
                            current_value: ($disk.percent | str replace "%" "" | into float)
                            filesystem: $disk.filesystem
                            mount: $disk.mount
                        })
                    }
                }
            } catch {}

            # Send alerts if any
            if ($alerts | length) > 0 {
                let alert_payload = {
                    timestamp: (date now | format date "%Y-%m-%dT%H:%M:%S.%fZ")
                    hostname: ($env.HOSTNAME? | default "unknown")
                    alert_count: ($alerts | length)
                    alerts: $alerts
                    system_status: $status
                }

                # Send to telemetry endpoint
                let result = (send-telemetry $alert_payload --endpoint ($alert_endpoint | default ($env.NUSHELL_TELEMETRY_ENDPOINT? | default "")))
                if $result.success {
                    print $"🚨 Sent ($alerts | length) alerts to monitoring system"
                } else {
                    print $"❌ Failed to send alerts: ($result.error)"
                }

                # Also log alerts locally
                $alerts | each { |alert|
                    print $"⚠️ ALERT: ($alert.type) - ($alert.message)"
                } | ignore
            }

            # Send regular health status
            let health_payload = {
                type: "health_check"
                timestamp: (date now | format date "%Y-%m-%dT%H:%M:%S.%fZ")
                status: $status
            }
            send-telemetry $health_payload | ignore
        } catch { |err|
            print $"❌ Health monitoring error: ($err | get msg)"
        }

        sleep ($check_interval * 1sec)
    }
}
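
# Example usage (illustrative; runs until interrupted, and the webhook URL is a placeholder):
#
#   health-monitoring --alert-threshold {cpu: 75, memory: 85, disk: 90} --check-interval 120 --alert-endpoint "https://alerts.example.local/webhook"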

# Initialize telemetry configuration
# Uses `def --env` so the NUSHELL_TELEMETRY_* variables persist in the caller's environment
export def --env init-telemetry [
    --endpoint (-e): string        # Telemetry endpoint URL
    --format (-f): string = "json" # Default format
    --enable-health                # Enable health monitoring
    --config-file (-c): string     # Save configuration to file
]: nothing -> record {
    let config = {
        endpoint: ($endpoint | default "")
        format: $format
        health_monitoring: ($enable_health | default false)
        created: (date now | format date "%Y-%m-%d %H:%M:%S")
        version: "1.0.0"
    }

    # Set environment variables
    $env.NUSHELL_TELEMETRY_ENDPOINT = ($endpoint | default "")
    $env.NUSHELL_TELEMETRY_FORMAT = $format
    $env.NUSHELL_TELEMETRY_ENABLED = "true"

    # Save configuration if file specified
    if ($config_file | is-not-empty) {
        try {
            $config | to json | save $config_file
            print $"📝 Telemetry configuration saved to ($config_file)"
        } catch { |err|
            print $"⚠️ Failed to save configuration: ($err | get msg)"
        }
    }

    print $"🔧 Telemetry initialized:"
    print $"  Endpoint: ($config.endpoint)"
    print $"  Format: ($config.format)"
    print $"  Health monitoring: ($config.health_monitoring)"
    return $config
}
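
# Example usage (illustrative; the endpoint URL and config path are placeholders):
#
#   init-telemetry --endpoint "https://telemetry.example.local/ingest" --enable-health --config-file /etc/provisioning/telemetry.json
#   health-monitoring --check-interval 300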