#!/usr/bin/env nu

# Observability Collectors for Provisioning System
# Collects metrics, logs, events, and state from infrastructure

use ../dataframes/polars_integration.nu *
use ../dataframes/log_processor.nu *
use ../lib_provisioning/utils/settings.nu *

# Main collector orchestrator
export def start_collectors [
    --config_file: string = "observability.toml"
    --interval: string = "60s"
    --output_dir: string = "data/observability"
    --enable_dataframes = true
    --debug = false
]: nothing -> nothing {

    print "🔍 Starting Observability Collectors..."

    # Load configuration
    let config = load_collector_config $config_file

    # Ensure output directory exists
    mkdir ($output_dir | path expand)

    # Initialize collectors
    let collectors = initialize_collectors $config

    print $"📊 Initialized ($collectors | length) collectors"

    if $debug {
        $env.OBSERVABILITY_DEBUG = "true"
        print "Debug mode enabled"
    }

    # Start collection loop
    collection_loop $collectors $interval $output_dir $enable_dataframes $debug
}

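# Example invocation, assuming this file is loaded as a module (the module
# name below is illustrative):
#
#   use observability_collectors.nu *
#   start_collectors --interval "30s" --output_dir "data/observability" --debug true
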
def load_collector_config [config_file: string]: nothing -> record {
    if ($config_file | path exists) {
        open $config_file
    } else {
        # Default configuration
        {
            collectors: {
                system_metrics: {
                    enabled: true
                    interval: "60s"
                    metrics: ["cpu", "memory", "disk", "network"]
                }
                infrastructure_state: {
                    enabled: true
                    interval: "300s"
                    sources: ["servers", "services", "clusters"]
                }
                application_logs: {
                    enabled: true
                    interval: "60s"
                    log_sources: ["provisioning", "containers", "kubernetes"]
                }
                cost_metrics: {
                    enabled: true
                    interval: "3600s"
                    providers: ["aws", "gcp", "azure"]
                }
                security_events: {
                    enabled: true
                    interval: "60s"
                    sources: ["auth", "network", "filesystem"]
                }
                performance_metrics: {
                    enabled: true
                    interval: "30s"
                    targets: ["deployments", "scaling", "response_times"]
                }
            }
            storage: {
                format: "parquet"  # parquet, json, csv
                retention_days: 30
                compression: "gzip"
            }
            alerting: {
                enabled: true
                channels: ["console", "webhook"]
                thresholds: {
                    cpu_usage: 80
                    memory_usage: 85
                    disk_usage: 90
                    error_rate: 0.05
                }
            }
        }
    }
}

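# A minimal observability.toml sketch matching the default record above
# (keys mirror the defaults; only the sections you override are required):
#
#   [collectors.system_metrics]
#   enabled = true
#   interval = "30s"
#   metrics = ["cpu", "memory"]
#
#   [storage]
#   format = "json"
#   retention_days = 7
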
def initialize_collectors [config: record]: nothing -> list {
    $config.collectors | transpose name settings | each {|collector|
        if $collector.settings.enabled {
            {
                name: $collector.name
                config: $collector.settings
                last_run: null
                status: "initialized"
            }
        }
    } | compact
}

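# Each enabled collector becomes a record like (values illustrative):
#   { name: "system_metrics", config: { enabled: true, interval: "60s", ... },
#     last_run: null, status: "initialized" }
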
def collection_loop [
    collectors: list
    interval: string
    output_dir: string
    enable_dataframes: bool
    debug: bool
]: nothing -> nothing {

    let interval_seconds = parse_interval $interval

    print $"🔄 Starting collection loop \(interval: ($interval)\)..."

    # Collector records are re-bound each cycle so should_collect can
    # compare against an up-to-date last_run
    mut state = $collectors

    while true {
        let collection_start = (date now)

        $state = ($state | each {|collector|
            if (should_collect $collector $collection_start) {
                if $debug {
                    print $"📥 Collecting from: ($collector.name)"
                }

                try {
                    let data = collect_from_collector $collector

                    if ($data | length) > 0 {
                        save_collected_data $data $collector.name $output_dir $enable_dataframes
                    }

                    $collector | upsert last_run $collection_start | upsert status "ok"
                } catch {|err|
                    print $"❌ Error in collector ($collector.name): ($err.msg)"
                    $collector | upsert status "error"
                }
            } else {
                $collector
            }
        })

        let collection_duration = ((date now) - $collection_start)

        if $debug {
            print $"✅ Collection cycle completed in ($collection_duration)"
        }

        sleep ($interval_seconds * 1sec)
    }
}

def parse_interval [interval: string]: nothing -> int {
    match $interval {
        $i if ($i | str ends-with "s") => ($i | str replace "s" "" | into int)
        $i if ($i | str ends-with "m") => (($i | str replace "m" "" | into int) * 60)
        $i if ($i | str ends-with "h") => (($i | str replace "h" "" | into int) * 3600)
        _ => 60  # default: 60 seconds
    }
}

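# Examples: parse_interval "90s" => 90, "5m" => 300, "2h" => 7200;
# unrecognized formats fall back to 60.
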
def should_collect [collector: record, current_time: datetime]: nothing -> bool {
    if ($collector.last_run | is-empty) {
        true  # First run
    } else {
        let elapsed = ($current_time - $collector.last_run)
        let interval_duration = (parse_interval $collector.config.interval)
        # Compare durations directly instead of hand-converting to nanoseconds
        $elapsed >= ($interval_duration * 1sec)
    }
}

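# Example: a collector with interval "300s" that last ran 4 minutes ago is
# skipped; one that has never run (last_run == null) always collects.
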
def collect_from_collector [collector: record]: nothing -> list {
    # Dispatch to the per-collector implementations defined below
    match $collector.name {
        "system_metrics" => (collect_system_metrics $collector.config)
        "infrastructure_state" => (collect_infrastructure_state $collector.config)
        "application_logs" => (collect_application_logs $collector.config)
        "cost_metrics" => (collect_cost_metrics $collector.config)
        "security_events" => (collect_security_events $collector.config)
        "performance_metrics" => (collect_performance_metrics $collector.config)
        _ => []
    }
}

# System metrics collector
def collect_system_metrics [config: record]: nothing -> list {
    mut metrics = []

    if "cpu" in $config.metrics {
        $metrics = ($metrics | append (get_cpu_metrics))
    }

    if "memory" in $config.metrics {
        $metrics = ($metrics | append (get_memory_metrics))
    }

    if "disk" in $config.metrics {
        $metrics = ($metrics | append (get_disk_metrics))
    }

    if "network" in $config.metrics {
        $metrics = ($metrics | append (get_network_metrics))
    }

    $metrics | each {|metric|
        $metric | upsert timestamp (date now) | upsert collector "system_metrics"
    }
}

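# Each emitted metric is a flat record, e.g. (values illustrative):
#   { metric_name: "cpu", value: 42.5, unit: "percent", details: {...},
#     timestamp: <datetime>, collector: "system_metrics" }
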
def get_cpu_metrics []: nothing -> record {
    try {
        # Use different methods based on OS
        let cpu_usage = if $nu.os-info.name == "linux" {
            # Linux: load average from /proc/loadavg (rough usage proxy)
            let cpu_info = (open /proc/loadavg | split row " ")
            {
                usage_percent: (($cpu_info.0 | into float) * 100 / (sys cpu | length))
                load_1min: ($cpu_info.0 | into float)
                load_5min: ($cpu_info.1 | into float)
                load_15min: ($cpu_info.2 | into float)
            }
        } else if $nu.os-info.name == "macos" {
            # macOS: parse the "CPU usage" line from top
            let top_output = (top -l 1 -n 0 | lines | where {|l| $l | str contains "CPU usage"} | first)
            let usage = ($top_output | parse --regex 'CPU usage: (?P<user>\d+\.\d+)% user, (?P<sys>\d+\.\d+)% sys, (?P<idle>\d+\.\d+)% idle')
            if ($usage | length) > 0 {
                let u = $usage.0
                {
                    usage_percent: (100 - ($u.idle | into float))
                    user_percent: ($u.user | into float)
                    system_percent: ($u.sys | into float)
                    idle_percent: ($u.idle | into float)
                }
            } else {
                { usage_percent: 0, error: "Could not parse CPU usage" }
            }
        } else {
            { usage_percent: 0, error: "Unsupported OS for CPU metrics" }
        }

        {
            metric_name: "cpu"
            value: $cpu_usage.usage_percent
            unit: "percent"
            details: $cpu_usage
        }
    } catch {
        {
            metric_name: "cpu"
            value: 0
            unit: "percent"
            error: "Failed to collect CPU metrics"
        }
    }
}

def get_memory_metrics []: nothing -> record {
    try {
        let mem_info = (sys mem)
        # filesize / filesize yields a plain float ratio
        let usage_percent = (($mem_info.used / $mem_info.total) * 100)
        {
            metric_name: "memory"
            value: $usage_percent
            unit: "percent"
            details: {
                total_bytes: $mem_info.total
                used_bytes: $mem_info.used
                available_bytes: $mem_info.available
                free_bytes: $mem_info.free
                usage_percent: $usage_percent
            }
        }
    } catch {
        {
            metric_name: "memory"
            value: 0
            unit: "percent"
            error: "Failed to collect memory metrics"
        }
    }
}

def get_disk_metrics []: nothing -> list {
    try {
        sys disks | each {|disk|
            # sys disks reports total and free; derive used space from them
            let used = ($disk.total - $disk.free)
            let usage_percent = (($used / $disk.total) * 100)
            {
                metric_name: "disk"
                value: $usage_percent
                unit: "percent"
                device: $disk.device
                mount_point: $disk.mount
                details: {
                    total_bytes: $disk.total
                    used_bytes: $used
                    free_bytes: $disk.free
                    usage_percent: $usage_percent
                    filesystem: $disk.type
                }
            }
        }
    } catch {
        [{
            metric_name: "disk"
            value: 0
            unit: "percent"
            error: "Failed to collect disk metrics"
        }]
    }
}

def get_network_metrics []: nothing -> list {
    try {
        sys net | each {|interface|
            {
                metric_name: "network"
                interface: $interface.name
                details: {
                    bytes_sent: $interface.sent
                    bytes_received: $interface.recv
                    # packet counters are not exposed on every platform/version
                    packets_sent: ($interface.packets_sent? | default 0)
                    packets_received: ($interface.packets_recv? | default 0)
                }
            }
        }
    } catch {
        [{
            metric_name: "network"
            value: 0
            error: "Failed to collect network metrics"
        }]
    }
}

# Infrastructure state collector
def collect_infrastructure_state [config: record]: nothing -> list {
    mut state_data = []

    if "servers" in $config.sources {
        let server_state = collect_server_state
        $state_data = ($state_data | append $server_state)
    }

    if "services" in $config.sources {
        let service_state = collect_service_state
        $state_data = ($state_data | append $service_state)
    }

    if "clusters" in $config.sources {
        let cluster_state = collect_cluster_state
        $state_data = ($state_data | append $cluster_state)
    }

    $state_data | each {|state|
        $state | upsert timestamp (date now) | upsert collector "infrastructure_state"
    }
}

def collect_server_state []: nothing -> list {
    try {
        # Use provisioning query to get server state
        let servers = (nu -c "use core/nulib/main_provisioning/query.nu; main query servers --out json" | from json)

        $servers | each {|server|
            {
                resource_type: "server"
                resource_id: $server.name
                state: $server.state
                provider: $server.provider
                details: $server
            }
        }
    } catch {
        print "⚠️ Could not collect server state"
        []
    }
}

def collect_service_state []: nothing -> list {
    try {
        # Collect Docker container states
        if ((which docker | length) > 0) {
            docker ps -a --format "{{.ID}},{{.Names}},{{.Status}},{{.Image}}" | lines | each {|line|
                let parts = ($line | split row ",")
                if ($parts | length) >= 4 {
                    {
                        resource_type: "container"
                        resource_id: $parts.1
                        state: $parts.2
                        image: $parts.3
                        container_id: $parts.0
                    }
                }
            } | compact
        } else {
            []
        }
    } catch {
        []
    }
}

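# Example: the docker line "abc123,web,Up 2 hours,nginx:latest" becomes
#   { resource_type: "container", resource_id: "web", state: "Up 2 hours",
#     image: "nginx:latest", container_id: "abc123" }
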
def collect_cluster_state []: nothing -> list {
    try {
        # Collect Kubernetes cluster state if available
        if ((which kubectl | length) > 0) {
            let pods = (kubectl get pods -o json | from json)

            $pods.items | each {|pod|
                {
                    resource_type: "pod"
                    resource_id: $pod.metadata.name
                    namespace: $pod.metadata.namespace
                    state: $pod.status.phase
                    node: ($pod.spec.nodeName? | default "")  # unset while a pod is pending
                    details: {
                        containers: ($pod.spec.containers | length)
                        restart_count: ($pod.status.containerStatuses? | default [] | get restartCount | math sum)
                    }
                }
            }
        } else {
            []
        }
    } catch {
        []
    }
}

# Application logs collector
def collect_application_logs [config: record]: nothing -> list {
    collect_logs --since "1m" --sources $config.log_sources --output_format "list"
}

# Cost metrics collector
def collect_cost_metrics [config: record]: nothing -> list {
    let cost_data = ($config.providers | each {|provider|
        collect_provider_costs $provider
    } | flatten)

    $cost_data | each {|cost|
        $cost | upsert timestamp (date now) | upsert collector "cost_metrics"
    }
}

def collect_provider_costs [provider: string]: nothing -> list {
    match $provider {
        "aws" => (collect_aws_costs)
        "gcp" => (collect_gcp_costs)
        "azure" => (collect_azure_costs)
        _ => []
    }
}

def collect_aws_costs []: nothing -> list {
    try {
        if ((which aws | length) > 0) {
            # Use AWS Cost Explorer API (requires setup)
            # For now, return a mock data structure
            [{
                provider: "aws"
                service: "ec2"
                cost_usd: 125.50
                period: "daily"
                region: "us-east-1"
            }]
        } else {
            []
        }
    } catch {
        []
    }
}

def collect_gcp_costs []: nothing -> list {
    # GCP billing API integration would go here
    []
}

def collect_azure_costs []: nothing -> list {
    # Azure cost management API integration would go here
    []
}

# Security events collector
def collect_security_events [config: record]: nothing -> list {
    mut security_events = []

    if "auth" in $config.sources {
        $security_events = ($security_events | append (collect_auth_events))
    }

    if "network" in $config.sources {
        $security_events = ($security_events | append (collect_network_events))
    }

    if "filesystem" in $config.sources {
        $security_events = ($security_events | append (collect_filesystem_events))
    }

    $security_events | each {|event|
        $event | upsert timestamp (date now) | upsert collector "security_events"
    }
}

def collect_auth_events []: nothing -> list {
    try {
        # Collect authentication logs
        if ("/var/log/auth.log" | path exists) {
            let auth_logs = (^tail -n 100 /var/log/auth.log | lines)

            $auth_logs | each {|line|
                if ($line | str contains "Failed password") {
                    {
                        event_type: "auth_failure"
                        severity: "medium"
                        message: $line
                        source: "auth.log"
                    }
                } else if ($line | str contains "Accepted publickey") {
                    {
                        event_type: "auth_success"
                        severity: "info"
                        message: $line
                        source: "auth.log"
                    }
                }
            } | compact
        } else {
            []
        }
    } catch {
        []
    }
}

def collect_network_events []: nothing -> list {
    # Network security events would be collected here
    # This could include firewall logs, intrusion detection, etc.
    []
}

def collect_filesystem_events []: nothing -> list {
    # File system security events
    # This could include file integrity monitoring, access logs, etc.
    []
}

# Performance metrics collector
def collect_performance_metrics [config: record]: nothing -> list {
    mut perf_metrics = []

    if "deployments" in $config.targets {
        $perf_metrics = ($perf_metrics | append (collect_deployment_metrics))
    }

    if "scaling" in $config.targets {
        $perf_metrics = ($perf_metrics | append (collect_scaling_metrics))
    }

    if "response_times" in $config.targets {
        $perf_metrics = ($perf_metrics | append (collect_response_time_metrics))
    }

    $perf_metrics | each {|metric|
        $metric | upsert timestamp (date now) | upsert collector "performance_metrics"
    }
}

def collect_deployment_metrics []: nothing -> list {
    # Track deployment performance
    # This would integrate with CI/CD systems; mock data for now
    [{
        metric_name: "deployment_duration"
        value: 300  # seconds
        deployment_id: "deploy-123"
        status: "success"
    }]
}

def collect_scaling_metrics []: nothing -> list {
    # Track auto-scaling events and performance
    []
}

def collect_response_time_metrics []: nothing -> list {
    # Collect application response times
    # This could integrate with APM tools
    []
}

# Save collected data
def save_collected_data [
    data: list
    collector_name: string
    output_dir: string
    enable_dataframes: bool
]: nothing -> nothing {

    let timestamp = (date now | format date "%Y-%m-%d_%H-%M-%S")
    let filename = $"($collector_name)_($timestamp)"

    if $enable_dataframes and (check_polars_available) {
        # Save as Parquet for efficient storage and querying
        let df = create_infra_dataframe $data --source $collector_name
        let parquet_path = ($output_dir | path join $"($filename).parquet")
        export_dataframe $df $parquet_path --format "parquet"
    } else {
        # Save as JSON
        let json_path = ($output_dir | path join $"($filename).json")
        $data | to json | save --force $json_path
    }
}

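# Files land in the output directory named by collector and timestamp, e.g.
# (illustrative): system_metrics_2024-01-01_12-00-00.parquet, or
# cost_metrics_2024-01-01_12-00-00.json when Polars is unavailable.
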
# Query collected observability data
export def query_observability_data [
    --collector: string = "all"
    --time_range: string = "1h"
    --data_dir: string = "data/observability"
    --query: string = ""
]: nothing -> any {

    print $"🔍 Querying observability data \(collector: ($collector), range: ($time_range)\)..."

    # NOTE: time_range is informational for now; files are not yet filtered by age
    let data_files = if $collector == "all" {
        glob ($data_dir | path join "*.parquet")
    } else {
        glob ($data_dir | path join $"($collector)_*.parquet")
    }

    if ($data_files | length) == 0 {
        print "No observability data found"
        return []
    }

    # Load and combine data
    let combined_data = ($data_files | each {|file|
        if (check_polars_available) {
            # Load parquet with Polars
            polars open $file
        } else {
            # Fallback to JSON if no Polars
            let json_file = ($file | str replace ".parquet" ".json")
            if ($json_file | path exists) {
                open $json_file
            } else {
                []
            }
        }
    } | flatten)

    if ($query | is-not-empty) {
        query_dataframe $combined_data $query
    } else {
        $combined_data
    }
}
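
# Example queries (the --query syntax is whatever query_dataframe from
# polars_integration.nu accepts; the strings below are illustrative):
#
#   query_observability_data --collector system_metrics --time_range "24h"
#   query_observability_data --query "filter value > 80"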