feat(taskserv): implement real-time version checking with configurable HTTP client
- Add: GitHub API integration for live version checking in taskserv management
- Add: HTTP client configuration option (http.use_curl) in config.defaults.toml
- Add: Helper function fetch_latest_version with curl/http get support (sketched below)
- Fix: Settings path structure for prov_data_dirpath access pattern
- Remove: Legacy simulation code for version checking
- Update: Core configuration name from "provisioning-system" to "provisioning"
- Clean: Remove obsolete example configs and infrastructure files
This commit is contained in:
parent: 38a7470da0
commit: 3c3ef47f7f
34 changed files with 5942 additions and 13 deletions
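The files below are the observability portion of this change. For the version-checking piece named in the message, the helper would plausibly look something like the following minimal Nushell sketch; the GitHub endpoint, the config field http.use_curl, and the error handling are assumptions drawn only from the commit message, not the commit's actual code:

    # Hypothetical sketch of the fetch_latest_version helper named above.
    # Assumes a config record exposing http.use_curl and an "owner/repo" slug;
    # the real implementation in this commit may differ.
    def fetch_latest_version [repo: string, config: record]: nothing -> string {
        let url = $"https://api.github.com/repos/($repo)/releases/latest"
        let release = if ($config.http?.use_curl? | default false) {
            # Config opts into curl: shell out and parse the JSON body
            ^curl -sL $url | from json
        } else {
            # Default path: Nushell's built-in HTTP client
            http get $url
        }
        # Strip a leading "v" so "v1.2.3" and "1.2.3" compare alike
        $release | get tag_name | str replace -r '^v' ''
    }

    # Example: fetch_latest_version "etcd-io/etcd" $config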
taskservs/nushell/observability/collect.nu (new file, 347 lines)
@@ -0,0 +1,347 @@
# Observability Collection Scripts for Nushell Infrastructure
# Secure collection of system metrics, logs, and telemetry data

# Collect comprehensive system metrics
export def collect-system-metrics []: nothing -> record {
    let timestamp = (date now)

    mut metrics = {
        timestamp: ($timestamp | format date "%Y-%m-%d %H:%M:%S")
        hostname: ($env.HOSTNAME? | default "unknown")
        collection_version: "1.0.0"
    }

    # CPU metrics
    let cpu_metrics = try {
        # Keys in /proc/cpuinfo are tab-padded, so match rather than compare
        let cpu_info = (cat /proc/cpuinfo | lines | where $it =~ "processor|model name|cpu MHz" | parse "{key}: {value}")
        let cpu_count = ($cpu_info | where key =~ "processor" | length)
        let cpu_model = ($cpu_info | where key =~ "model name" | first | get value)

        # Load average
        let loadavg = (cat /proc/loadavg | split row " ")

        {
            cores: $cpu_count
            model: $cpu_model
            load_1m: ($loadavg | get 0 | into float)
            load_5m: ($loadavg | get 1 | into float)
            load_15m: ($loadavg | get 2 | into float)
        }
    } catch {
        {error: "Failed to collect CPU metrics"}
    }
    $metrics = ($metrics | insert cpu $cpu_metrics)

    # Memory metrics; the record is built inside the try expression because
    # catch blocks are closures and cannot assign to mutable variables
    $metrics = ($metrics | insert memory (try {
        let meminfo = (cat /proc/meminfo | lines | parse "{key}: {value} kB")
        let total_mem = ($meminfo | where key == "MemTotal" | first | get value | str trim | into int)
        let free_mem = ($meminfo | where key == "MemFree" | first | get value | str trim | into int)
        let available_mem = ($meminfo | where key == "MemAvailable" | first | get value | str trim | into int)
        let buffers = ($meminfo | where key == "Buffers" | first | get value | str trim | into int)
        let cached = ($meminfo | where key == "Cached" | first | get value | str trim | into int)

        {
            total_kb: $total_mem
            free_kb: $free_mem
            available_kb: $available_mem
            buffers_kb: $buffers
            cached_kb: $cached
            used_kb: ($total_mem - $free_mem)
            usage_percent: (($total_mem - $free_mem) / $total_mem * 100 | math round --precision 2)
        }
    } catch {
        {error: "Failed to collect memory metrics"}
    }))

    # Disk metrics; df aligns columns with runs of spaces, so a regex parse
    # is used instead of a literal single-space pattern
    $metrics = ($metrics | insert disk (try {
        df -k | lines | skip 1
        | parse -r '(?P<filesystem>\S+)\s+(?P<total>\d+)\s+(?P<used>\d+)\s+(?P<available>\d+)\s+(?P<percent>\d+%)\s+(?P<mount>.+)'
    } catch {
        {error: "Failed to collect disk metrics"}
    }))

    # Network metrics (basic); /proc/net/dev is also space-aligned
    $metrics = ($metrics | insert network (try {
        cat /proc/net/dev | lines | skip 2
        | parse -r '\s*(?P<interface>\S+):\s+(?P<rx_bytes>\d+)\s+(?P<rx_packets>\d+)\s+(?:\d+\s+){6}(?P<tx_bytes>\d+)\s+(?P<tx_packets>\d+)'
        | select interface rx_bytes tx_bytes rx_packets tx_packets
    } catch {
        {error: "Failed to collect network metrics"}
    }))

    # Process count; ls returns full paths, so match on the basename
    $metrics = ($metrics | insert processes (try {
        {
            total: (ls /proc | where {|p| ($p.name | path basename) =~ '^[0-9]+$'} | length)
        }
    } catch {
        {error: "Failed to collect process metrics"}
    }))

    return $metrics
}

# Collect container metrics (if running in containerized environment)
export def collect-container-metrics []: nothing -> record {
    let timestamp = (date now)

    mut metrics = {
        timestamp: ($timestamp | format date "%Y-%m-%d %H:%M:%S")
        container_runtime: "unknown"
    }

    # Check for Docker
    try {
        if (which docker | is-not-empty) {
            let containers = (docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Image}}" | lines | skip 1)
            $metrics = ($metrics | insert docker {
                available: true
                containers: ($containers | length)
                running: ($containers | where $it =~ "Up" | length)
            })
            # container_runtime already exists, so update rather than insert
            $metrics = ($metrics | update container_runtime "docker")
        }
    } catch {}

    # Check for Podman
    try {
        if (which podman | is-not-empty) {
            let containers = (podman ps --format "table {{.Names}}\t{{.Status}}\t{{.Image}}" | lines | skip 1)
            $metrics = ($metrics | insert podman {
                available: true
                containers: ($containers | length)
                running: ($containers | where $it =~ "Up" | length)
            })
            if ($metrics.container_runtime == "unknown") {
                $metrics = ($metrics | update container_runtime "podman")
            }
        }
    } catch {}

    # Check for Kubernetes
    try {
        if (which kubectl | is-not-empty) {
            let pods = (kubectl get pods --all-namespaces --no-headers | lines)
            $metrics = ($metrics | insert kubernetes {
                available: true
                pods_total: ($pods | length)
                pods_running: ($pods | where $it =~ "Running" | length)
                pods_pending: ($pods | where $it =~ "Pending" | length)
                pods_failed: ($pods | where $it =~ "Failed" | length)
            })
        }
    } catch {}

    return $metrics
}

# Collect application logs with filtering
export def collect-logs [
    --service(-s): string  # Specific service to collect logs from
    --since: string = "1h" # Time range (1h, 30m, etc.)
    --level: string = "error" # Log level filter
    --lines(-l): int = 100 # Maximum lines to collect
]: nothing -> list<record> {
    mut logs = []

    # Systemd journal logs
    try {
        mut journalctl_cmd = ["journalctl", "--output=json", "--no-pager", $"--since=($since)"]

        if ($service | is-not-empty) {
            $journalctl_cmd = ($journalctl_cmd | append ["-u", $service])
        }

        if (($level | is-not-empty) and ($level != "all")) {
            $journalctl_cmd = ($journalctl_cmd | append ["-p", $level])
        }

        if $lines != null {
            $journalctl_cmd = ($journalctl_cmd | append ["-n", ($lines | into string)])
        }

        # Spread the full argument list rather than indexing only element 1
        let journal_logs = (run-external ($journalctl_cmd | first) ...($journalctl_cmd | skip 1)
            | lines | where $it != "" | each { |line| $line | from json })
        $logs = ($logs | append $journal_logs)
    } catch {}

    # Container logs (Docker)
    try {
        if ((which docker | is-not-empty) and ($service | is-not-empty)) {
            let container_logs = (docker logs --since $since --tail $lines $service err> /dev/null
                | lines | enumerate | each { |item|
                {
                    timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
                    source: "docker"
                    container: $service
                    message: $item.item
                    line_number: $item.index
                }
            })
            $logs = ($logs | append $container_logs)
        }
    } catch {}

    # File-based logs (common locations)
    let log_files = [
        "/var/log/syslog"
        "/var/log/messages"
        "/var/log/kern.log"
        "/var/log/auth.log"
    ]

    for log_file in $log_files {
        try {
            if ($log_file | path exists) {
                let file_logs = (tail -n $lines $log_file | lines | enumerate | each { |item|
                    {
                        timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
                        source: "file"
                        file: $log_file
                        message: $item.item
                        line_number: $item.index
                    }
                })
                $logs = ($logs | append $file_logs)
            }
        } catch {}
    }

    return ($logs | first $lines)
}

# Process and analyze log patterns
export def analyze-logs [logs: list<record>]: nothing -> record {
    let total_logs = ($logs | length)

    if $total_logs == 0 {
        return {
            total: 0
            analysis: "No logs to analyze"
        }
    }

    # Error pattern analysis (not every source guarantees a message field)
    let error_patterns = ["error", "failed", "exception", "critical", "fatal"]
    mut error_counts = {}

    for pattern in $error_patterns {
        let count = ($logs | where {|log| ($log | get -i message | default "") =~ $"(?i)($pattern)"} | length)
        $error_counts = ($error_counts | insert $pattern $count)
    }

    # Source distribution
    let source_dist = ($logs | group-by source | transpose key value | each { |item|
        {source: $item.key, count: ($item.value | length)}
    })

    # Time-based analysis (last hour); timestamps are stored as strings,
    # so convert before comparing against a datetime
    let recent_logs = ($logs | where {|log| ($log.timestamp | into datetime) > ((date now) - 1hr)})

    return {
        total: $total_logs
        recent_count: ($recent_logs | length)
        error_patterns: $error_counts
        source_distribution: $source_dist
        analysis_timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
    }
}

# Export metrics in various formats
export def export-metrics [
    metrics: record
    --format(-f): string = "json" # json, yaml, csv
    --output(-o): string # Output file path
]: nothing -> any {
    let formatted_data = match $format {
        "yaml" => ($metrics | to yaml)
        "csv" => {
            # Flatten metrics for CSV export
            let flattened = ($metrics | flatten | transpose key value)
            $flattened | to csv
        }
        _ => ($metrics | to json)
    }

    if ($output | is-not-empty) {
        $formatted_data | save $output
        print $"Metrics exported to ($output)"
    } else {
        $formatted_data
    }
}

# Health monitoring function
export def health-monitor [
    --interval(-i): int = 60 # Collection interval in seconds
    --duration(-d): int = 300 # Total monitoring duration in seconds
    --output(-o): string # Output file for continuous monitoring
]: nothing -> nothing {
    let start_time = (date now)
    let end_time = ($start_time + ($duration * 1sec))

    print $"🔍 Starting health monitoring for ($duration) seconds with ($interval)s intervals"
    print $"📊 Collecting system and container metrics"

    while (date now) < $end_time {
        let current_time = (date now)
        let system_metrics = (collect-system-metrics)
        let container_metrics = (collect-container-metrics)

        let combined_metrics = {
            collection_time: ($current_time | format date "%Y-%m-%d %H:%M:%S")
            system: $system_metrics
            containers: $container_metrics
        }

        if ($output | is-not-empty) {
            $combined_metrics | to json | save -a $output
        } else {
            # Load average is not a percentage, so label it as load
            print $"⏰ ($current_time | format date "%H:%M:%S") - Load\(1m\): ($system_metrics.cpu?.load_1m?) | Memory: ($system_metrics.memory?.usage_percent?)%"
        }

        sleep ($interval * 1sec)
    }

    print "✅ Health monitoring completed"
}

# Quick system status check
export def status-check []: nothing -> record {
    let system = (collect-system-metrics)
    let containers = (collect-container-metrics)

    # Determine overall health
    mut health_status = "healthy"
    mut alerts = []

    # CPU load check
    if (($system.cpu?.load_1m? | default 0) > 4.0) {
        $health_status = "warning"
        $alerts = ($alerts | append "High CPU load")
    }

    # Memory usage check
    if (($system.memory?.usage_percent? | default 0) > 90) {
        $health_status = "critical"
        $alerts = ($alerts | append "High memory usage")
    }

    # Disk usage check
    try {
        let high_disk = ($system.disk | where {|x| ($x.percent | str replace "%" "" | into float) > 90})
        if ($high_disk | length) > 0 {
            $health_status = "warning"
            $alerts = ($alerts | append "High disk usage")
        }
    } catch {}

    return {
        status: $health_status
        alerts: $alerts
        metrics: {
            system: $system
            containers: $containers
        }
        timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
    }
}
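A quick smoke test of the collectors above from an interactive session (module path assumed from this commit's layout):

    use taskservs/nushell/observability/collect.nu *

    # One-off snapshot, then export it with the flags defined above
    let status = (status-check)
    print $status.status $status.alerts
    export-metrics $status.metrics --format yaml --output /tmp/metrics.yaml

    # Five minutes of sampling at 30-second intervals, appended as JSON
    health-monitor --interval 30 --duration 300 --output /tmp/health.jsonl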
taskservs/nushell/observability/process.nu (new file, 419 lines)
@@ -0,0 +1,419 @@
# Log Processing and Analysis Scripts for Nushell Infrastructure
# Advanced log parsing, filtering, and transformation capabilities

# Parse structured logs from various formats
export def parse-logs [
    --format(-f): string = "auto" # json, syslog, apache, nginx, auto
    --filter: string # Filter expression
    --transform: string # Transform expression
]: string -> list<record> {
    let input_data = $in

    # Auto-detect format if not specified (inspect the first line)
    let detected_format = if $format == "auto" {
        if ($input_data | lines | first | str starts-with "{") {
            "json"
        } else if ($input_data | lines | first | str contains "T") {
            "syslog"
        } else {
            "text"
        }
    } else {
        $format
    }

    # Parse based on format
    mut parsed_logs = match $detected_format {
        "json" => {
            $input_data | lines | where $it != "" | each { |line|
                try {
                    $line | from json
                } catch {
                    {raw: $line, parse_error: true}
                }
            }
        }
        "syslog" => {
            $input_data | lines | each { |line|
                # RFC3164 syslog format: <priority>timestamp hostname tag: message
                let syslog_pattern = '<(?P<priority>\d+)>(?P<timestamp>\w+\s+\d+\s+\d+:\d+:\d+)\s+(?P<hostname>\S+)\s+(?P<tag>\S+):\s*(?P<message>.*)'
                try {
                    let matches = ($line | parse -r $syslog_pattern)
                    if ($matches | length) > 0 {
                        $matches | first
                    } else {
                        {raw: $line, format: "syslog"}
                    }
                } catch {
                    {raw: $line, parse_error: true}
                }
            }
        }
        "apache" => {
            $input_data | lines | each { |line|
                # Apache Combined Log Format
                let apache_pattern = '(?P<ip>\S+)\s+\S+\s+\S+\s+\[(?P<timestamp>[^\]]+)\]\s+"(?P<method>\S+)\s+(?P<url>\S+)\s+(?P<protocol>[^"]+)"\s+(?P<status>\d+)\s+(?P<size>\d+|-)\s+"(?P<referer>[^"]*)"\s+"(?P<user_agent>[^"]*)"'
                try {
                    let matches = ($line | parse -r $apache_pattern)
                    if ($matches | length) > 0 {
                        $matches | first
                    } else {
                        {raw: $line, format: "apache"}
                    }
                } catch {
                    {raw: $line, parse_error: true}
                }
            }
        }
        "nginx" => {
            $input_data | lines | each { |line|
                # Nginx default log format
                let nginx_pattern = '(?P<ip>\S+)\s+-\s+-\s+\[(?P<timestamp>[^\]]+)\]\s+"(?P<method>\S+)\s+(?P<url>\S+)\s+(?P<protocol>[^"]+)"\s+(?P<status>\d+)\s+(?P<size>\d+)\s+"(?P<referer>[^"]*)"\s+"(?P<user_agent>[^"]*)"'
                try {
                    let matches = ($line | parse -r $nginx_pattern)
                    if ($matches | length) > 0 {
                        $matches | first
                    } else {
                        {raw: $line, format: "nginx"}
                    }
                } catch {
                    {raw: $line, parse_error: true}
                }
            }
        }
        _ => {
            $input_data | lines | enumerate | each { |item|
                {
                    line_number: $item.index
                    message: $item.item
                    timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
                }
            }
        }
    }

    # Apply filter if specified; each record is round-tripped through a nu
    # subshell as NUON so the expression sees a real value, not display text
    if ($filter | is-not-empty) {
        $parsed_logs = ($parsed_logs | where {|log|
            try {
                (nu -c $"($log | to nuon) | ($filter) | to nuon") | str trim | from nuon
            } catch {
                false
            }
        })
    }

    # Apply transformation if specified
    if ($transform | is-not-empty) {
        $parsed_logs = ($parsed_logs | each { |log|
            try {
                (nu -c $"($log | to nuon) | ($transform) | to nuon") | str trim | from nuon
            } catch {
                $log
            }
        })
    }

    return $parsed_logs
}

# Aggregate logs by time windows
export def aggregate-by-time [
    logs: list<record>
    --window(-w): string = "1h" # Time window: 1m, 5m, 1h, 1d
    --field(-f): string = "timestamp" # Timestamp field name
    --metric(-m): string = "count" # Aggregation metric: count, sum, avg, max, min
    --group(-g): string # Group by field
]: nothing -> list<record> {
    # Parse time window
    let window_duration = match $window {
        "1m" => 60
        "5m" => 300
        "1h" => 3600
        "1d" => 86400
        _ => 3600 # Default to 1 hour
    }

    # Convert timestamps to epoch seconds and create time buckets
    let processed_logs = ($logs | each { |log|
        let timestamp_value = ($log | get -i $field | default (date now))
        let epoch = ($timestamp_value | into datetime | format date "%s" | into int)
        # Integer (floor) division snaps the epoch to the start of its bucket
        let bucket = (($epoch // $window_duration) * $window_duration)

        $log | insert time_bucket $bucket | insert epoch $epoch
    })

    # Group by time bucket and optional field; a composite string key keeps
    # the grouping flat in both cases
    let grouped = if ($group | is-not-empty) {
        $processed_logs | group-by {|row| $"($row.time_bucket):($row | get -i $group)"}
    } else {
        $processed_logs | group-by {|row| $"($row.time_bucket)"}
    }

    # Aggregate based on metric
    $grouped | transpose key logs | each { |bucket_data|
        let logs_in_bucket = $bucket_data.logs
        let bucket_epoch = ($logs_in_bucket | first | get time_bucket)
        let bucket_timestamp = ((("1970-01-01T00:00:00+00:00" | into datetime) + ($bucket_epoch * 1sec)) | format date "%Y-%m-%d %H:%M:%S")

        match $metric {
            "count" => {
                {
                    timestamp: $bucket_timestamp
                    window: $window
                    count: ($logs_in_bucket | length)
                }
            }
            "sum" => {
                # Requires a numeric "value" field to sum
                {
                    timestamp: $bucket_timestamp
                    window: $window
                    sum: ($logs_in_bucket | get value | math sum)
                }
            }
            "avg" => {
                {
                    timestamp: $bucket_timestamp
                    window: $window
                    average: ($logs_in_bucket | get value | math avg)
                }
            }
            _ => {
                {
                    timestamp: $bucket_timestamp
                    window: $window
                    count: ($logs_in_bucket | length)
                    logs: $logs_in_bucket
                }
            }
        }
    } | sort-by timestamp
}

# Detect anomalies in log patterns
export def detect-anomalies [
    logs: list<record>
    --field(-f): string = "message" # Field to analyze
    --threshold(-t): float = 2.0 # Standard deviation threshold
    --window(-w): string = "1h" # Time window for baseline
]: nothing -> list<record> {
    # Calculate baseline statistics
    let baseline_window = match $window {
        "1m" => 60
        "5m" => 300
        "1h" => 3600
        "1d" => 86400
        _ => 3600
    }

    let now = (date now)
    let baseline_start = ($now - ($baseline_window * 1sec))

    # Filter logs for baseline period (timestamps may be strings)
    let baseline_logs = ($logs | where {|log|
        let log_time = ($log | get -i timestamp | default $now | into datetime)
        $log_time >= $baseline_start and $log_time <= $now
    })

    if ($baseline_logs | length) == 0 {
        return []
    }

    # Count occurrences by time buckets; aggregate-by-time takes the logs
    # as a positional argument, not pipeline input
    let time_series = (aggregate-by-time $baseline_logs --window "5m" --field timestamp --metric count)

    # Calculate statistics
    let counts = ($time_series | get count)
    let mean = ($counts | math avg)
    let std_dev = ($counts | math stddev)

    # Find anomalies (values beyond threshold standard deviations)
    let anomaly_threshold_high = ($mean + ($threshold * $std_dev))
    let anomaly_threshold_low = ($mean - ($threshold * $std_dev))

    let anomalies = ($time_series | where {|bucket|
        $bucket.count > $anomaly_threshold_high or $bucket.count < $anomaly_threshold_low
    })

    # merge (not insert) extends each record with the analysis fields
    return ($anomalies | each { |anomaly|
        $anomaly | merge {
            anomaly_type: (if $anomaly.count > $anomaly_threshold_high { "spike" } else { "drop" })
            severity: (if (($anomaly.count - $mean) | math abs) > (3 * $std_dev) { "high" } else { "medium" })
            baseline_mean: $mean
            baseline_stddev: $std_dev
        }
    })
}

# Extract patterns and insights from logs
export def extract-patterns [
    logs: list<record>
    --field(-f): string = "message" # Field to analyze
    --pattern-type(-t): string = "error" # error, ip, url, email, custom
    --custom-regex(-r): string # Custom regex pattern
    --min-frequency(-m): int = 2 # Minimum pattern frequency
]: nothing -> list<record> {
    let field_values = ($logs | get -i $field | where $it != null)

    let patterns = match $pattern_type {
        "error" => {
            # Common error patterns
            let error_regexes = [
                'error:?\s*(.+)',
                'exception:?\s*(.+)',
                'failed:?\s*(.+)',
                'timeout:?\s*(.+)',
                'connection\s*refused:?\s*(.+)'
            ]

            mut all_matches = []
            for regex in $error_regexes {
                let matches = ($field_values | each { |value|
                    try {
                        $value | parse -r $regex | each { |match| $match.capture0 }
                    } catch {
                        []
                    }
                } | flatten)
                $all_matches = ($all_matches | append $matches)
            }
            $all_matches
        }
        "ip" => {
            # IP address pattern, wrapped in a capture group so parse yields a column
            let ip_regex = '(?P<ip>\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b)'
            $field_values | each { |value|
                try {
                    $value | parse -r $ip_regex | get ip
                } catch {
                    []
                }
            } | flatten
        }
        "url" => {
            # URL pattern
            let url_regex = '(?P<url>https?://[^\s<>"]+)'
            $field_values | each { |value|
                try {
                    $value | parse -r $url_regex | get url
                } catch {
                    []
                }
            } | flatten
        }
        "email" => {
            # Email pattern
            let email_regex = '(?P<email>\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b)'
            $field_values | each { |value|
                try {
                    $value | parse -r $email_regex | get email
                } catch {
                    []
                }
            } | flatten
        }
        "custom" => {
            if ($custom_regex | is-not-empty) {
                $field_values | each { |value|
                    try {
                        # Capture-group names are unknown here, so flatten all cells
                        $value | parse -r $custom_regex | each {|row| $row | values} | flatten
                    } catch {
                        []
                    }
                } | flatten
            } else {
                []
            }
        }
        _ => []
    }

    # Count pattern frequencies
    let pattern_counts = ($patterns | group-by {|x| $x} | transpose pattern occurrences | each { |item|
        {
            pattern: $item.pattern
            frequency: ($item.occurrences | length)
            examples: ($item.occurrences | first 3)
        }
    } | where frequency >= $min_frequency | sort-by frequency -r)

    return $pattern_counts
}

# Generate log summary report
export def generate-summary [
    logs: list<record>
    --timeframe(-t): string = "24h" # Timeframe for analysis
    --include-patterns(-p) # Include pattern analysis
    --include-anomalies(-a) # Include anomaly detection
]: nothing -> record {
    let total_logs = ($logs | length)
    let start_time = (date now | format date "%Y-%m-%d %H:%M:%S")

    if $total_logs == 0 {
        return {
            summary: "No logs to analyze"
            timestamp: $start_time
            total_logs: 0
        }
    }

    # Basic statistics; timestamps are stored as strings, so convert first
    let time_range = ($logs | get -i timestamp | where $it != null | each { |ts| $ts | into datetime })
    let earliest = ($time_range | sort | first)
    let latest = ($time_range | sort | last)
    let duration_hours = ((($latest - $earliest) / 1hr) | math round --precision 2)

    # Log level distribution
    let level_distribution = ($logs | get -i level | where $it != null | group-by {|x| $x} | transpose level count | each { |item|
        {level: $item.level, count: ($item.count | length)}
    } | sort-by count -r)

    # Source distribution
    let source_distribution = ($logs | get -i source | where $it != null | group-by {|x| $x} | transpose source count | each { |item|
        {source: $item.source, count: ($item.count | length)}
    } | sort-by count -r)

    mut summary_report = {
        analysis_timestamp: $start_time
        timeframe: $timeframe
        total_logs: $total_logs
        time_range: {
            earliest: ($earliest | format date "%Y-%m-%d %H:%M:%S")
            latest: ($latest | format date "%Y-%m-%d %H:%M:%S")
            duration_hours: $duration_hours
        }
        distribution: {
            by_level: $level_distribution
            by_source: $source_distribution
        }
        statistics: {
            logs_per_hour: (($total_logs / $duration_hours) | math round --precision 2)
            unique_sources: ($source_distribution | length)
            error_rate: (($logs | where {|log| ($log | get -i level | default "") =~ "error|critical|fatal"} | length) / $total_logs * 100 | math round --precision 2)
        }
    }

    # Add pattern analysis if requested
    if $include_patterns {
        let error_patterns = (extract-patterns $logs --pattern-type error --min-frequency 2)
        let ip_patterns = (extract-patterns $logs --pattern-type ip --min-frequency 3)

        $summary_report = ($summary_report | insert patterns {
            errors: $error_patterns
            ip_addresses: ($ip_patterns | first 10)
        })
    }

    # Add anomaly detection if requested
    if $include_anomalies {
        let anomalies = (detect-anomalies $logs --threshold 2.0 --window "1h")

        $summary_report = ($summary_report | insert anomalies {
            detected: ($anomalies | length)
            high_severity: ($anomalies | where severity == "high" | length)
            details: ($anomalies | first 5)
        })
    }

    return $summary_report
}
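A hedged end-to-end example for the parsers above (file path illustrative; the plain-text fallback stamps each line, so its timestamps feed cleanly into generate-summary and aggregate-by-time):

    use taskservs/nushell/observability/process.nu *

    # Parse a plain-text log, then summarize it with pattern analysis
    let parsed = (open --raw /tmp/app.log | parse-logs --format text)
    generate-summary $parsed --include-patterns | to yaml

    # Hourly counts over the same records
    aggregate-by-time $parsed --window "1h" --metric count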
taskservs/nushell/observability/simple-test.nu (new file, 50 lines)
@@ -0,0 +1,50 @@
# Simple test script for Nushell infrastructure
# Validates basic functionality without complex dependencies

export def test-basic-functionality []: nothing -> record {
    {
        nushell_version: (version | get version)
        current_time: (date now | format date "%Y-%m-%d %H:%M:%S")
        hostname: ($env.HOSTNAME? | default "unknown")
        user: ($env.USER? | default "unknown")
        working_directory: $env.PWD
        test_status: "passed"
    }
}

export def test-security-environment []: nothing -> record {
    {
        readonly_mode: ($env.NUSHELL_READONLY_MODE? | default "unknown")
        execution_mode: ($env.NUSHELL_EXECUTION_MODE? | default "unknown")
        audit_enabled: ($env.NUSHELL_AUDIT_ENABLED? | default "unknown")
        session_timeout: ($env.NUSHELL_SESSION_TIMEOUT? | default "unknown")
        test_status: "passed"
    }
}

export def test-file-operations []: nothing -> record {
    let test_results = {
        can_read_proc: (try { ls /proc | length } catch { 0 })
        can_read_tmp: (try { ls /tmp | length } catch { 0 })
        current_processes: (try { ps | length } catch { 0 })
        # df is external, so split its text output into lines before counting
        disk_usage: (try { df | lines | length } catch { 0 })
        test_status: "completed"
    }

    $test_results
}

# Main test function
export def run-all-tests []: nothing -> record {
    let basic_test = (test-basic-functionality)
    let security_test = (test-security-environment)
    let file_test = (test-file-operations)

    {
        timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
        basic_functionality: $basic_test
        security_environment: $security_test
        file_operations: $file_test
        overall_status: "tests_completed"
    }
}
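The test module can be exercised directly:

    use taskservs/nushell/observability/simple-test.nu *
    run-all-tests | to yaml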
taskservs/nushell/observability/telemetry.nu (new file, 398 lines)
@@ -0,0 +1,398 @@
# Telemetry and Monitoring Integration for Nushell Infrastructure
# Secure telemetry collection and forwarding capabilities

# Send telemetry data to configured endpoints
export def send-telemetry [
    data: record
    --endpoint(-e): string # Override default endpoint
    --format(-f): string = "json" # json, prometheus, influx
    --timeout(-t): int = 30 # Request timeout in seconds
    --retry(-r): int = 3 # Number of retries
]: nothing -> record {
    let telemetry_endpoint = ($endpoint | default ($env.NUSHELL_TELEMETRY_ENDPOINT? | default ""))

    if ($telemetry_endpoint | is-empty) {
        return {
            success: false
            error: "No telemetry endpoint configured"
            data_sent: false
        }
    }

    # Prepare data based on format
    let formatted_data = match $format {
        "prometheus" => {
            # Convert to Prometheus exposition format
            convert-to-prometheus $data
        }
        "influx" => {
            # Convert to InfluxDB line protocol
            convert-to-influx $data
        }
        _ => {
            # Default JSON format
            $data | to json
        }
    }

    # Add metadata; the formatted payload is what actually gets sent
    let telemetry_payload = {
        timestamp: (date now | format date "%Y-%m-%dT%H:%M:%S.%fZ")
        hostname: ($env.HOSTNAME? | default "unknown")
        agent: "nushell-provisioning"
        version: "1.0.0"
        format: $format
        data: $formatted_data
    }

    # Send with retries; the outcome of each try is captured in an immutable
    # record because catch closures cannot reference mutable variables
    mut attempt = 1
    mut last_error = ""
    while $attempt <= $retry {
        let result = (try {
            http post --max-time ($timeout * 1sec) --headers {"Content-Type": "application/json"} $telemetry_endpoint ($telemetry_payload | to json) | ignore
            {ok: true, error: ""}
        } catch { |err|
            {ok: false, error: $err.msg}
        })

        if $result.ok {
            return {
                success: true
                endpoint: $telemetry_endpoint
                attempt: $attempt
                data_sent: true
                timestamp: (date now | format date "%Y-%m-%d %H:%M:%S")
            }
        }

        $last_error = $result.error

        if $attempt < $retry {
            # Wait before retry (exponential backoff)
            let wait_time = ($attempt * $attempt * 2)
            sleep ($wait_time * 1sec)
        }

        $attempt = ($attempt + 1)
    }

    return {
        success: false
        error: $"Max retries exceeded: ($last_error)"
        endpoint: $telemetry_endpoint
        attempts: $retry
        data_sent: false
    }
}

# Convert metrics to Prometheus exposition format
def convert-to-prometheus [data: record]: nothing -> string {
    mut prometheus_output = ""
    let hostname = ($env.HOSTNAME? | default "unknown")

    # Process system metrics if available
    if ($data | get -i system | is-not-empty) {
        let sys = ($data | get system)

        # CPU metrics
        if ($sys | get -i cpu | is-not-empty) {
            let cpu = ($sys | get cpu)
            $prometheus_output = $prometheus_output + "# HELP system_load_1m System load average over 1 minute\n"
            $prometheus_output = $prometheus_output + "# TYPE system_load_1m gauge\n"
            $prometheus_output = $prometheus_output + $"system_load_1m{hostname=\"($hostname)\"} ($cpu.load_1m? | default 0)\n"

            $prometheus_output = $prometheus_output + "# HELP system_load_5m System load average over 5 minutes\n"
            $prometheus_output = $prometheus_output + "# TYPE system_load_5m gauge\n"
            $prometheus_output = $prometheus_output + $"system_load_5m{hostname=\"($hostname)\"} ($cpu.load_5m? | default 0)\n"
        }

        # Memory metrics
        if ($sys | get -i memory | is-not-empty) {
            let mem = ($sys | get memory)
            $prometheus_output = $prometheus_output + "# HELP system_memory_usage_percent Memory usage percentage\n"
            $prometheus_output = $prometheus_output + "# TYPE system_memory_usage_percent gauge\n"
            $prometheus_output = $prometheus_output + $"system_memory_usage_percent{hostname=\"($hostname)\"} ($mem.usage_percent? | default 0)\n"

            $prometheus_output = $prometheus_output + "# HELP system_memory_total_bytes Total memory in bytes\n"
            $prometheus_output = $prometheus_output + "# TYPE system_memory_total_bytes gauge\n"
            $prometheus_output = $prometheus_output + $"system_memory_total_bytes{hostname=\"($hostname)\"} (($mem.total_kb? | default 0) * 1024)\n"
        }
    }

    return $prometheus_output
}

# Convert metrics to InfluxDB line protocol
def convert-to-influx [data: record]: nothing -> string {
    mut influx_lines = []
    # Epoch nanoseconds; chrono-style %f yields the 9-digit fractional part
    let timestamp = (date now | format date "%s%f")
    let hostname = ($env.HOSTNAME? | default "unknown")

    # Process system metrics
    if ($data | get -i system | is-not-empty) {
        let sys = ($data | get system)

        # CPU metrics
        if ($sys | get -i cpu | is-not-empty) {
            let cpu = ($sys | get cpu)
            $influx_lines = ($influx_lines | append $"system_cpu,hostname=($hostname) load_1m=($cpu.load_1m? | default 0),load_5m=($cpu.load_5m? | default 0),load_15m=($cpu.load_15m? | default 0) ($timestamp)")
        }

        # Memory metrics
        if ($sys | get -i memory | is-not-empty) {
            let mem = ($sys | get memory)
            $influx_lines = ($influx_lines | append $"system_memory,hostname=($hostname) usage_percent=($mem.usage_percent? | default 0),total_kb=($mem.total_kb? | default 0),used_kb=($mem.used_kb? | default 0) ($timestamp)")
        }

        # Process metrics
        if ($sys | get -i processes | is-not-empty) {
            let proc = ($sys | get processes)
            $influx_lines = ($influx_lines | append $"system_processes,hostname=($hostname) total=($proc.total? | default 0) ($timestamp)")
        }
    }

    return ($influx_lines | str join "\n")
}

# Create and manage telemetry batches
export def batch-telemetry [
    --max-batch-size(-s): int = 100 # Maximum items per batch
    --max-wait-time(-w): int = 30 # Maximum wait time in seconds
    --output-file(-o): string # File to store batched data
]: nothing -> nothing {
    mut batch = []
    mut batch_start_time = (date now)

    print $"📊 Starting telemetry batching \(max size: ($max_batch_size), max wait: ($max_wait_time)s\)"

    # Monitor for telemetry data
    while true {
        # In production the data would typically come from external sources;
        # for demonstration, sample metrics are collected locally
        let current_time = (date now)

        # Collect current metrics
        try {
            use ../observability/collect.nu *
            let metrics = (collect-system-metrics)

            # Add to batch
            $batch = ($batch | append {
                timestamp: ($current_time | format date "%Y-%m-%dT%H:%M:%S.%fZ")
                type: "system_metrics"
                data: $metrics
            })

            # Check batch conditions
            let batch_size = ($batch | length)
            let elapsed_time = (($current_time - $batch_start_time) / 1sec)

            if $batch_size >= $max_batch_size or $elapsed_time >= $max_wait_time {
                # Send batch (only pass --output-file when one was given)
                let batch_result = (if ($output_file | is-not-empty) {
                    send-batch $batch --output-file $output_file
                } else {
                    send-batch $batch
                })

                if $batch_result.success {
                    print $"✅ Batch sent successfully: ($batch_size) items"
                } else {
                    print $"❌ Batch send failed: ($batch_result.error)"
                }

                # Reset batch
                $batch = []
                $batch_start_time = (date now)
            }

        } catch { |err|
            print $"⚠️ Error collecting metrics: ($err.msg)"
        }

        # Wait before next collection
        sleep 10sec
    }
}

# Send a batch of telemetry data
def send-batch [
    batch: list<record>
    --output-file(-o): string
]: nothing -> record {
    if ($batch | length) == 0 {
        return {success: true, message: "Empty batch, nothing to send"}
    }

    let batch_payload = {
        batch_id: (random uuid)
        batch_size: ($batch | length)
        batch_timestamp: (date now | format date "%Y-%m-%dT%H:%M:%S.%fZ")
        hostname: ($env.HOSTNAME? | default "unknown")
        agent: "nushell-telemetry"
        items: $batch
    }

    # Save to file if specified; the result record is built inside the try
    # expression because return does not cross a catch closure boundary
    if ($output_file | is-not-empty) {
        let save_result = (try {
            $batch_payload | to json | save -a $output_file
            {
                success: true
                message: $"Batch saved to file: ($output_file)"
                batch_size: ($batch | length)
            }
        } catch { |err|
            {
                success: false
                error: $"Failed to save batch: ($err.msg)"
            }
        })
        return $save_result
    }

    # Send to telemetry endpoint
    let endpoint = ($env.NUSHELL_TELEMETRY_ENDPOINT? | default "")
    if ($endpoint | is-not-empty) {
        return (send-telemetry $batch_payload --endpoint $endpoint)
    } else {
        return {
            success: false
            error: "No telemetry endpoint configured"
        }
    }
}

# Monitor system health and send alerts
export def health-monitoring [
    --alert-threshold(-t): record = {cpu: 80, memory: 90, disk: 95} # Alert thresholds
    --check-interval(-i): int = 60 # Check interval in seconds
    --alert-endpoint(-e): string # Alert webhook endpoint
]: nothing -> nothing {
    print $"🔍 Starting health monitoring with thresholds: ($alert_threshold)"

    while true {
        try {
            use ../observability/collect.nu *
            let status = (status-check)

            # Check for threshold violations
            mut alerts = []

            # CPU check (percent threshold crudely mapped onto load average)
            if ($status.metrics.system.cpu?.load_1m? | default 0) > ($alert_threshold.cpu / 10.0) {
                $alerts = ($alerts | append {
                    type: "cpu_high"
                    severity: "warning"
                    message: $"High CPU load: ($status.metrics.system.cpu.load_1m)"
                    threshold: $alert_threshold.cpu
                    current_value: $status.metrics.system.cpu.load_1m
                })
            }

            # Memory check
            if ($status.metrics.system.memory?.usage_percent? | default 0) > $alert_threshold.memory {
                $alerts = ($alerts | append {
                    type: "memory_high"
                    severity: "critical"
                    message: $"High memory usage: ($status.metrics.system.memory.usage_percent)%"
                    threshold: $alert_threshold.memory
                    current_value: $status.metrics.system.memory.usage_percent
                })
            }

            # Disk check
            try {
                let high_disk_usage = ($status.metrics.system.disk | where {|disk|
                    ($disk.percent | str replace "%" "" | into float) > $alert_threshold.disk
                })

                if ($high_disk_usage | length) > 0 {
                    for disk in $high_disk_usage {
                        $alerts = ($alerts | append {
                            type: "disk_high"
                            severity: "critical"
                            message: $"High disk usage on ($disk.mount): ($disk.percent)"
                            threshold: $alert_threshold.disk
                            current_value: ($disk.percent | str replace "%" "" | into float)
                            filesystem: $disk.filesystem
                            mount: $disk.mount
                        })
                    }
                }
            } catch {}

            # Send alerts if any
            if ($alerts | length) > 0 {
                let alert_payload = {
                    timestamp: (date now | format date "%Y-%m-%dT%H:%M:%S.%fZ")
                    hostname: ($env.HOSTNAME? | default "unknown")
                    alert_count: ($alerts | length)
                    alerts: $alerts
                    system_status: $status
                }

                # Send to telemetry endpoint
                let result = (send-telemetry $alert_payload --endpoint ($alert_endpoint | default ($env.NUSHELL_TELEMETRY_ENDPOINT? | default "")))

                if $result.success {
                    print $"🚨 Sent ($alerts | length) alerts to monitoring system"
                } else {
                    print $"❌ Failed to send alerts: ($result.error)"
                }

                # Also log alerts locally
                $alerts | each { |alert|
                    print $"⚠️ ALERT: ($alert.type) - ($alert.message)"
                } | ignore
            }

            # Send regular health status
            let health_payload = {
                type: "health_check"
                timestamp: (date now | format date "%Y-%m-%dT%H:%M:%S.%fZ")
                status: $status
            }

            send-telemetry $health_payload | ignore

        } catch { |err|
            print $"❌ Health monitoring error: ($err.msg)"
        }

        sleep ($check_interval * 1sec)
    }
}

# Initialize telemetry configuration; --env is required so the $env
# assignments persist in the caller's scope
export def --env init-telemetry [
    --endpoint(-e): string # Telemetry endpoint URL
    --format(-f): string = "json" # Default format
    --enable-health # Enable health monitoring (-h is reserved for help)
    --config-file(-c): string # Save configuration to file
]: nothing -> record {
    let config = {
        endpoint: ($endpoint | default "")
        format: $format
        health_monitoring: $enable_health
        created: (date now | format date "%Y-%m-%d %H:%M:%S")
        version: "1.0.0"
    }

    # Set environment variables
    $env.NUSHELL_TELEMETRY_ENDPOINT = ($endpoint | default "")
    $env.NUSHELL_TELEMETRY_FORMAT = $format
    $env.NUSHELL_TELEMETRY_ENABLED = "true"

    # Save configuration if file specified
    if ($config_file | is-not-empty) {
        try {
            $config | to json | save $config_file
            print $"📝 Telemetry configuration saved to ($config_file)"
        } catch { |err|
            print $"⚠️ Failed to save configuration: ($err.msg)"
        }
    }

    print "🔧 Telemetry initialized:"
    print $"  Endpoint: ($config.endpoint)"
    print $"  Format: ($config.format)"
    print $"  Health monitoring: ($config.health_monitoring)"

    return $config
}
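A short end-to-end sketch under the same assumptions (endpoint URL illustrative):

    use taskservs/nushell/observability/collect.nu *
    use taskservs/nushell/observability/telemetry.nu *

    # Point the agent at a collector, then send one metrics snapshot
    init-telemetry --endpoint "http://collector.internal:8086/ingest" --format json
    let result = (send-telemetry (collect-system-metrics) --retry 2)
    if not $result.success { print $"send failed: ($result.error)" }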