agregar utilidad para ssd
This commit is contained in:
parent
23fc456614
commit
74e778345e
|
|
@ -0,0 +1,573 @@
|
|||
#!/usr/bin/env bash
|
||||
# ssd-health.sh — SSD/NVMe health + wear + filesystem usage summary
|
||||
# Exit codes: 0=OK, 1=WARNING/INCONCLUSIVE, 2=CRITICAL
|
||||
|
||||
# Strict mode: abort on command errors, on use of unset variables, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Defaults; the command-line parser below may override each of them.
MODE="text"      # output format: text|json
DETAIL=0         # 1 = also print the (trimmed) raw smartctl output per device
ONLY_DEV=""      # user requested device (may be /dev/sda, sda, /dev/sda1, etc.)
TIMEOUT_SECS=8   # per-device time limit (seconds) applied to smartctl
|
||||
|
||||
# Print the command-line help text to stdout.
usage() {
  printf '%s\n' \
    'Usage:' \
    'ssd-health.sh [--json] [--detail] [--dev <blockdev>] [--timeout <secs>]' \
    '' \
    'Notes:' \
    '--dev accepts: sda | /dev/sda | nvme0n1 | /dev/nvme0n1 | /dev/sda1 (partition -> parent disk)' \
    '' \
    'Examples:' \
    'sudo ./ssd-health.sh' \
    'sudo ./ssd-health.sh --json' \
    'sudo ./ssd-health.sh --dev /dev/sda' \
    'sudo ./ssd-health.sh --dev /dev/nvme0n1'
}
|
||||
|
||||
# Succeed (status 0) when the command named by $1 can be resolved by the
# shell; produce no output either way.
need_cmd() {
  local tool="$1"
  command -v "$tool" >/dev/null 2>&1
}
|
||||
|
||||
# Parse the command line into the MODE / DETAIL / ONLY_DEV / TIMEOUT_SECS
# globals.
#
# Arguments: the script's positional parameters.
# Exits:     0 after printing help (-h/--help);
#            2 on an unknown argument, on an option whose value is missing,
#              or on a non-numeric --timeout (message + usage on stderr).
#
# An option given as the last word used to hit `shift 2` with a single
# remaining parameter; that fails and, under `set -e`, killed the script
# silently with status 1. The value is now checked before shifting.
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --json)
        MODE="json"
        shift
        ;;
      --detail)
        DETAIL=1
        shift
        ;;
      --dev|--disk)
        if (( $# < 2 )); then
          echo "Option $1 requires a value." >&2
          usage >&2
          exit 2
        fi
        # An explicitly empty value keeps meaning "no device filter".
        ONLY_DEV="$2"
        shift 2
        ;;
      --timeout)
        if (( $# < 2 )); then
          echo "Option $1 requires a value." >&2
          usage >&2
          exit 2
        fi
        # An explicitly empty value falls back to the default of 8 seconds.
        TIMEOUT_SECS="${2:-8}"
        # The value is later passed to timeout(1) as "<value>s", so it must
        # be a plain non-negative number.
        if [[ ! "$TIMEOUT_SECS" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
          echo "Invalid --timeout value: $TIMEOUT_SECS (expected seconds)." >&2
          usage >&2
          exit 2
        fi
        shift 2
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown arg: $1" >&2
        usage >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
|
||||
|
||||
# Verify the external tools this script shells out to before doing any work.
# Any missing tool is fatal: message on stderr, exit status 2.
missing_base=0
for base_cmd in lsblk awk sed grep; do
  if ! need_cmd "$base_cmd"; then
    missing_base=1
    break
  fi
done
if (( missing_base )); then
  echo "Missing required base commands (lsblk/awk/sed/grep)." >&2
  exit 2
fi
unset missing_base base_cmd

# smartctl gets its own message because it usually needs a package install.
need_cmd smartctl || {
  echo "Missing smartctl. Install smartmontools:" >&2
  echo " sudo apt-get update && sudo apt-get install -y smartmontools" >&2
  exit 2
}
|
||||
|
||||
# Normalize a user-supplied device into a whole-disk name (e.g. sda, nvme0n1).
#
# Arguments: $1 - sda | /dev/sda | /dev/sda1 | nvme0n1p2 | ...
# Outputs:   the disk name, without the /dev/ prefix, on stdout
# Returns:   0 on success; 1 when the argument is empty, is not known to
#            lsblk, is neither a disk nor a partition, or is a partition
#            whose parent cannot be determined.
#
# lsblk is queried with -d so that only the named device is reported. Without
# -d a partitioned disk also lists its children, which made a disk look like
# a "part". The answer is compared in the shell instead of through
# `grep -q`, which under `pipefail` can turn a successful match into a
# pipeline failure when lsblk receives SIGPIPE.
normalize_to_disk_name() {
  local arg="${1-}"
  local dev_type parent

  [[ -n "$arg" ]] || return 1

  # Strip /dev/ prefix if present.
  arg="${arg#/dev/}"

  dev_type="$(lsblk -dn -o TYPE "/dev/$arg" 2>/dev/null)" || return 1
  dev_type="${dev_type%%$'\n'*}"
  dev_type="${dev_type//[[:space:]]/}"

  case "$dev_type" in
    disk)
      printf '%s\n' "$arg"
      ;;
    part)
      # Map the partition to its parent disk: sda1 -> sda, nvme0n1p2 -> nvme0n1.
      parent="$(lsblk -dn -o PKNAME "/dev/$arg" 2>/dev/null)" || return 1
      parent="${parent%%$'\n'*}"
      parent="${parent//[[:space:]]/}"
      [[ -n "$parent" ]] || return 1
      printf '%s\n' "$parent"
      ;;
    *)
      # Anything else (lvm, loop, rom, ...) is not handled.
      return 1
      ;;
  esac
}
|
||||
|
||||
# Look up the smartctl device type (the word following "-d") that
# `smartctl --scan-open` reports for /dev/$1.
#
# Arguments: $1 - device name without the /dev/ prefix
# Outputs:   the type on stdout; nothing useful (empty) when the device is
#            not listed or its entry carries no -d option
# Returns:   always 0; scan failures are deliberately ignored
smart_scan_type_for() {
  local target="/dev/$1"

  smartctl --scan-open 2>/dev/null \
    | awk -v want="$target" '
        $1 == want {
          seen = 1
          for (i = 1; i <= NF; i++) {
            if ($i == "-d") {
              print $(i + 1)
              break
            }
          }
          exit
        }
        END { if (!seen) print "" }
      ' || true
}
|
||||
|
||||
# Run smartctl against /dev/$1 and print whatever it writes to stdout.
#
# Arguments: $1 - device name without the /dev/ prefix
#            $2 - smartctl device type for -d; empty lets smartctl decide
# Globals:   TIMEOUT_SECS (read) - limit applied when timeout(1) is available
# Returns:   always 0; smartctl's stderr and exit status are discarded on
#            purpose, callers judge usability from the captured text
smart_run() {
  local name="$1"
  local dtype="$2"
  local -a cmd=()

  # Bound the runtime only when timeout(1) is installed.
  if need_cmd timeout; then
    cmd+=(timeout "${TIMEOUT_SECS}s")
  fi

  cmd+=(smartctl -a -H -i -A -n standby)
  if [[ -n "$dtype" ]]; then
    cmd+=(-d "$dtype")
  fi
  cmd+=("/dev/$name")

  "${cmd[@]}" 2>/dev/null || true
}
|
||||
|
||||
# Classify smartctl output read from stdin by the first marker line found.
#
# Outputs: OK          - an information section or NVMe SMART section marker
#          UNSUPPORTED - "Device does not support SMART" / "Unknown USB bridge"
#          UNAVAILABLE - none of the markers appear (e.g. empty output)
smart_access_state() {
  awk '
    BEGIN { state = "UNAVAILABLE" }
    /=== START OF INFORMATION SECTION ===/ || /SMART\/Health Information/ || /NVMe Log 0x02/ {
      state = "OK"
      exit
    }
    /Device does not support SMART/ || /Unknown USB bridge/ {
      state = "UNSUPPORTED"
      exit
    }
    END { print state }
  '
}
|
||||
|
||||
# Reduce smartctl output (stdin) to PASSED, FAILED or UNKNOWN.
#
# Only the first matching status line is considered: the
# "overall-health self-assessment test result:" line counts as PASSED when it
# contains PASSED or OK, the "SMART Health Status:" line when it contains OK.
# UNKNOWN is printed when neither line is present.
parse_health() {
  awk '
    /overall-health self-assessment test result:/ {
      verdict = ($0 ~ /PASSED|OK/) ? "PASSED" : "FAILED"
      exit
    }
    /SMART Health Status:/ {
      verdict = ($0 ~ /OK/) ? "PASSED" : "FAILED"
      exit
    }
    END {
      if (verdict == "") verdict = "UNKNOWN"
      print verdict
    }
  '
}
|
||||
|
||||
# Extract a temperature reading from smartctl output on stdin.
#
# The first line that yields a value wins:
#   - "Temperature: ... Celsius" or "Current Drive Temperature:" lines give
#     their first purely numeric field;
#   - Temperature_Celsius / Temperature_Internal attribute rows give field 10.
# Nothing is printed when no such line exists.
parse_temp_c() {
  awk '
    # First whitespace-separated field made only of digits, or "" if none.
    function first_int(    i) {
      for (i = 1; i <= NF; i++) {
        if ($i ~ /^[0-9]+$/) return $i
      }
      return ""
    }
    /Temperature:/ && /Celsius/ {
      t = first_int()
      if (t != "") { print t; exit }
    }
    /Current Drive Temperature:/ {
      t = first_int()
      if (t != "") { print t; exit }
    }
    /Temperature_Celsius/ || /Temperature_Internal/ {
      print $10
      exit
    }
  '
}
|
||||
|
||||
# Print field 10 of the first stdin line whose second field equals $1.
# In this script that is used on smartctl attribute rows, where the second
# field is the attribute name. Prints nothing when no line matches.
parse_ata_attr_raw() {
  local wanted="$1"
  awk -v name="$wanted" '{
    if ($2 == name) {
      print $10
      exit
    }
  }'
}
|
||||
|
||||
# Extract the integer value of a "Label:   value" line from smartctl output
# read on stdin.
#
# Arguments: $1 - label text without the trailing colon; the line must start
#                 with "<label>:"
# Outputs:   for the first matching line, the value as a plain decimal
#            integer (an empty line when the value holds no digits);
#            nothing when no line matches
#
# Only the text after the colon is parsed:
#   - "0x.." values are converted from hexadecimal. Merely stripping
#     non-digits turned 0x08 into "008", which Bash arithmetic rejects as
#     invalid octal, and lost hex letters altogether;
#   - bracketed/parenthesised annotations such as "[632 GB]" are dropped
#     before the digits are collected, so they are not fused onto the number;
#   - thousands separators and unit suffixes ("1,234", "5%") are removed;
#   - leading zeros are removed so callers can use the result in (( )).
parse_nvme_int() {
  local label="$1"
  awk -v l="$label" '
    # Convert a string of hex digits to a number; stops at the first
    # character that is not a hex digit.
    function hex2dec(h,    i, v, n) {
      n = 0
      h = tolower(h)
      for (i = 1; i <= length(h); i++) {
        v = index("0123456789abcdef", substr(h, i, 1)) - 1
        if (v < 0) break
        n = n * 16 + v
      }
      return n
    }
    index($0, l ":") == 1 {
      val = substr($0, length(l) + 2)
      sub(/^[ \t]+/, "", val)
      if (match(val, /^0[xX][0-9a-fA-F]+/)) {
        print hex2dec(substr(val, 3, RLENGTH - 2))
      } else {
        sub(/\[.*$/, "", val)
        sub(/\(.*$/, "", val)
        gsub(/[^0-9]/, "", val)
        if (val != "") {
          sub(/^0+/, "", val)
          if (val == "") val = "0"
        }
        print val
      }
      exit
    }
  '
}
|
||||
|
||||
# From smartctl output on stdin, print the bracketed human-readable amount of
# the first line starting with "Data Units Written:" (e.g. "632 GB"), without
# the brackets. Prints an empty line when that line has no such bracket and
# nothing at all when the line is absent.
parse_nvme_bytes_units_written() {
  awk -v prefix="Data Units Written:" '
    substr($0, 1, length(prefix)) == prefix {
      human = ""
      if (match($0, /\[[0-9.,]+[[:space:]]*[A-Za-z]+\]/)) {
        human = substr($0, RSTART + 1, RLENGTH - 2)
      }
      print human
      exit
    }
  '
}
|
||||
|
||||
# From smartctl output on stdin, print the bracketed human-readable amount of
# the first line starting with "Data Units Read:" (e.g. "1.20 TB"), without
# the brackets. Prints an empty line when that line has no such bracket and
# nothing at all when the line is absent.
parse_nvme_bytes_units_read() {
  awk -v prefix="Data Units Read:" '
    substr($0, 1, length(prefix)) == prefix {
      human = ""
      if (match($0, /\[[0-9.,]+[[:space:]]*[A-Za-z]+\]/)) {
        human = substr($0, RSTART + 1, RLENGTH - 2)
      }
      print human
      exit
    }
  '
}
|
||||
|
||||
# Print $1 as a JSON string literal, including the surrounding quotes.
#
# python3's json.dumps is used when python3 is installed. The script never
# checks for python3, so without it every JSON field used to come out empty
# and the document was invalid; a pure-Bash fallback now covers that case.
# The fallback escapes backslash, double quote, newline, tab and carriage
# return, and drops any other control character (those would otherwise need
# \uXXXX escapes). Non-ASCII text is emitted as-is by the fallback.
json_escape() {
  local s="${1-}"

  if command -v python3 >/dev/null 2>&1; then
    python3 - "$s" <<'PY'
import json,sys
print(json.dumps(sys.argv[1]))
PY
    return
  fi

  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\t'/\\t}
  s=${s//$'\r'/\\r}
  s=${s//[[:cntrl:]]/}
  printf '"%s"\n' "$s"
}
|
||||
|
||||
# Determine the wear percentage from smartctl output, if it is exposed.
#
# Arguments: $1 - full smartctl output text
# Outputs:   one line "used_pct|remaining_pct|source|confidence"
#   - NVMe: used_pct is "Percentage Used" -> confidence "real"; remaining is
#     clamped at 0 because the drive may report more than 100% used.
#   - ATA: the first of Percent_Lifetime_Remain, SSD_Life_Left and
#     Media_Wearout_Indicator whose field 10 is an integer in 0..100 is
#     treated as "remaining" -> confidence "best_effort".
#   - Otherwise "||unknown|none" (empty used/remaining).
# Returns:   0 when a wear figure was found, 1 otherwise.
#
# The unknown line used to be "|||unknown|none": five fields for the four
# variables callers read, which left source empty and confidence set to
# "unknown|none". Numeric values are forced to base 10 so that a zero-padded
# value cannot be misread as octal.
#
# NOTE(review): field 10 is the attribute's RAW value. On some drives the
# normalized VALUE column, not the raw one, carries the remaining-life
# percentage - confirm per vendor before trusting best_effort figures.
wear_probe() {
  local smart_out="$1"

  # NVMe (reliable)
  local nvme_used rem
  nvme_used="$(printf "%s\n" "$smart_out" | parse_nvme_int "Percentage Used")"
  if [[ "$nvme_used" =~ ^[0-9]+$ ]]; then
    nvme_used=$(( 10#$nvme_used ))
    rem=$(( 100 - nvme_used ))
    if (( rem < 0 )); then rem=0; fi
    echo "${nvme_used}|${rem}|nvme_percentage_used|real"
    return 0
  fi

  # ATA (best effort; vendor-specific). Attributes with explicit "remain"
  # semantics come first; Media_Wearout_Indicator is only assumed to mean
  # "remaining".
  local spec attr src v
  for spec in \
    "Percent_Lifetime_Remain:ata_percent_lifetime_remain" \
    "SSD_Life_Left:ata_ssd_life_left" \
    "Media_Wearout_Indicator:ata_media_wearout_indicator_assumed_remaining"; do
    attr="${spec%%:*}"
    src="${spec#*:}"
    v="$(printf "%s\n" "$smart_out" | awk -v a="$attr" '$2==a {print $10; exit}')"
    if [[ "$v" =~ ^[0-9]+$ ]]; then
      v=$(( 10#$v ))
      if (( v <= 100 )); then
        echo "$((100 - v))|$v|${src}|best_effort"
        return 0
      fi
    fi
  done

  echo "||unknown|none"
  return 1
}
|
||||
|
||||
# Print the disk names to examine, one per line.
#
# Globals: ONLY_DEV (read) - when non-empty, only that device is listed,
#          after normalize_to_disk_name has mapped it to a disk name; an
#          unresolvable value prints an error on stderr and exits 2.
# Without ONLY_DEV: every lsblk entry with TYPE=disk and ROTA=0 (SSD-ish).
list_disks() {
  local dn

  if [[ -z "$ONLY_DEV" ]]; then
    lsblk -dn -o NAME,TYPE,ROTA 2>/dev/null \
      | awk '$2 == "disk" && $3 == 0 { print $1 }'
    return
  fi

  if ! dn="$(normalize_to_disk_name "$ONLY_DEV")"; then
    echo "ERROR: --dev '$ONLY_DEV' is not a valid disk/partition path on this host." >&2
    exit 2
  fi
  echo "$dn"
}
|
||||
|
||||
# List the mounted filesystems that live on disk $1.
#
# Outputs: one "name|mountpoint|fstype|size" line per lsblk row (raw mode)
#          that carries all four columns; rows with fewer fields - which is
#          what an empty mountpoint or fstype produces - are skipped, as are
#          rows whose mountpoint is "-".
fs_usage_for_disk() {
  local disk="$1"
  local part mnt fstype size _rest

  lsblk -nr "/dev/$disk" -o NAME,MOUNTPOINT,FSTYPE,SIZE 2>/dev/null \
    | while IFS=$' \t' read -r part mnt fstype size _rest; do
        # A fourth field is only present when every column is filled in.
        [[ -n "$size" ]] || continue
        [[ "$mnt" != "-" ]] || continue
        printf '%s|%s|%s|%s\n' "$part" "$mnt" "$fstype" "$size"
      done
}
|
||||
|
||||
# Severity logic: fold the health metrics of one device into a severity.
#
# Arguments (all but $1 optional; non-numeric values are ignored):
#   $1 health            - "FAILED" forces CRITICAL
#   $2 temp              - degrees Celsius: >=70 CRITICAL, >=60 WARNING
#   $3 pct_used          - wear used (higher is worse): >=90 CRITICAL, >=70 WARNING
#   $4 realloc, $5 pending, $6 offline, $7 nvme_crit_warn, $8 media_err
#                        - any value above zero is CRITICAL
# Outputs: 0 (OK), 1 (WARNING) or 2 (CRITICAL) on stdout.
#
# Every number is evaluated as base 10. A zero-padded value such as "008"
# would otherwise be an invalid octal literal, making the arithmetic test
# error out inside the `if` and silently skipping the CRITICAL condition.
severity_from_metrics() {
  local health="$1"
  local temp="${2:-}"
  local pct_used="${3:-}"
  local realloc="${4:-0}"
  local pending="${5:-0}"
  local offline="${6:-0}"
  local nvme_crit_warn="${7:-0}"
  local media_err="${8:-0}"

  local sev=0
  local counter

  if [[ "$health" == "FAILED" ]]; then
    sev=2
  fi

  # Error counters and the NVMe critical-warning field: non-zero is critical.
  for counter in "$nvme_crit_warn" "$realloc" "$pending" "$offline" "$media_err"; do
    if [[ "$counter" =~ ^[0-9]+$ ]] && (( 10#$counter > 0 )); then
      sev=2
    fi
  done

  if [[ "$pct_used" =~ ^[0-9]+$ ]]; then
    if (( 10#$pct_used >= 90 )); then
      sev=2
    elif (( 10#$pct_used >= 70 )) && (( sev < 1 )); then
      sev=1
    fi
  fi

  if [[ "$temp" =~ ^[0-9]+$ ]]; then
    if (( 10#$temp >= 70 )); then
      sev=2
    elif (( 10#$temp >= 60 )) && (( sev < 1 )); then
      sev=1
    fi
  fi

  echo "$sev"
}
|
||||
|
||||
# Print the human-readable report for every selected disk and exit with the
# worst severity found (0=OK, 1=WARNING/INCONCLUSIVE, 2=CRITICAL).
#
# Globals: ONLY_DEV, DETAIL (read)
# Exits:   always terminates the script. Exits 2 before printing anything
#          when the disk list cannot be produced.
#
# The disk list is captured before the report starts. list_disks rejects a
# bad --dev with `exit 2`; when it ran inside a process substitution that
# exit was lost and the report ended with "Overall: OK" and status 0.
# Best-effort lookups (lsblk columns, df on pseudo mountpoints such as
# "[SWAP]") are allowed to fail so `set -e`/`pipefail` cannot abort the
# report half-way.
main_text() {
  local overall_sev=0
  local disk_list d

  if ! disk_list="$(list_disks)"; then
    # list_disks already explained an invalid --dev on stderr.
    [[ -n "${ONLY_DEV:-}" ]] || echo "ERROR: could not enumerate disks." >&2
    exit 2
  fi

  echo "=== SSD/NVMe Health Report ($(date -Is)) ==="
  echo

  while read -r d; do
    [[ -z "$d" ]] && continue

    # Identification; every field is optional and falls back to "?" below.
    local dtype model serial size tran rota smart_out
    dtype="$(smart_scan_type_for "$d")"
    model="$(lsblk -dn -o MODEL "/dev/$d" 2>/dev/null | sed 's/[[:space:]]\+/ /g' | sed 's/^ //;s/ $//')" || true
    serial="$(lsblk -dn -o SERIAL "/dev/$d" 2>/dev/null | sed 's/[[:space:]]\+/ /g' | sed 's/^ //;s/ $//')" || true
    size="$(lsblk -dn -o SIZE "/dev/$d" 2>/dev/null | head -n1)" || true
    tran="$(lsblk -dn -o TRAN "/dev/$d" 2>/dev/null | head -n1)" || true
    rota="$(lsblk -dn -o ROTA "/dev/$d" 2>/dev/null | head -n1)" || true

    smart_out="$(smart_run "$d" "$dtype")"
    local smart_access
    smart_access="$(printf "%s\n" "$smart_out" | smart_access_state)"

    local health temp
    health="$(printf "%s\n" "$smart_out" | parse_health)"
    temp="$(printf "%s\n" "$smart_out" | parse_temp_c)"

    # NVMe fields (if present)
    local nvme_crit_warn media_err unsafe_shutdowns poh duw dur
    nvme_crit_warn="$(printf "%s\n" "$smart_out" | parse_nvme_int "Critical Warning")"
    media_err="$(printf "%s\n" "$smart_out" | parse_nvme_int "Media and Data Integrity Errors")"
    unsafe_shutdowns="$(printf "%s\n" "$smart_out" | parse_nvme_int "Unsafe Shutdowns")"
    poh="$(printf "%s\n" "$smart_out" | parse_nvme_int "Power On Hours")"
    duw="$(printf "%s\n" "$smart_out" | parse_nvme_bytes_units_written)"
    dur="$(printf "%s\n" "$smart_out" | parse_nvme_bytes_units_read)"

    # ATA attributes (if present)
    local realloc pending offline crc poh_ata
    realloc="$(printf "%s\n" "$smart_out" | parse_ata_attr_raw "Reallocated_Sector_Ct")"
    pending="$(printf "%s\n" "$smart_out" | parse_ata_attr_raw "Current_Pending_Sector")"
    offline="$(printf "%s\n" "$smart_out" | parse_ata_attr_raw "Offline_Uncorrectable")"
    crc="$(printf "%s\n" "$smart_out" | parse_ata_attr_raw "UDMA_CRC_Error_Count")"
    poh_ata="$(printf "%s\n" "$smart_out" | parse_ata_attr_raw "Power_On_Hours")"

    # Wear probe (the preferred signal for "is this disk wearing out")
    local wear_used wear_rem wear_source wear_conf
    IFS="|" read -r wear_used wear_rem wear_source wear_conf < <(wear_probe "$smart_out" || true)

    # Can we diagnose wear?
    local wear_known=0
    if [[ "$wear_used" =~ ^[0-9]+$ ]]; then
      wear_known=1
    fi

    # Not diagnosable when SMART is inaccessible, or when some SMART text
    # arrived but it yields neither a wear figure nor a health verdict.
    local diagnosable=1
    if [[ "$smart_access" != "OK" ]]; then
      diagnosable=0
    elif [[ "$wear_known" != "1" && "$health" == "UNKNOWN" ]]; then
      diagnosable=0
    fi

    local sev
    sev="$(severity_from_metrics "$health" "$temp" "${wear_used:-}" "${realloc:-0}" "${pending:-0}" "${offline:-0}" "${nvme_crit_warn:-0}" "${media_err:-0}")"

    # If not diagnosable, force WARNING (so it gets noticed).
    if (( diagnosable == 0 && sev < 1 )); then
      sev=1
    fi

    local status
    if [[ "$sev" == "2" ]]; then
      status="CRITICAL"
    elif [[ "$diagnosable" == "0" ]]; then
      status="INCONCLUSIVE"
    elif [[ "$sev" == "1" ]]; then
      status="WARNING"
    else
      status="OK"
    fi

    if (( sev > overall_sev )); then
      overall_sev="$sev"
    fi

    echo "Device: /dev/$d ($status)"
    echo " Model: ${model:-?} Serial: ${serial:-?} Size: ${size:-?} TRAN: ${tran:-?} ROTA: ${rota:-?} smartctl -d: ${dtype:-auto}"
    echo " SMART access: $smart_access SMART health: $health Temp(C): ${temp:-?}"

    if [[ "$wear_known" == "1" ]]; then
      echo " Wear: used=${wear_used}% remaining=${wear_rem}% source=${wear_source} confidence=${wear_conf}"
    else
      echo " Wear: (cannot determine) — no usable wear metric exposed (common on some USB bridges / drives)"
    fi

    if [[ -n "${nvme_crit_warn:-}" || -n "${media_err:-}" || -n "${unsafe_shutdowns:-}" || -n "${poh:-}" ]]; then
      echo " NVMe: crit_warn=${nvme_crit_warn:-?} media_err=${media_err:-?} unsafe_shutdowns=${unsafe_shutdowns:-?} power_on_hours=${poh:-?}"
      if [[ -n "${duw:-}" ]]; then
        echo " NVMe: data_units_written~${duw}"
      fi
      if [[ -n "${dur:-}" ]]; then
        echo " NVMe: data_units_read~${dur}"
      fi
    fi

    if [[ -n "${realloc:-}" || -n "${pending:-}" || -n "${offline:-}" || -n "${crc:-}" ]]; then
      echo " ATA: realloc=${realloc:-?} pending=${pending:-?} offline_uncorrectable=${offline:-?} crc_errors=${crc:-?} power_on_hours=${poh_ata:-?}"
    fi

    if [[ "$diagnosable" == "0" ]]; then
      echo " DIAGNOSTIC NOTE: Insufficient SMART data to give a reliable wear/health diagnosis for this device."
    fi

    echo " Filesystems:"
    local fsline foundfs=0
    while IFS= read -r fsline; do
      [[ -z "$fsline" ]] && continue
      foundfs=1
      # fsline is "name|mountpoint|fstype|size"; peel the fields off in order.
      local pname mnt fstype psize
      pname="${fsline%%|*}"; fsline="${fsline#*|}"
      mnt="${fsline%%|*}"; fsline="${fsline#*|}"
      fstype="${fsline%%|*}"; psize="${fsline#*|}"
      local dfline
      # df fails for pseudo mountpoints; report empty figures instead of dying.
      dfline="$(df -P "$mnt" 2>/dev/null | awk 'NR==2{print $2,$3,$4,$5}')" || true
      echo " /dev/$pname mnt=$mnt fstype=$fstype part_size=$psize df(total used avail use%)=($dfline)"
    done < <(fs_usage_for_disk "$d" || true)
    if [[ "$foundfs" == "0" ]]; then
      echo " (no mounted partitions found)"
    fi

    if [[ "$DETAIL" == "1" ]]; then
      echo " --- smartctl raw (trimmed) ---"
      printf "%s\n" "$smart_out" | sed -n '1,140p'
      echo " --- end ---"
    fi

    echo
  done <<<"$disk_list"

  case "$overall_sev" in
    2)
      echo "Overall: CRITICAL"
      exit 2
      ;;
    1)
      echo "Overall: WARNING/INCONCLUSIVE"
      exit 1
      ;;
    *)
      echo "Overall: OK"
      exit 0
      ;;
  esac
}
|
||||
|
||||
# Print the report as one JSON document and exit with the worst severity
# found (0=OK, 1=WARNING/INCONCLUSIVE, 2=CRITICAL).
#
# Globals: ONLY_DEV (read)
# Exits:   always terminates the script. Exits 2 without emitting any JSON
#          when the disk list cannot be produced.
#
# The disk list is captured before any output. list_disks rejects a bad
# --dev with `exit 2`; when it ran inside a process substitution that exit
# was lost and an empty "devices" array was reported with status 0.
# Best-effort lookups (lsblk columns, df on pseudo mountpoints such as
# "[SWAP]") are allowed to fail: a `read` that hits EOF returns 1 and, under
# `set -e`, used to abort the script in the middle of the document.
main_json() {
  local overall_sev=0
  local first=1
  local disk_list d

  if ! disk_list="$(list_disks)"; then
    # list_disks already explained an invalid --dev on stderr.
    [[ -n "${ONLY_DEV:-}" ]] || echo "ERROR: could not enumerate disks." >&2
    exit 2
  fi

  echo "{"
  echo " \"timestamp\": \"$(date -Is)\","
  echo " \"devices\": ["

  while read -r d; do
    [[ -z "$d" ]] && continue

    # Identification; every field is optional.
    local dtype model serial size tran rota smart_out
    dtype="$(smart_scan_type_for "$d")"
    model="$(lsblk -dn -o MODEL "/dev/$d" 2>/dev/null | sed 's/[[:space:]]\+/ /g' | sed 's/^ //;s/ $//')" || true
    serial="$(lsblk -dn -o SERIAL "/dev/$d" 2>/dev/null | sed 's/[[:space:]]\+/ /g' | sed 's/^ //;s/ $//')" || true
    size="$(lsblk -dn -o SIZE "/dev/$d" 2>/dev/null | head -n1)" || true
    tran="$(lsblk -dn -o TRAN "/dev/$d" 2>/dev/null | head -n1)" || true
    rota="$(lsblk -dn -o ROTA "/dev/$d" 2>/dev/null | head -n1)" || true

    smart_out="$(smart_run "$d" "$dtype")"
    local smart_access
    smart_access="$(printf "%s\n" "$smart_out" | smart_access_state)"

    local health temp
    health="$(printf "%s\n" "$smart_out" | parse_health)"
    temp="$(printf "%s\n" "$smart_out" | parse_temp_c)"

    # NVMe fields (if present)
    local nvme_crit_warn media_err unsafe_shutdowns poh duw dur
    nvme_crit_warn="$(printf "%s\n" "$smart_out" | parse_nvme_int "Critical Warning")"
    media_err="$(printf "%s\n" "$smart_out" | parse_nvme_int "Media and Data Integrity Errors")"
    unsafe_shutdowns="$(printf "%s\n" "$smart_out" | parse_nvme_int "Unsafe Shutdowns")"
    poh="$(printf "%s\n" "$smart_out" | parse_nvme_int "Power On Hours")"
    duw="$(printf "%s\n" "$smart_out" | parse_nvme_bytes_units_written)"
    dur="$(printf "%s\n" "$smart_out" | parse_nvme_bytes_units_read)"

    # ATA attributes (if present)
    local realloc pending offline crc poh_ata
    realloc="$(printf "%s\n" "$smart_out" | parse_ata_attr_raw "Reallocated_Sector_Ct")"
    pending="$(printf "%s\n" "$smart_out" | parse_ata_attr_raw "Current_Pending_Sector")"
    offline="$(printf "%s\n" "$smart_out" | parse_ata_attr_raw "Offline_Uncorrectable")"
    crc="$(printf "%s\n" "$smart_out" | parse_ata_attr_raw "UDMA_CRC_Error_Count")"
    poh_ata="$(printf "%s\n" "$smart_out" | parse_ata_attr_raw "Power_On_Hours")"

    local wear_used wear_rem wear_source wear_conf
    IFS="|" read -r wear_used wear_rem wear_source wear_conf < <(wear_probe "$smart_out" || true)

    local wear_known=0
    if [[ "$wear_used" =~ ^[0-9]+$ ]]; then
      wear_known=1
    fi

    # Not diagnosable when SMART is inaccessible, or when the text yields
    # neither a wear figure nor a health verdict.
    local diagnosable=1
    if [[ "$smart_access" != "OK" ]]; then
      diagnosable=0
    elif [[ "$wear_known" != "1" && "$health" == "UNKNOWN" ]]; then
      diagnosable=0
    fi

    local sev
    sev="$(severity_from_metrics "$health" "$temp" "${wear_used:-}" "${realloc:-0}" "${pending:-0}" "${offline:-0}" "${nvme_crit_warn:-0}" "${media_err:-0}")"
    # An undiagnosable device is at least a WARNING so it gets noticed.
    if (( diagnosable == 0 && sev < 1 )); then
      sev=1
    fi

    if (( sev > overall_sev )); then
      overall_sev="$sev"
    fi

    # Separator between device objects (not before the first one).
    if [[ "$first" != "1" ]]; then
      echo " ,"
    fi
    first=0

    # Filesystems
    local fs_items=""
    local fsline
    while IFS= read -r fsline; do
      [[ -z "$fsline" ]] && continue
      # fsline is "name|mountpoint|fstype|size"; peel the fields off in order.
      local pname mnt fstype psize
      pname="${fsline%%|*}"; fsline="${fsline#*|}"
      mnt="${fsline%%|*}"; fsline="${fsline#*|}"
      fstype="${fsline%%|*}"; psize="${fsline#*|}"
      local df_total="" df_used="" df_avail="" df_usep=""
      # df prints nothing for pseudo mountpoints; read then returns 1 at EOF.
      read -r df_total df_used df_avail df_usep < <(df -P "$mnt" 2>/dev/null | awk 'NR==2{print $2,$3,$4,$5}') || true
      fs_items+="{\"partition\":\"/dev/$pname\",\"mount\":$(json_escape "$mnt"),\"fstype\":$(json_escape "$fstype"),\"part_size\":$(json_escape "$psize"),\"df\":{\"total\":$(json_escape "${df_total:-}"),\"used\":$(json_escape "${df_used:-}"),\"avail\":$(json_escape "${df_avail:-}"),\"use_pct\":$(json_escape "${df_usep:-}")}},"
    done < <(fs_usage_for_disk "$d" || true)
    local fs_json="[]"
    if [[ -n "$fs_items" ]]; then
      fs_json="[${fs_items%,}]"
    fi

    cat <<EOF
{
"device": "/dev/$d",
"model": $(json_escape "${model:-}"),
"serial": $(json_escape "${serial:-}"),
"size": $(json_escape "${size:-}"),
"tran": $(json_escape "${tran:-}"),
"rota": $(json_escape "${rota:-}"),
"smartctl_device_type": $(json_escape "${dtype:-auto}"),
"smart_access": $(json_escape "$smart_access"),
"health": $(json_escape "$health"),
"temp_c": $(json_escape "${temp:-}"),
"wear": {
"used_pct": $(json_escape "${wear_used:-}"),
"remaining_pct": $(json_escape "${wear_rem:-}"),
"source": $(json_escape "${wear_source:-}"),
"confidence": $(json_escape "${wear_conf:-}"),
"known": $wear_known
},
"nvme": {
"critical_warning": $(json_escape "${nvme_crit_warn:-}"),
"media_errors": $(json_escape "${media_err:-}"),
"unsafe_shutdowns": $(json_escape "${unsafe_shutdowns:-}"),
"power_on_hours": $(json_escape "${poh:-}"),
"data_units_written": $(json_escape "${duw:-}"),
"data_units_read": $(json_escape "${dur:-}")
},
"ata": {
"reallocated_sectors": $(json_escape "${realloc:-}"),
"pending_sectors": $(json_escape "${pending:-}"),
"offline_uncorrectable": $(json_escape "${offline:-}"),
"crc_errors": $(json_escape "${crc:-}"),
"power_on_hours": $(json_escape "${poh_ata:-}")
},
"diagnosable": $diagnosable,
"severity": $sev,
"filesystems": $fs_json
}
EOF

  done <<<"$disk_list"

  echo " ],"

  local overall
  case "$overall_sev" in
    2) overall="CRITICAL" ;;
    1) overall="WARNING/INCONCLUSIVE" ;;
    *) overall="OK" ;;
  esac

  echo " \"overall\": \"$overall\","
  echo " \"overall_severity\": $overall_sev"
  echo "}"

  exit "$overall_sev"
}
|
||||
|
||||
# Entry point: run the renderer selected by --json (text is the default).
# Both renderers terminate the script with the overall severity.
case "$MODE" in
  json) main_json ;;
  *) main_text ;;
esac
|
||||
Loading…
Reference in New Issue