Refactored to take into account ssh-agent, and time of last alert
This commit is contained in:
parent
15f6697dad
commit
0540fed30b
114
psi-alerts.sh
114
psi-alerts.sh
@ -52,9 +52,25 @@ ssh_user="${SSH_USER}"
|
|||||||
ssh_id_path="${SSH_ID_PATH}"
|
ssh_id_path="${SSH_ID_PATH}"
|
||||||
clear_threshold="${CLEAR_THRESHOLD}"
|
clear_threshold="${CLEAR_THRESHOLD}"
|
||||||
notification_cmd="${NOTIFICATION_CMD}"
|
notification_cmd="${NOTIFICATION_CMD}"
|
||||||
|
notification_hist_cmd="${NOTIFICATION_HIST_CMD}"
|
||||||
notification_opts="${NOTIFICATION_OPTS}"
|
notification_opts="${NOTIFICATION_OPTS}"
|
||||||
id_idx="${NOTIFICATION_IDX}"
|
id_idx="${NOTIFICATION_IDX}"
|
||||||
|
|
||||||
|
# Locate an ssh-agent socket and export it as SSH_AUTH_SOCK.
#
# Scans /tmp/ssh-*/agent.* for directories owned by the current user and
# picks the last entry (in glob order) that is actually a socket.
#
# Globals:   SSH_AUTH_SOCK (exported only when a socket is found; an
#            existing value is left untouched otherwise)
# Arguments: none
# Returns:   0 if SSH_AUTH_SOCK names a socket when the function finishes,
#            non-zero otherwise
get_ssh_agent () {
	local dir sock found=""

	# zsh's default NOMATCH option raises an error when a glob matches
	# nothing; make unmatched globs expand to nothing for this function only.
	# (In bash the unmatched pattern stays literal and fails the tests below.)
	if [[ -n "${ZSH_VERSION-}" ]]; then
		setopt local_options null_glob
	fi

	for dir in /tmp/ssh-*; do
		# only look inside directories that belong to the current user
		[[ -d "${dir}" && -O "${dir}" ]] || continue
		for sock in "${dir}"/agent.*; do
			# only choose the last agent; ignore anything that is not a
			# socket so a stale directory cannot clobber a good candidate
			if [[ -S "${sock}" ]]; then
				found="${sock}"
			fi
		done
	done

	if [[ -n "${found}" ]]; then
		export SSH_AUTH_SOCK="${found}"
	fi

	# succeed when we found (or already had) an ssh-agent socket
	[[ -S "${SSH_AUTH_SOCK-}" ]]
}
|
||||||
print_psi () {
|
print_psi () {
|
||||||
local psi_file="${1}"
|
local psi_file="${1}"
|
||||||
cat "${(P)$(tr '[[:upper:]]' '[[:lower:]]' <<< "${psi_file}")}"
|
cat "${(P)$(tr '[[:upper:]]' '[[:lower:]]' <<< "${psi_file}")}"
|
||||||
@ -101,13 +117,27 @@ send_notice () {
|
|||||||
esac
|
esac
|
||||||
|
|
||||||
integer notification_id
|
integer notification_id
|
||||||
if ! notification_id=$(ssh -q -i "${ssh_id_path}" "${ssh_user}@${ssh_host}" -p ${ssh_port} \
|
if get_ssh_agent && [[ -S ${SSH_AUTH_SOCK} ]]; then
|
||||||
"${notification_cmd} ${notification_opts} '${host}: PSI ${psi_type} triggered!' '${psi}'"); then
|
if ! notification_id=$(ssh -q "${ssh_user}@${ssh_host}" -p ${ssh_port} \
|
||||||
print "Connection to notification daemon failed!" >&2
|
"${notification_cmd} ${notification_opts} '${host}: PSI ${psi_type} triggered!' '${psi}'"); then
|
||||||
false
|
print "Connection to notification daemon failed!" >&2
|
||||||
|
false
|
||||||
|
else
|
||||||
|
echo ${notification_id}
|
||||||
|
true
|
||||||
|
fi
|
||||||
|
elif [[ -n "${ssh_id_path}" ]]; then
|
||||||
|
if ! notification_id=$(ssh -q -i "${ssh_id_path}" "${ssh_user}@${ssh_host}" -p ${ssh_port} \
|
||||||
|
"${notification_cmd} ${notification_opts} '${host}: PSI ${psi_type} triggered!' '${psi}'"); then
|
||||||
|
print "Connection to notification daemon failed!" >&2
|
||||||
|
false
|
||||||
|
else
|
||||||
|
echo ${notification_id}
|
||||||
|
true
|
||||||
|
fi
|
||||||
else
|
else
|
||||||
echo ${notification_id}
|
echo "No SSH notifications configured. Returning." >&2
|
||||||
true
|
false
|
||||||
fi
|
fi
|
||||||
#set +x
|
#set +x
|
||||||
}
|
}
|
||||||
@ -171,11 +201,11 @@ is_clear () {
|
|||||||
local psi_type="${1}"
|
local psi_type="${1}"
|
||||||
local psi_file="$(tr '[[:upper:]]' '[[:lower:]]' <<< "${psi_type}")"
|
local psi_file="$(tr '[[:upper:]]' '[[:lower:]]' <<< "${psi_type}")"
|
||||||
|
|
||||||
local avg10=$(grep some "${(P)psi_file}" | awk '{print $2}' | awk -F= '{print $2}')
|
local avg300=$(grep some "${(P)psi_file}" | awk '{print $4}' | awk -F= '{print $2}')
|
||||||
local prev=$(grep some "${(P)psi_file}" | awk '{print $5}' | awk -F= '{print $2}')
|
local prev=$(grep some "${(P)psi_file}" | awk '{print $5}' | awk -F= '{print $2}')
|
||||||
|
|
||||||
|
|
||||||
if [[ ${avg10} -lt ${clear_threshold} ]]; then
|
if [[ ${avg300} -lt ${clear_threshold} ]]; then
|
||||||
true
|
true
|
||||||
else
|
else
|
||||||
false
|
false
|
||||||
@ -213,35 +243,69 @@ check_dunst_id_is_visible () {
|
|||||||
local dunst_id="${1}"
|
local dunst_id="${1}"
|
||||||
|
|
||||||
typeset -a ids
|
typeset -a ids
|
||||||
if ids=$(ssh -q "${ssh_host}" -p ${ssh_port} \
|
if ! ids=$(ssh -q "${ssh_host}" -p ${ssh_port} -l "${ssh_user}" \
|
||||||
"dunstctl history | jq '.data[0][][${id_idx}].data'"); then
|
"${notification_hist_cmd} | jq '.data[0][].id.data'"); then
|
||||||
echo "Connection to dunst failed!" >&2
|
if ! ids=$(ssh -qi "${ssh_id_path}" -p ${ssh_port} -l "${ssh_user}" \
|
||||||
return 2
|
"${ssh_host}" "${notification_hist_cmd} | jq '.data[0][].id.data'"); then
|
||||||
|
echo "Connection to dunst failed!" >&2
|
||||||
|
return 2
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# if the alert is visible, it's not in the dunst history
|
||||||
if grep -qP "\b${dunst_id}\b" <<< "${ids}"; then
|
if grep -qP "\b${dunst_id}\b" <<< "${ids}"; then
|
||||||
true
|
|
||||||
else
|
|
||||||
false
|
false
|
||||||
|
else
|
||||||
|
true
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
#set -x
|
|
||||||
local current_alarm=""
|
local current_alarm=""
|
||||||
local last_alarm=""
|
local last_alarm=""
|
||||||
typeset -A notice_sent
|
typeset -A notice_sent
|
||||||
typeset -A secs
|
typeset -A secs
|
||||||
integer last_dunst_id=-1
|
integer last_dunst_id=-1
|
||||||
|
local last_line=""
|
||||||
|
|
||||||
journalctl -b 0 -fn 3 -u "${svc}" | \
|
set -x
|
||||||
while read line; do
|
while true; do
|
||||||
|
local line=$(journalctl -u ${svc} -n1)
|
||||||
|
if [[ "${last_line}" == "${line}" ]]; then
|
||||||
|
# line hasn't changed since last run, do nothing
|
||||||
|
sleep 1
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
last_line="${line}"
|
||||||
|
local now=$(date +%s)
|
||||||
|
local last_timestamp=$(date -d $(awk '{print $1" "$2" "$3}' <<< "${line}") +%s)
|
||||||
|
local time_diff=$(( now - last_timestamp ))
|
||||||
|
if (( time_diff >= 3 )); then
|
||||||
|
# haven't seen a monitor alert for 3 seconds, see if we can clear them
|
||||||
|
if [[ -n "${current_alarms}" ]]; then
|
||||||
|
typeset -a alarms=( $(tr '|' ' ' <<< "$current_alarms") )
|
||||||
|
for alarm in ${alarms}; do
|
||||||
|
integer elapsed=$(( now - ${secs[${alarm}]} ))
|
||||||
|
if is_clear "${alarm}" && (( elapsed >= 300 )); then
|
||||||
|
current_alarms=$(sed -E "s/${alarm}\|?//" <<< "${current_alarms}")
|
||||||
|
last_alarm=$(awk -F'|' '{print $NF}' <<< "${current_alarms}")
|
||||||
|
unset "notice_sent[${alarm}]"
|
||||||
|
unset "secs[${alarm}]"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
continue
|
||||||
|
fi
|
||||||
local psi_type="$(grep -Eo "(CPU|MEM|IO) PSI event" <<< "${line}" | grep -Eo "CPU|MEM|IO")"
|
local psi_type="$(grep -Eo "(CPU|MEM|IO) PSI event" <<< "${line}" | grep -Eo "CPU|MEM|IO")"
|
||||||
if [[ -n "${psi_type}" ]]; then
|
if [[ -n "${psi_type}" ]]; then
|
||||||
secs+=(${psi_type} $(date +%s))
|
secs+=(${psi_type} ${now})
|
||||||
if [[ "${psi_type}" != "${last_alarm}" ]]; then
|
if [[ "${psi_type}" != "${last_alarm}" ]]; then
|
||||||
if [[ ! ${notice_sent[${psi_type}]} ]]; then
|
if [[ ! ${notice_sent[${psi_type}]} ]]; then
|
||||||
last_dunst_id=$(exec_notices "${psi_type}" "${current_alarms}")
|
last_dunst_id=$(exec_notices "${psi_type}" "${current_alarms}")
|
||||||
notice_sent+=(${psi_type} true)
|
notice_sent+=(${psi_type} true)
|
||||||
elif (( last_dunst_id >= 0 )) && ! check_dunst_id_is_visible "${last_dunst_id}"; then
|
elif (( last_dunst_id >= 0 )) && check_dunst_id_is_visible "${last_dunst_id}"; then
|
||||||
|
last_alarm="${psi_type}"
|
||||||
|
sleep 1
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
@ -253,16 +317,8 @@ while read line; do
|
|||||||
current_alarms="${current_alarms}|${psi_type}"
|
current_alarms="${current_alarms}|${psi_type}"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
else
|
|
||||||
typeset -a alarms=( $(tr '|' ' ' <<< "$current_alarms") )
|
|
||||||
for alarm in ${alarms}; do
|
|
||||||
integer elapsed=$(( $(date +%s) - ${secs[${alarm}]} ))
|
|
||||||
if is_clear "${alarm}" && (( elapsed > 300 )); then
|
|
||||||
current_alarms=$(sed -E "s/${alarm}\|?//" <<< "${current_alarms}")
|
|
||||||
last_alarm=$(awk -F'|' '{print $NF}' <<< "${current_alarms}")
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
fi
|
fi
|
||||||
|
sleep 1
|
||||||
done
|
done
|
||||||
#set +x
|
set +x
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user