diff --git a/psi-alerts.sh b/psi-alerts.sh index 6013780..9f04c61 100755 --- a/psi-alerts.sh +++ b/psi-alerts.sh @@ -52,9 +52,25 @@ ssh_user="${SSH_USER}" ssh_id_path="${SSH_ID_PATH}" clear_threshold="${CLEAR_THRESHOLD}" notification_cmd="${NOTIFICATION_CMD}" +notification_hist_cmd="${NOTIFICATION_HIST_CMD}" notification_opts="${NOTIFICATION_OPTS}" id_idx="${NOTIFICATION_IDX}" +get_ssh_agent () { + for dir in /tmp/ssh-*; do + if [[ -O ${dir} ]]; then + # only choose the last agent + export SSH_AUTH_SOCK=$(ls ${dir}/agent.* | tail -1) + fi + done + + if [[ -S ${SSH_AUTH_SOCK} ]]; then + # we found an ssh_agent socket + true + else + false + fi +} print_psi () { local psi_file="${1}" cat "${(P)$(tr '[[:upper:]]' '[[:lower:]]' <<< "${psi_file}")}" @@ -101,13 +117,27 @@ send_notice () { esac integer notification_id - if ! notification_id=$(ssh -q -i "${ssh_id_path}" "${ssh_user}@${ssh_host}" -p ${ssh_port} \ - "${notification_cmd} ${notification_opts} '${host}: PSI ${psi_type} triggered!' '${psi}'"); then - print "Connection to notification daemon failed!" >&2 - false + if get_ssh_agent && [[ -S ${SSH_AUTH_SOCK} ]]; then + if ! notification_id=$(ssh -q "${ssh_user}@${ssh_host}" -p ${ssh_port} \ + "${notification_cmd} ${notification_opts} '${host}: PSI ${psi_type} triggered!' '${psi}'"); then + print "Connection to notification daemon failed!" >&2 + false + else + echo ${notification_id} + true + fi + elif [[ -n "${ssh_id_path}" ]]; then + if ! notification_id=$(ssh -q -i "${ssh_id_path}" "${ssh_user}@${ssh_host}" -p ${ssh_port} \ + "${notification_cmd} ${notification_opts} '${host}: PSI ${psi_type} triggered!' '${psi}'"); then + print "Connection to notification daemon failed!" >&2 + false + else + echo ${notification_id} + true + fi else - echo ${notification_id} - true + echo "No SSH notifications configured. Returning." >&2 + false fi #set +x } @@ -171,11 +201,11 @@ is_clear () { local psi_type="${1}" local psi_file="$(tr '[[:upper:]]' '[[:lower:]]' <<< "${psi_type}")" - local avg10=$(grep some "${(P)psi_file}" | awk '{print $2}' | awk -F= '{print $2}') + local avg300=$(grep some "${(P)psi_file}" | awk '{print $4}' | awk -F= '{print $2}') local prev=$(grep some "${(P)psi_file}" | awk '{print $5}' | awk -F= '{print $2}') - if [[ ${avg10} -lt ${clear_threshold} ]]; then + if [[ ${avg300} -lt ${clear_threshold} ]]; then true else false @@ -213,35 +243,69 @@ check_dunst_id_is_visible () { local dunst_id="${1}" typeset -a ids - if ids=$(ssh -q "${ssh_host}" -p ${ssh_port} \ - "dunstctl history | jq '.data[0][][${id_idx}].data'"); then - echo "Connection to dunst failed!" >&2 - return 2 + if ! ids=$(ssh -q "${ssh_host}" -p ${ssh_port} -l "${ssh_user}" \ + "${notification_hist_cmd} | jq '.data[0][].id.data'"); then + if ! ids=$(ssh -qi "${ssh_id_path}" -p ${ssh_port} -l "${ssh_user}" \ + "${ssh_host}" "${notification_hist_cmd} | jq '.data[0][].id.data'"); then + echo "Connection to dunst failed!" >&2 + return 2 + fi fi + # if the alert is visible, it's not in the dunst history if grep -qP "\b${dunst_id}\b" <<< "${ids}"; then - true - else false + else + true fi } -#set -x + local current_alarm="" local last_alarm="" typeset -A notice_sent typeset -A secs integer last_dunst_id=-1 +local last_line="" -journalctl -b 0 -fn 3 -u "${svc}" | \ -while read line; do +set -x +while true; do + local line=$(journalctl -u ${svc} -n1) + if [[ "${last_line}" == "${line}" ]]; then + # line hasn't changed since last run, do nothing + sleep 1 + continue + fi + last_line="${line}" + local now=$(date +%s) + local last_timestamp=$(date -d $(awk '{print $1" "$2" "$3}' <<< "${line}") +%s) + local time_diff=$(( now - last_timestamp )) + if (( time_diff >= 3 )); then + # haven't seen a monitor alert for 3 seconds, see if we can clear them + if [[ -n "${current_alarms}" ]]; then + typeset -a alarms=( $(tr '|' ' ' <<< "$current_alarms") ) + for alarm in ${alarms}; do + integer elapsed=$(( now - ${secs[${alarm}]} )) + if is_clear "${alarm}" && (( elapsed >= 300 )); then + current_alarms=$(sed -E "s/${alarm}\|?//" <<< "${current_alarms}") + last_alarm=$(awk -F'|' '{print $NF}' <<< "${current_alarms}") + unset "notice_sent[${alarm}]" + unset "secs[${alarm}]" + fi + done + fi + sleep 1 + continue + fi local psi_type="$(grep -Eo "(CPU|MEM|IO) PSI event" <<< "${line}" | grep -Eo "CPU|MEM|IO")" if [[ -n "${psi_type}" ]]; then - secs+=(${psi_type} $(date +%s)) + secs+=(${psi_type} ${now}) if [[ "${psi_type}" != "${last_alarm}" ]]; then if [[ ! ${notice_sent[${psi_type}]} ]]; then last_dunst_id=$(exec_notices "${psi_type}" "${current_alarms}") notice_sent+=(${psi_type} true) - elif (( last_dunst_id >= 0 )) && ! check_dunst_id_is_visible "${last_dunst_id}"; then + elif (( last_dunst_id >= 0 )) && check_dunst_id_is_visible "${last_dunst_id}"; then + last_alarm="${psi_type}" + sleep 1 continue fi fi @@ -253,16 +317,8 @@ while read line; do current_alarms="${current_alarms}|${psi_type}" fi fi - else - typeset -a alarms=( $(tr '|' ' ' <<< "$current_alarms") ) - for alarm in ${alarms}; do - integer elapsed=$(( $(date +%s) - ${secs[${alarm}]} )) - if is_clear "${alarm}" && (( elapsed > 300 )); then - current_alarms=$(sed -E "s/${alarm}\|?//" <<< "${current_alarms}") - last_alarm=$(awk -F'|' '{print $NF}' <<< "${current_alarms}") - fi - done fi + sleep 1 done -#set +x +set +x