Refactored to take into account ssh-agent, and time of last alert
This commit is contained in:
		
							
								
								
									
										114
									
								
								psi-alerts.sh
									
									
									
									
									
								
							
							
						
						
									
										114
									
								
								psi-alerts.sh
									
									
									
									
									
								
							@@ -52,9 +52,25 @@ ssh_user="${SSH_USER}"
 | 
			
		||||
ssh_id_path="${SSH_ID_PATH}"
 | 
			
		||||
clear_threshold="${CLEAR_THRESHOLD}"
 | 
			
		||||
notification_cmd="${NOTIFICATION_CMD}"
 | 
			
		||||
notification_hist_cmd="${NOTIFICATION_HIST_CMD}"
 | 
			
		||||
notification_opts="${NOTIFICATION_OPTS}"
 | 
			
		||||
id_idx="${NOTIFICATION_IDX}"
 | 
			
		||||
 | 
			
		||||
# Locate an ssh-agent socket and export it as SSH_AUTH_SOCK.
#
# Looks for socket files named agent.* directly inside ssh-* directories
# under the search root, keeping only directories owned by the current
# user. When several sockets qualify, the last one in sorted path order
# is exported.
#
# Globals:   SSH_AUTH_SOCK (exported when a socket is found; otherwise
#            left exactly as it was)
# Arguments: $1 - optional search root, defaults to /tmp
# Returns:   0 if SSH_AUTH_SOCK refers to a socket afterwards, non-zero
#            otherwise
get_ssh_agent () {
    local base_dir="${1:-/tmp}"
    local sock=""
    local found=""

    # normalise a trailing slash so the -path pattern below lines up
    base_dir="${base_dir%/}"

    # find is used instead of a shell glob so that "no ssh-* directory"
    # is simply an empty result rather than a failed glob, and so that
    # only real sockets (-type s) are considered.  Errors are discarded
    # on purpose: unreadable directories belonging to other users are
    # expected under a shared temp directory.
    while IFS= read -r sock; do
        # only trust agents living in a directory we own
        if [[ -O "${sock%/*}" ]]; then
            # only choose the last agent
            found="${sock}"
        fi
    done < <(find "${base_dir}" -mindepth 2 -maxdepth 2 \
        -path "${base_dir}/ssh-*/agent.*" -type s 2>/dev/null | sort)

    # never overwrite a usable value with an empty one
    if [[ -n "${found}" ]]; then
        export SSH_AUTH_SOCK="${found}"
    fi

    # the test's exit status is the function's result
    [[ -S "${SSH_AUTH_SOCK:-}" ]]
}
 | 
			
		||||
print_psi () {
 | 
			
		||||
    local psi_file="${1}"
 | 
			
		||||
    cat "${(P)$(tr '[[:upper:]]' '[[:lower:]]' <<< "${psi_file}")}"
 | 
			
		||||
@@ -101,13 +117,27 @@ send_notice () {
 | 
			
		||||
    esac
 | 
			
		||||
 | 
			
		||||
    integer notification_id
 | 
			
		||||
    if ! notification_id=$(ssh -q -i "${ssh_id_path}" "${ssh_user}@${ssh_host}" -p ${ssh_port} \
 | 
			
		||||
        "${notification_cmd} ${notification_opts} '${host}:  PSI ${psi_type} triggered!' '${psi}'"); then
 | 
			
		||||
            print "Connection to notification daemon failed!" >&2
 | 
			
		||||
            false
 | 
			
		||||
    if get_ssh_agent && [[ -S ${SSH_AUTH_SOCK} ]]; then
 | 
			
		||||
        if ! notification_id=$(ssh -q "${ssh_user}@${ssh_host}" -p ${ssh_port} \
 | 
			
		||||
            "${notification_cmd} ${notification_opts} '${host}:  PSI ${psi_type} triggered!' '${psi}'"); then
 | 
			
		||||
                print "Connection to notification daemon failed!" >&2
 | 
			
		||||
                false
 | 
			
		||||
        else
 | 
			
		||||
            echo ${notification_id}
 | 
			
		||||
            true
 | 
			
		||||
        fi
 | 
			
		||||
    elif [[ -n "${ssh_id_path}" ]]; then
 | 
			
		||||
        if ! notification_id=$(ssh -q -i "${ssh_id_path}" "${ssh_user}@${ssh_host}" -p ${ssh_port} \
 | 
			
		||||
            "${notification_cmd} ${notification_opts} '${host}:  PSI ${psi_type} triggered!' '${psi}'"); then
 | 
			
		||||
                print "Connection to notification daemon failed!" >&2
 | 
			
		||||
                false
 | 
			
		||||
        else
 | 
			
		||||
            echo ${notification_id}
 | 
			
		||||
            true
 | 
			
		||||
        fi
 | 
			
		||||
    else
 | 
			
		||||
        echo ${notification_id}
 | 
			
		||||
        true
 | 
			
		||||
        echo "No SSH notifications configured.  Returning." >&2
 | 
			
		||||
        false
 | 
			
		||||
    fi
 | 
			
		||||
    #set +x
 | 
			
		||||
}
 | 
			
		||||
@@ -171,11 +201,11 @@ is_clear () {
 | 
			
		||||
    local psi_type="${1}"
 | 
			
		||||
    local psi_file="$(tr '[[:upper:]]' '[[:lower:]]' <<< "${psi_type}")"
 | 
			
		||||
 | 
			
		||||
    local avg10=$(grep some "${(P)psi_file}" | awk '{print $2}' | awk -F= '{print $2}')
 | 
			
		||||
    local avg300=$(grep some "${(P)psi_file}" | awk '{print $4}' | awk -F= '{print $2}')
 | 
			
		||||
    local prev=$(grep some "${(P)psi_file}" | awk '{print $5}' | awk -F= '{print $2}')
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    if [[ ${avg10} -lt ${clear_threshold} ]]; then
 | 
			
		||||
    if [[ ${avg300} -lt ${clear_threshold} ]]; then
 | 
			
		||||
        true
 | 
			
		||||
    else
 | 
			
		||||
        false
 | 
			
		||||
@@ -213,35 +243,69 @@ check_dunst_id_is_visible () {
 | 
			
		||||
    local dunst_id="${1}"
 | 
			
		||||
 | 
			
		||||
    typeset -a ids
 | 
			
		||||
    if ids=$(ssh -q "${ssh_host}" -p ${ssh_port} \
 | 
			
		||||
        "dunstctl history | jq '.data[0][][${id_idx}].data'"); then
 | 
			
		||||
            echo "Connection to dunst failed!" >&2
 | 
			
		||||
            return 2
 | 
			
		||||
    if ! ids=$(ssh -q "${ssh_host}" -p ${ssh_port} -l "${ssh_user}" \
 | 
			
		||||
        "${notification_hist_cmd} | jq '.data[0][].id.data'"); then
 | 
			
		||||
        if ! ids=$(ssh -qi "${ssh_id_path}" -p ${ssh_port} -l "${ssh_user}" \
 | 
			
		||||
            "${ssh_host}" "${notification_hist_cmd} | jq '.data[0][].id.data'"); then
 | 
			
		||||
                echo "Connection to dunst failed!" >&2
 | 
			
		||||
                return 2
 | 
			
		||||
        fi
 | 
			
		||||
    fi
 | 
			
		||||
 | 
			
		||||
    # if the alert is visible, it's not in the dunst history
 | 
			
		||||
    if grep -qP "\b${dunst_id}\b" <<< "${ids}"; then
 | 
			
		||||
        true
 | 
			
		||||
    else
 | 
			
		||||
        false
 | 
			
		||||
    else
 | 
			
		||||
        true
 | 
			
		||||
    fi
 | 
			
		||||
}
 | 
			
		||||
#set -x
 | 
			
		||||
 | 
			
		||||
local current_alarm=""
 | 
			
		||||
local last_alarm=""
 | 
			
		||||
typeset -A notice_sent
 | 
			
		||||
typeset -A secs
 | 
			
		||||
integer last_dunst_id=-1
 | 
			
		||||
local last_line=""
 | 
			
		||||
 | 
			
		||||
journalctl -b 0 -fn 3 -u "${svc}"  | \
 | 
			
		||||
while read line; do
 | 
			
		||||
set -x
 | 
			
		||||
while true; do
 | 
			
		||||
    local line=$(journalctl -u ${svc} -n1)
 | 
			
		||||
    if [[ "${last_line}" == "${line}" ]]; then
 | 
			
		||||
        # line hasn't changed since last run, do nothing
 | 
			
		||||
        sleep 1
 | 
			
		||||
        continue
 | 
			
		||||
    fi
 | 
			
		||||
    last_line="${line}"
 | 
			
		||||
    local now=$(date +%s)
 | 
			
		||||
    local last_timestamp=$(date -d $(awk '{print $1" "$2" "$3}' <<< "${line}") +%s)
 | 
			
		||||
    local time_diff=$(( now - last_timestamp ))
 | 
			
		||||
    if (( time_diff >= 3 )); then
 | 
			
		||||
        # haven't seen a monitor alert for 3 seconds, see if we can clear them
 | 
			
		||||
        if [[ -n "${current_alarms}" ]]; then
 | 
			
		||||
            typeset -a alarms=( $(tr '|' ' ' <<< "$current_alarms") )
 | 
			
		||||
            for alarm in ${alarms}; do
 | 
			
		||||
                integer elapsed=$(( now - ${secs[${alarm}]} ))
 | 
			
		||||
                if is_clear "${alarm}" && (( elapsed >= 300 )); then
 | 
			
		||||
                    current_alarms=$(sed -E "s/${alarm}\|?//" <<< "${current_alarms}")
 | 
			
		||||
                    last_alarm=$(awk -F'|' '{print $NF}' <<< "${current_alarms}")
 | 
			
		||||
                    unset "notice_sent[${alarm}]"
 | 
			
		||||
                    unset "secs[${alarm}]"
 | 
			
		||||
                fi
 | 
			
		||||
            done
 | 
			
		||||
        fi
 | 
			
		||||
        sleep 1
 | 
			
		||||
        continue
 | 
			
		||||
    fi
 | 
			
		||||
    local psi_type="$(grep -Eo "(CPU|MEM|IO) PSI event" <<< "${line}" | grep -Eo "CPU|MEM|IO")"
 | 
			
		||||
    if [[ -n "${psi_type}" ]]; then
 | 
			
		||||
        secs+=(${psi_type} $(date +%s))
 | 
			
		||||
        secs+=(${psi_type} ${now})
 | 
			
		||||
        if [[ "${psi_type}" != "${last_alarm}" ]]; then
 | 
			
		||||
            if [[ ! ${notice_sent[${psi_type}]} ]]; then
 | 
			
		||||
                last_dunst_id=$(exec_notices "${psi_type}" "${current_alarms}")
 | 
			
		||||
                notice_sent+=(${psi_type} true)
 | 
			
		||||
            elif (( last_dunst_id >= 0 )) && ! check_dunst_id_is_visible "${last_dunst_id}"; then
 | 
			
		||||
            elif (( last_dunst_id >= 0 )) && check_dunst_id_is_visible "${last_dunst_id}"; then
 | 
			
		||||
                last_alarm="${psi_type}"
 | 
			
		||||
                sleep 1
 | 
			
		||||
                continue
 | 
			
		||||
            fi
 | 
			
		||||
        fi
 | 
			
		||||
@@ -253,16 +317,8 @@ while read line; do
 | 
			
		||||
                current_alarms="${current_alarms}|${psi_type}"
 | 
			
		||||
            fi
 | 
			
		||||
        fi
 | 
			
		||||
    else
 | 
			
		||||
        typeset -a alarms=( $(tr '|' ' ' <<< "$current_alarms") )
 | 
			
		||||
        for alarm in ${alarms}; do
 | 
			
		||||
            integer elapsed=$(( $(date +%s) - ${secs[${alarm}]} ))
 | 
			
		||||
            if is_clear "${alarm}" && (( elapsed > 300 )); then
 | 
			
		||||
                current_alarms=$(sed -E "s/${alarm}\|?//" <<< "${current_alarms}")
 | 
			
		||||
                last_alarm=$(awk -F'|' '{print $NF}' <<< "${current_alarms}")
 | 
			
		||||
            fi
 | 
			
		||||
        done
 | 
			
		||||
    fi
 | 
			
		||||
    sleep 1
 | 
			
		||||
done
 | 
			
		||||
#set +x
 | 
			
		||||
set +x
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user