Commit bb0ec749 authored by tobias.watermann02
Browse files

Scripts for Omnipath Metrics via perfquery and opainfo

parent 943d717d
#!/bin/bash
#
# Telegraf/InfluxDB collector that reports custom Omnipath port counters
# read with perfquery.
#
# Adapted from infiniband.sh by: Watermann, Tobias <watermann@zib.de>
# Date: March 9, 2020

# Switch to true to get debug output when running from a terminal.
isdebug=false

# Nodes that should be skipped (currently disabled):
#ignore_list=$(seq -f dge%03g 16 45; seq -f gwdo%03g 161 180)

# Short host name; used as the host tag of the emitted measurement.
HOSTNAME=$(hostname -s)
#######################################
# Report whether this host appears in a whitespace-separated host list.
# Globals:   HOSTNAME (read)
# Arguments: $1 - whitespace-separated list of host names (may be empty)
# Returns:   0 if HOSTNAME equals one of the entries, 1 otherwise
#######################################
the_right_host_among() {
  # Keep the loop variable out of the caller's scope.
  local item
  # $1 is deliberately unquoted: the list is split into words here.
  for item in $1; do
    if [[ "$HOSTNAME" == "$item" ]]; then
      return 0
    fi
  done
  return 1
}
# Hosts named in ignore_list report nothing.
the_right_host_among "$ignore_list" && exit 0

# Location of the state file that carries counter readings between runs.
# NOTE(review): the opainfo-based collector uses this very same path; if both
# collectors can run on one host they would overwrite each other's state -
# confirm, and give each its own file if so.
INFLOGPATH=/dev/shm/omnipath-logger
INFLOGFILE="${INFLOGPATH}/omnipath.log"
#######################################
# Extract the value of one counter from perfquery-style output.
# A counter line is matched as "<name>:" followed by optional dots and digits.
# Arguments: $1 - text to search
#            $2 - counter name (inserted into a sed pattern as-is)
# Outputs:   the counter value on stdout, or -1 when no line matches or the
#            extracted text is not a single non-negative integer
#######################################
get_metric() {
  # Locals, so a direct call (outside a command substitution) cannot clobber
  # a caller's variables such as a loop variable named m.
  local value
  local -r numeric_re='^[0-9]+$'
  value=$(printf '%s\n' "$1" | sed -n "s/$2:\.*\([0-9]*\)/\1/p")
  # Anything but one plain integer (no match, hex value, several matching
  # lines) is reported with the -1 sentinel.
  if ! [[ $value =~ $numeric_re ]]; then
    value=-1
  fi
  printf '%s\n' "$value"
}
# Print $1 on stdout when the command stored in isdebug succeeds
# (isdebug is expected to hold "true" or "false"); otherwise stay silent.
debug() {
  $isdebug || return 0
  echo "$1"
}
# Counters looked up in the plain perfquery output.
METRICS32="SymbolErrorCounter LinkErrorRecoveryCounter LinkDownedCounter \
PortRcvErrors PortRcvRemotePhysicalErrors PortRcvSwitchRelayErrors \
PortXmitDiscards PortXmitConstraintErrors PortRcvConstraintErrors \
LocalLinkIntegrityErrors ExcessiveBufferOverrunErrors VL15Dropped"

# Counters looked up in the "perfquery -x" output.
METRICS64="PortXmitData PortRcvData PortXmitPkts PortRcvPkts \
PortUnicastXmitPkts PortUnicastRcvPkts \
PortMulticastXmitPkts PortMulticastRcvPkts"

# Placeholder value; appears to be unused by the perfquery collector.
METRICSOPA="Test"
# Seed the state file with zeroed counters when it does not exist yet.
# The check is on the file itself, not only on the directory, so a run that
# finds the directory but no file still gets a readable state file.
if [[ ! -f "$INFLOGFILE" ]]; then
  mkdir -p "$INFLOGPATH"
  # The metric lists are split into words on purpose: printf repeats the
  # format once per name, writing one "name=0" line each.
  # shellcheck disable=SC2086
  printf '%s=0\n' $METRICS32 $METRICS64 > "$INFLOGFILE"
fi
# Load the readings stored by the previous run.
# The state file holds one "key=value" pair per line; the key "timestamp"
# carries the time of the previous sample.
declare -A prev_metrics
PREVTS=0
debug "Previous measurements:"
while IFS== read -r key value; do
  # Blank lines carry no key.
  [ -n "$key" ] || continue
  case "$key" in
    timestamp) PREVTS=$value ;;
  esac
  # A key without a value counts as zero.
  value=${value:-0}
  prev_metrics[$key]=$value
  debug "$key = $value"
done < "$INFLOGFILE"
## Check for overflow of 32 bit counters
#MAX32=4000000000 # circa 300,000,000 less than maximum
#NEEDRESET32=false
#for m in $METRICS32; do
# if [ "${prev_metrics[$m]}" -gt "$MAX32" ]; then
# NEEDRESET32=true
# break
# fi
#done
debug "Calculating diffs:"
declare -A metrics
declare -A aggr_metrics

# Seconds since the previous sample, forced to at least 1 because it is
# used as a divisor below.
CURTS=$(date +%s)
FRACTS=$(( CURTS - PREVTS ))
(( FRACTS > 0 )) || FRACTS=1

# Counters from "perfquery -x": reported as change per second.
ibout64=$(/sbin/perfquery -x)
debug "- 64 bit -"
for m in $METRICS64; do
  cur_metric=$(get_metric "$ibout64" "$m")
  prev_metric=${prev_metrics[$m]}
  diff=$(( (cur_metric - prev_metric) / FRACTS ))
  # The raw reading is what gets stored for the next run.
  aggr_metrics[$m]=$cur_metric
  debug "$m: ( $cur_metric - $prev_metric ) / $FRACTS = $diff"
  if (( diff < 0 )); then
    # Reading dropped below the stored one: report the raw reading instead.
    metrics[$m]=$cur_metric
  else
    metrics[$m]=$diff
  fi
done
debug "- 32 bit -"
# NEEDRESET32 is only assigned by the overflow check, which is commented out.
# Expanding it unset as a bare command yields an empty command, which
# succeeds, so the reset branch would always run and perfquery would never
# be queried. Default to "false" so the counters are actually read.
if "${NEEDRESET32:-false}"; then
  debug "- counters are reset -"
  # NOTE(review): the resetting query is still disabled, so with
  # NEEDRESET32=true no counters are read and every metric becomes -1.
  # ibout32=$(perfquery -r)
else
  ibout32=$(/sbin/perfquery)
fi

# Counters from plain perfquery: reported as the change since the last run.
for m in $METRICS32; do
  cur_metric=$(get_metric "$ibout32" "$m")
  prev_metric=${prev_metrics[$m]}
  diff=$((cur_metric - prev_metric))
  # The raw reading is what gets stored for the next run.
  aggr_metrics[$m]=$cur_metric
  debug "$m: $cur_metric - $prev_metric = $diff"
  if [ "$diff" -ge "0" ]; then
    metrics[$m]=$diff
  else
    # Reading dropped below the stored one: report the raw reading instead.
    metrics[$m]=$cur_metric
  fi
done
# Assemble the output line: "infiniband,host=<host> name=value,name=value,..."
out="infiniband,host=${HOSTNAME}"
sep=" "   # a blank before the first field, commas between the rest
for i in $METRICS32 $METRICS64; do
  out="${out}${sep}${i}=${metrics[$i]}"
  sep=","
done
# Expanded unquoted, so the line goes through word splitting before printing.
echo $out
# Store the raw readings plus the sample time for the next run.
# Entries are joined with literal "\n" sequences that printf %b expands.
mf=""
for m in $METRICS32 $METRICS64; do
  mf="${m}=${aggr_metrics[$m]}\n${mf}"
done
mf="timestamp=${CURTS}\n${mf}"
printf '%b\n' "$mf" > "$INFLOGFILE"
debug "measurements are exported"
#!/bin/bash
#
# Telegraf/InfluxDB collector that reports custom Omnipath port statistics
# read with opainfo.
#
# Adapted from infiniband.sh by: Watermann, Tobias <watermann@zib.de>
# Date: March 9, 2020

# Switch to true to get debug output when running from a terminal.
isdebug=false

# Nodes that should be skipped (currently disabled):
#ignore_list=$(seq -f dge%03g 16 45; seq -f gwdo%03g 161 180)

# Short host name; used as the host tag of the emitted measurement.
HOSTNAME=$(hostname -s)
#######################################
# Report whether this host appears in a whitespace-separated host list.
# Globals:   HOSTNAME (read)
# Arguments: $1 - whitespace-separated list of host names (may be empty)
# Returns:   0 if HOSTNAME equals one of the entries, 1 otherwise
#######################################
the_right_host_among() {
  # Keep the loop variable out of the caller's scope.
  local item
  # $1 is deliberately unquoted: the list is split into words here.
  for item in $1; do
    if [[ "$HOSTNAME" == "$item" ]]; then
      return 0
    fi
  done
  return 1
}
# Hosts named in ignore_list report nothing.
the_right_host_among "$ignore_list" && exit 0

# Location of the state file that carries counter readings between runs.
# NOTE(review): the perfquery-based collector uses this very same path; if
# both collectors can run on one host they would overwrite each other's
# state - confirm, and give each its own file if so.
INFLOGPATH=/dev/shm/omnipath-logger
INFLOGFILE="${INFLOGPATH}/omnipath.log"
#######################################
# Extract the value of one statistic from opainfo-style output.
# A line is matched as: optional blanks, the label, optional blanks, digits.
# NOTE(review): confirm this layout against real "opainfo -o stats" output.
# Arguments: $1 - text to search
#            $2 - metric name; every "-" stands for a space in the label
#                 (e.g. "Xmit-Data" is looked up as "Xmit Data")
# Outputs:   the value on stdout, or -1 when no line matches or the
#            extracted text is not a single non-negative integer
#######################################
get_metric() {
  # Locals, so a direct call (outside a command substitution) cannot clobber
  # a caller's variables such as a loop variable named m.
  local label value
  # Anchored at both ends: several matching lines or stray text must not
  # pass as a number.
  local -r numeric_re='^[0-9]+$'
  label=${2//-/ }
  value=$(printf '%s\n' "$1" \
    | sed -n "s/^[ \t]*${label}[ \t]*\([0-9]*\).*/\1/p")
  if ! [[ $value =~ $numeric_re ]]; then
    value=-1
  fi
  printf '%s\n' "$value"
}
# Print $1 on stdout when the command stored in isdebug succeeds
# (isdebug is expected to hold "true" or "false"); otherwise stay silent.
debug() {
  if ! $isdebug; then
    return 0
  fi
  echo "$1"
}
# perfquery counter names; they appear to be unused by the opainfo collector.
METRICS32="SymbolErrorCounter LinkErrorRecoveryCounter LinkDownedCounter \
PortRcvErrors PortRcvRemotePhysicalErrors PortRcvSwitchRelayErrors \
PortXmitDiscards PortXmitConstraintErrors PortRcvConstraintErrors \
LocalLinkIntegrityErrors ExcessiveBufferOverrunErrors VL15Dropped"
METRICS64="PortXmitData PortRcvData PortXmitPkts PortRcvPkts \
PortUnicastXmitPkts PortUnicastRcvPkts \
PortMulticastXmitPkts PortMulticastRcvPkts"

# Statistics looked up in the opainfo output; "-" stands for a space in the
# label that is searched for.
METRICSOPA="Xmit-Data Xmit-Pkts MC-Xmt-Pkts Rcv-Data Rcv-Pkts MC-Rcv-Pkts"
# Seed the state file with zeroed counters when it does not exist yet.
# The check is on the file itself, not only on the directory, so a run that
# finds the directory but no file still gets a readable state file.
if [[ ! -f "$INFLOGFILE" ]]; then
  mkdir -p "$INFLOGPATH"
  # Seed from METRICSOPA, the list this collector actually reports.
  # The list is split into words on purpose: printf repeats the format once
  # per name, writing one "name=0" line each.
  # shellcheck disable=SC2086
  printf '%s=0\n' $METRICSOPA > "$INFLOGFILE"
fi
# Load the readings stored by the previous run.
# The state file holds one "key=value" pair per line; the key "timestamp"
# carries the time of the previous sample.
declare -A prev_metrics
PREVTS=0
debug "Previous measurements:"
while IFS== read -r key value; do
  # Blank lines carry no key.
  [ -n "$key" ] || continue
  case "$key" in
    timestamp) PREVTS=$value ;;
  esac
  # A key without a value counts as zero.
  value=${value:-0}
  prev_metrics[$key]=$value
  debug "$key = $value"
done < "$INFLOGFILE"
debug "Calculating diffs:"
declare -A metrics
declare -A aggr_metrics

# Seconds since the previous sample, forced to at least 1 because it is
# used as a divisor below.
CURTS=$(date +%s)
FRACTS=$((CURTS - PREVTS))
if [ "$FRACTS" -le "0" ]; then
  FRACTS=1
fi

# Statistics from opainfo: reported as change per second.
opainfo=$(/sbin/opainfo -o stats)
for m in $METRICSOPA; do
  # Diagnostics go through debug(): stdout must carry nothing but the final
  # measurement line, otherwise the consumer receives stray text.
  debug "$m"
  debug "${m//-/ }"
  debug "---"
  cur_metric=$(get_metric "$opainfo" "$m")
  debug "$cur_metric"
  prev_metric=${prev_metrics[$m]}
  diff=$(( (cur_metric - prev_metric) / FRACTS ))
  # The raw reading is what gets stored for the next run.
  aggr_metrics[$m]=$cur_metric
  debug "$m: ( $cur_metric - $prev_metric ) / $FRACTS = $diff"
  if [ "$diff" -ge "0" ]; then
    metrics[$m]=$diff
  else
    # Reading dropped below the stored one: report the raw reading instead.
    metrics[$m]=$cur_metric
  fi
done
# Assemble the output line: "omnipath,host=<host> name=value,name=value,..."
out="omnipath,host=${HOSTNAME}"
sep=" "   # a blank before the first field, commas between the rest
for i in $METRICSOPA; do
  out="${out}${sep}${i}=${metrics[$i]}"
  sep=","
done
# Expanded unquoted, so the line goes through word splitting before printing.
echo $out
# Store the raw readings plus the sample time for the next run.
# Entries are joined with literal "\n" sequences that printf %b expands.
mf=""
for m in $METRICSOPA; do
  mf="${m}=${aggr_metrics[$m]}\n${mf}"
done
mf="timestamp=${CURTS}\n${mf}"
printf '%b\n' "$mf" > "$INFLOGFILE"
debug "measurements are exported"
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment