# is your statsd machine maxing out cpu? ... unable to pull udp packets out of the buffer
# at a fast enough rate? (see `netstat -su`) timer metrics are by far the most cpu intensive,
# and tuning the sampling of those is key to keeping cpu load under control.
# this tool (to be run on your graphite server) shows, for all your timing metric keys, how many packets
# it accepted in a given interval such as 1 hour. using this information you can make informed decisions as
# to which keys to sample and by how much.
# note that in some bad cases you might see no effect after increasing your amount of sampling; the explanation is
# that you were first sending so many packets — of which only a fraction were being processed and shown in these counts —
# that even after sampling more, statsd still can't process them all and your count stays in the same range.
# --- configuration -----------------------------------------------------------
# Base URL of the Graphite web app; edit before use.  Quoted because the
# placeholder's < and > would otherwise be parsed as shell redirections.
graphite_url='http://<your graphite url>'
# Root of the whisper database on this (the graphite) server.
whisper_dir=/var/lib/carbon/whisper
# Subdirectory (relative to $whisper_dir) holding statsd timer metrics.
timers_subdir=stats/timers

# you may want to adjust this function according to the characteristics of your environment
# I wish whisper-fetch.py supported the same function API as the http endpoint does, then I could avoid http here.
#
# Fetch the summed value of metric $1 over the last hour via the Graphite
# render API (CSV format).
# Arguments: $1 - dotted metric name
# Outputs:   a number ending in ".0" on stdout, or whitespace if all
#            datapoints in the window were None
function get_indicative_count () {
  local metric=$1
  local url="$graphite_url/render/?from=-1h&target=summarize($metric,%221hour%22,%22sum%22)&format=csv"
  # last CSV row holds the summarized bucket; strip everything up to the final comma
  wget -q "$url" -O - | tail -n 1 | sed 's#.*,##'
}

# List every timer "count" series stored on disk, converted from a whisper
# filesystem path to the dotted metric name Graphite expects.
# Globals:  whisper_dir, timers_subdir (read)
# Outputs:  one metric name per line, e.g. stats.timers.api.get.count
function list_timer_count_files () {
  local root="$whisper_dir/$timers_subdir"
  # strip the storage prefix and the .wsp extension, then map path separators to dots
  find "$root" -name 'count.wsp' | sed -e "s#$whisper_dir/\($timers_subdir/.*/count\).wsp#\1#" -e 's#/#.#g'
}

# Print "metric-name accepted-count" for every timer count series found on
# disk.  Reads metric names line-by-line (instead of word-splitting a command
# substitution) so the loop is robust and streams as results arrive.
function list_timer_counts () {
  local metric
  while IFS= read -r metric; do
    echo "$metric $(get_indicative_count "$metric")"
  done < <(list_timer_count_files)
}

# Main report: one line per timer key with the number of packets statsd
# accepted over the last hour, sorted ascending on that count so the heaviest
# keys end up at the bottom.  The grep keeps only rows that actually had data
# (real values end in ".0"; empty/None rows are dropped).
list_timer_counts | grep 'count .*\.0' | sort -n -k2