Nk_traffic_breakdown
Description
nk_traffic_breakdown()
is a function for generating a report on the traffic of all domains hosted on a server. The script is divided into several functions, each of which extracts a different type of information from the log files of the domains. It uses the other functions to generate a table with the following information for each domain:
Domain
: the domain name
Requests
: the total number of requests made to the domain
GET/POST
: the number of GET requests and the number of POST requests, shown together as (GET/POST)
%-Requests
: the percentage of requests made to the domain out of the total requests for all domains
Bandwidth
: the total bandwidth used by the domain
%-Bandwidth
: the percentage of bandwidth used by the domain out of the total bandwidth for all domains
XMLRPC
: the number of requests made to the domain that contain "xmlrpc" in their URL
Bots
: the number of requests made to the domain by bots, crawlers or spiders.
Example
[root@cloudvpsserver ~]# nk_traffic_breakdown
Domain     Requests  GET/POST     %-Requests  Bandwidth  %-Bandwidth  XMLRPC        Bots
nkern.net  8270      (3820/4288)  100%        77MB       100%         3529(42.6%%)  1358(16.4%%)
Code
#######################################
# Print a traffic report for every domain on the server, built from each
# domain's two most recently modified archived access logs.
#
# Log lines are assumed to be in Apache common/combined format: field 6 is
# the quoted request method and field 10 is the number of bytes sent.
#
# Globals:
#   None. (The nested gen_* helper functions become defined in the calling
#   shell after the first run, as bash functions are always global.)
# Arguments:
#   None.
# Outputs:
#   Writes a whitespace-aligned table to stdout, one row per domain, sorted
#   by request count (highest first):
#     Domain | Requests GET/POST %-Requests | Bandwidth %-Bandwidth | XMLRPC Bots |
# Requires:
#   nk_list_all_domains, nk_user and nk_percent (defined elsewhere), plus
#   find, stat, gzip, grep, awk, sort, numfmt and column.
#######################################
nk_traffic_breakdown() {
  # Whitespace-separated list of domains; kept local so it does not leak
  # into the interactive shell this function is normally run from.
  local all_domains
  all_domains="$(nk_list_all_domains)"

  # Print the paths (one per line, newest first) of the two most recently
  # modified archived logs for the domain in $1. Two are taken so that both
  # the http and the https log are covered.
  gen_latest_domlogs() {
    local domain="$1"
    local user archive_dir
    user="$(nk_user "$domain")"
    archive_dir="/home/$user/logs"

    # No archive directory means no logs; print nothing instead of erroring.
    [[ -d "$archive_dir" ]] || return 0

    # -path matches the domain literally (a grep pattern would treat the
    # dots in the domain name as "any character"). stat prints
    # "<mtime epoch> <path>" so the newest files sort to the top.
    find "$archive_dir" -type f -path "$archive_dir/${domain}-*" \
      -exec stat -c '%Y %n' {} + |
      sort -rn |
      head -n 2 |
      cut -d ' ' -f 2-
  }

  # Run gen_latest_domlogs for every domain on the server.
  gen_latest_domlogs_all() {
    local domain
    # Intentionally unquoted: the list is split on whitespace.
    for domain in $all_domains; do
      gen_latest_domlogs "$domain"
    done
  }

  # Read log paths from stdin (one per line) and write their decompressed
  # contents to stdout. "gzip -cdf" also passes plain-text logs through
  # unchanged, so compressed and uncompressed archives are both handled.
  gen_cat_logs() {
    local log
    while IFS= read -r log; do
      [[ -n "$log" ]] || continue
      gzip -cdf -- "$log"
    done
  }

  # Count the lines on stdin. Always prints a plain integer (0 when empty).
  gen_count_lines() {
    awk 'END { printf "%.0f\n", NR }'
  }

  # Sum the bytes-sent field (field 10) of the log lines on stdin. Values
  # that are not plain integers (e.g. "-") are skipped. printf "%.0f" keeps
  # large totals out of exponent notation and prints 0 for empty input.
  gen_sum_bytes() {
    awk '$10 ~ /^[0-9]+$/ { sum += $10 } END { printf "%.0f\n", sum }'
  }

  # Count the lines in the current domain's logs ($latest_logs) that match
  # the grep options/pattern given as arguments.
  gen_count_matching() {
    gen_cat_logs <<< "$latest_logs" | grep -c "$@"
  }

  # Number of requests in the current domain's logs.
  gen_num_requests() {
    gen_cat_logs <<< "$latest_logs" | gen_count_lines
  }

  # Number of GET requests. The leading quote and trailing space anchor the
  # match to the request method, so "GET" appearing in a URL, referrer or
  # user agent is not counted.
  gen_gets() {
    gen_count_matching -F '"GET '
  }

  # Number of POST requests (anchored the same way as gen_gets).
  gen_posts() {
    gen_count_matching -F '"POST '
  }

  # Total bytes sent according to the current domain's logs.
  gen_bandwidth() {
    gen_cat_logs <<< "$latest_logs" | gen_sum_bytes
  }

  # Number of log lines that mention "xmlrpc".
  gen_xmlrpc() {
    gen_count_matching -F 'xmlrpc'
  }

  # Number of log lines that mention "bot", "crawl" or "spider"
  # (case-insensitive).
  gen_bots() {
    gen_count_matching -Ei '(bot|crawl|spider)'
  }

  # Total number of requests across the latest logs of all domains.
  gen_total_requests() {
    gen_latest_domlogs_all | gen_cat_logs | gen_count_lines
  }

  # Total bytes sent across the latest logs of all domains.
  gen_total_bandwidth() {
    gen_latest_domlogs_all | gen_cat_logs | gen_sum_bytes
  }

  # Emit the unaligned table: two header lines followed by one row per
  # domain.
  gen_traffic_breakdown_body() {
    local domain latest_logs
    local requests gets posts gets_posts percent_requests
    local bandwidth bandwidth_human bandwidth_percent
    local xmlrpc xmlrpc_percent xmlrpc_result
    local bots bots_percent bots_result
    local total_bandwidth total_requests

    # The totals are identical for every row, so compute them once.
    total_bandwidth="$(gen_total_bandwidth)"
    total_requests="$(gen_total_requests)"

    # Header lines are printed outside the sorted pipeline so they stay on
    # top.
    printf '%s\n' "Domain | Requests GET/POST %-Requests | Bandwidth %-Bandwidth | XMLRPC Bots |"
    printf '%s\n' "--- - --- --- --- - --- --- - --- --- -"

    # Intentionally unquoted: the list is split on whitespace.
    for domain in $all_domains; do
      latest_logs="$(gen_latest_domlogs "$domain")"

      requests="$(gen_num_requests)"
      gets="$(gen_gets)"
      posts="$(gen_posts)"
      gets_posts="($gets/$posts)"
      percent_requests="$(nk_percent "$requests" "$total_requests")"

      bandwidth="$(gen_bandwidth)"
      bandwidth_human="$(numfmt --to=iec --suffix=B "$bandwidth")"
      bandwidth_percent="$(nk_percent "$bandwidth" "$total_bandwidth")"

      # Strip one trailing "%" from nk_percent's output before adding our
      # own, so the cell reads "N(P%)" rather than "N(P%%)" regardless of
      # whether nk_percent appends the sign itself.
      xmlrpc="$(gen_xmlrpc)"
      xmlrpc_percent="$(nk_percent "$xmlrpc" "$requests")"
      xmlrpc_percent=${xmlrpc_percent%\%}
      xmlrpc_result="${xmlrpc}(${xmlrpc_percent}%)"

      bots="$(gen_bots)"
      bots_percent="$(nk_percent "$bots" "$requests")"
      bots_percent=${bots_percent%\%}
      bots_result="${bots}(${bots_percent}%)"

      printf '%s\n' "$domain | $requests $gets_posts $percent_requests | $bandwidth_human $bandwidth_percent | $xmlrpc_result $bots_result |"
    # Field 2 of each row is the literal "|", so the request count is
    # field 3; sort on exactly that field, highest first.
    done | sort -rn -k 3,3
  }

  # Build the table body and align its columns.
  gen_traffic_breakdown_body | column -t
}