diff --git a/content/post/2017-11.md b/content/post/2017-11.md index b75f1a728..382708db7 100644 --- a/content/post/2017-11.md +++ b/content/post/2017-11.md @@ -476,3 +476,29 @@ proxy_set_header User-Agent $ua; - I will whip up something in nginx later - Run system updates on CGSpace and reboot the server - Re-deploy latest `5_x-prod` branch on CGSpace and DSpace Test (includes the clickable thumbnails, CCAFS phase II project tags, and updated news text) + +## 2017-11-09 + +- Awesome, it seems my bot mapping stuff in nginx actually reduced the number of Tomcat sessions used by the CIAT scraper today, total requests and unique sessions: + +``` +# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep '09/Nov/2017' | grep -c 104.196.152.243 +5769 +$ grep 104.196.152.243 dspace.log.2017-11-09 | grep -o -E 'session_id=[A-Z0-9]{32}' | sort -n | uniq | wc -l +223 +``` + +- Versus the same stats for yesterday and the day before: + +``` +# zcat -f -- /var/log/nginx/access.log.1 /var/log/nginx/access.log.2.gz | grep '08/Nov/2017' | grep -c 104.196.152.243 +10216 +$ grep 104.196.152.243 dspace.log.2017-11-08 | grep -o -E 'session_id=[A-Z0-9]{32}' | sort -n | uniq | wc -l +2592 +# zcat -f -- /var/log/nginx/access.log.2.gz /var/log/nginx/access.log.3.gz | grep '07/Nov/2017' | grep -c 104.196.152.243 +8120 +$ grep 104.196.152.243 dspace.log.2017-11-07 | grep -o -E 'session_id=[A-Z0-9]{32}' | sort -n | uniq | wc -l +3506 +``` + +- The total number of requests varies by a few thousand, but the number of sessions is more than *ten times lower*! 
diff --git a/public/2017-11/index.html b/public/2017-11/index.html index 8448e2932..3fb07be9e 100644 --- a/public/2017-11/index.html +++ b/public/2017-11/index.html @@ -38,7 +38,7 @@ COPY 54701 - + @@ -86,9 +86,9 @@ COPY 54701 "@type": "BlogPosting", "headline": "November, 2017", "url": "https://alanorth.github.io/cgspace-notes/2017-11/", - "wordCount": "2725", + "wordCount": "2877", "datePublished": "2017-11-02T09:37:54+02:00", - "dateModified": "2017-11-08T22:26:37+02:00", + "dateModified": "2017-11-08T22:36:15+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -678,6 +678,36 @@ proxy_set_header User-Agent $ua;
  • Re-deploy latest 5_x-prod branch on CGSpace and DSpace Test (includes the clickable thumbnails, CCAFS phase II project tags, and updated news text)
  • +

    2017-11-09

    + + + +
    # cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep '09/Nov/2017' | grep -c 104.196.152.243 
    +5769
    +$ grep 104.196.152.243 dspace.log.2017-11-09 | grep -o -E 'session_id=[A-Z0-9]{32}' | sort -n | uniq | wc -l
    +223
    +
    + + + +
    # zcat -f -- /var/log/nginx/access.log.1 /var/log/nginx/access.log.2.gz | grep '08/Nov/2017' | grep -c 104.196.152.243 
    +10216
    +$ grep 104.196.152.243 dspace.log.2017-11-08 | grep -o -E 'session_id=[A-Z0-9]{32}' | sort -n | uniq | wc -l
    +2592
    +# zcat -f -- /var/log/nginx/access.log.2.gz /var/log/nginx/access.log.3.gz | grep '07/Nov/2017' | grep -c 104.196.152.243                                          
    +8120
    +$ grep 104.196.152.243 dspace.log.2017-11-07 | grep -o -E 'session_id=[A-Z0-9]{32}' | sort -n | uniq | wc -l         
    +3506
    +
    + + + diff --git a/public/sitemap.xml b/public/sitemap.xml index 530ecf7aa..bf181634a 100644 --- a/public/sitemap.xml +++ b/public/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2017-11/ - 2017-11-08T22:26:37+02:00 + 2017-11-08T22:36:15+02:00 @@ -134,7 +134,7 @@ https://alanorth.github.io/cgspace-notes/ - 2017-11-08T22:26:37+02:00 + 2017-11-08T22:36:15+02:00 0 @@ -145,7 +145,7 @@ https://alanorth.github.io/cgspace-notes/tags/notes/ - 2017-11-08T22:26:37+02:00 + 2017-11-08T22:36:15+02:00 0 @@ -157,13 +157,13 @@ https://alanorth.github.io/cgspace-notes/post/ - 2017-11-08T22:26:37+02:00 + 2017-11-08T22:36:15+02:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2017-11-08T22:26:37+02:00 + 2017-11-08T22:36:15+02:00 0