diff --git a/content/post/2017-11.md b/content/post/2017-11.md index 0f0f0241b..8bd797044 100644 --- a/content/post/2017-11.md +++ b/content/post/2017-11.md @@ -482,8 +482,8 @@ proxy_set_header User-Agent $ua; - Awesome, it seems my bot mapping stuff in nginx actually reduced the number of Tomcat sessions used by the CIAT scraper today, total requests and unique sessions: ``` -# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep '09/Nov/2017' | grep -c 104.196.152.243 -5769 +# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep '09/Nov/2017' | grep -c 104.196.152.243 +7648 $ grep 104.196.152.243 dspace.log.2017-11-09 | grep -o -E 'session_id=[A-Z0-9]{32}' | sort -n | uniq | wc -l 223 ``` @@ -501,6 +501,6 @@ $ grep 104.196.152.243 dspace.log.2017-11-07 | grep -o -E 'session_id=[A-Z0-9]{3 3506 ``` -- The number of total requests vary by a few thousand, but the number of sessions is over *ten times less*! +- The number of sessions is over *ten times less*! - This gets me thinking, I wonder if I can use something like nginx's rate limiter to automatically change the user agent of clients who make too many requests - Perhaps using a combination of geo and map, like illustrated here: https://www.nginx.com/blog/rate-limiting-nginx/ diff --git a/public/2017-11/index.html b/public/2017-11/index.html index 83504b203..ca13b51fd 100644 --- a/public/2017-11/index.html +++ b/public/2017-11/index.html @@ -38,7 +38,7 @@ COPY 54701 - + @@ -86,9 +86,9 @@ COPY 54701 "@type": "BlogPosting", "headline": "November, 2017", "url": "https://alanorth.github.io/cgspace-notes/2017-11/", - "wordCount": "2921", + "wordCount": "2910", "datePublished": "2017-11-02T09:37:54+02:00", - "dateModified": "2017-11-09T17:52:14+02:00", + "dateModified": "2017-11-09T18:05:32+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -684,8 +684,8 @@ proxy_set_header User-Agent $ua;
  • Awesome, it seems my bot mapping stuff in nginx actually reduced the number of Tomcat sessions used by the CIAT scraper today, total requests and unique sessions:
  • -
    # cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep '09/Nov/2017' | grep -c 104.196.152.243
    -5769
    +
    # cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep '09/Nov/2017' | grep -c 104.196.152.243 
    +7648
     $ grep 104.196.152.243 dspace.log.2017-11-09 | grep -o -E 'session_id=[A-Z0-9]{32}' | sort -n | uniq | wc -l
     223
     
    @@ -705,7 +705,7 @@ $ grep 104.196.152.243 dspace.log.2017-11-07 | grep -o -E 'session_id=[A-Z0-9]{3
    diff --git a/public/robots.txt b/public/robots.txt index b90b33492..51a461e57 100644 --- a/public/robots.txt +++ b/public/robots.txt @@ -29,7 +29,7 @@ Disallow: /cgspace-notes/2015-12/ Disallow: /cgspace-notes/2015-11/ Disallow: /cgspace-notes/ Disallow: /cgspace-notes/categories/ -Disallow: /cgspace-notes/categories/notes/ Disallow: /cgspace-notes/tags/notes/ +Disallow: /cgspace-notes/categories/notes/ Disallow: /cgspace-notes/post/ Disallow: /cgspace-notes/tags/ diff --git a/public/sitemap.xml b/public/sitemap.xml index 90e12b779..ca725f576 100644 --- a/public/sitemap.xml +++ b/public/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2017-11/ - 2017-11-09T17:52:14+02:00 + 2017-11-09T18:05:32+02:00 @@ -134,7 +134,7 @@ https://alanorth.github.io/cgspace-notes/ - 2017-11-09T17:52:14+02:00 + 2017-11-09T18:05:32+02:00 0 @@ -143,27 +143,27 @@ 0 + + https://alanorth.github.io/cgspace-notes/tags/notes/ + 2017-11-09T18:05:32+02:00 + 0 + + https://alanorth.github.io/cgspace-notes/categories/notes/ 2017-09-28T12:00:49+03:00 0 - - https://alanorth.github.io/cgspace-notes/tags/notes/ - 2017-11-09T17:52:14+02:00 - 0 - - https://alanorth.github.io/cgspace-notes/post/ - 2017-11-09T17:52:14+02:00 + 2017-11-09T18:05:32+02:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2017-11-09T17:52:14+02:00 + 2017-11-09T18:05:32+02:00 0