diff --git a/content/posts/2019-03.md b/content/posts/2019-03.md index 81f99352d..35707d387 100644 --- a/content/posts/2019-03.md +++ b/content/posts/2019-03.md @@ -917,5 +917,20 @@ $ grep -o -E 'session_id=[A-Z0-9]{32}:ip_addr=(18.195.78.144|18.196.196.108)' ds ``` - I will add their IPs to the list of bot IPs in nginx so I can tag them as bots to let Tomcat's Crawler Session Manager Valve to force them to re-use their session +- Another user agent behaving badly in Colombia is "GuzzleHttp/6.3.3 curl/7.47.0 PHP/7.0.30-0ubuntu0.16.04.1" +- I will add curl to the Tomcat Crawler Session Manager because anyone using curl is most likely making automated read-only requests +- I will add GuzzleHttp to the nginx badbots rate limiting, because it is making requests to dynamic Discovery pages + +``` +# zcat --force /var/log/nginx/{access,error,library-access}.log /var/log/nginx/{access,error,library-access}.log.1 | grep 45.5.184.72 | grep -E "26/Mar/2019:" | grep -E '(discover|browse)' | wc -l +119 +``` + +- What's strange is that I can't see any of their requests in the DSpace log... + +``` +$ grep -I -c 45.5.184.72 dspace.log.2019-03-26 +0 +``` diff --git a/docs/2019-03/index.html b/docs/2019-03/index.html index 6eab9d923..d005676d2 100644 --- a/docs/2019-03/index.html +++ b/docs/2019-03/index.html @@ -25,7 +25,7 @@ I think I will need to ask Udana to re-copy and paste the abstracts with more ca - + @@ -55,9 +55,9 @@ I think I will need to ask Udana to re-copy and paste the abstracts with more ca "@type": "BlogPosting", "headline": "March, 2019", "url": "https://alanorth.github.io/cgspace-notes/2019-03/", - "wordCount": "5785", + "wordCount": "5878", "datePublished": "2019-03-01T12:16:30+01:00", - "dateModified": "2019-03-26T18:25:05+02:00", + "dateModified": "2019-03-26T19:41:33+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -1201,8 +1201,23 @@ $ ./delete-metadata-values.py -i /tmp/2019-03-26-AGROVOC-79-deletions.csv -db ds +
# zcat --force /var/log/nginx/{access,error,library-access}.log /var/log/nginx/{access,error,library-access}.log.1 | grep 45.5.184.72 | grep -E "26/Mar/2019:" | grep -E '(discover|browse)' | wc -l                                        
+119
+
+ + + +
$ grep -I -c 45.5.184.72 dspace.log.2019-03-26 
+0
+
+ diff --git a/docs/robots.txt b/docs/robots.txt index 2b88ae317..9c8f41429 100644 --- a/docs/robots.txt +++ b/docs/robots.txt @@ -45,7 +45,7 @@ Disallow: /cgspace-notes/2015-12/ Disallow: /cgspace-notes/2015-11/ Disallow: /cgspace-notes/ Disallow: /cgspace-notes/categories/ -Disallow: /cgspace-notes/categories/notes/ Disallow: /cgspace-notes/tags/notes/ +Disallow: /cgspace-notes/categories/notes/ Disallow: /cgspace-notes/posts/ Disallow: /cgspace-notes/tags/ diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 0716957ae..f3b459776 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2019-03/ - 2019-03-26T18:25:05+02:00 + 2019-03-26T19:41:33+02:00 @@ -214,7 +214,7 @@ https://alanorth.github.io/cgspace-notes/ - 2019-03-26T18:25:05+02:00 + 2019-03-26T19:41:33+02:00 0 @@ -223,27 +223,27 @@ 0 + + https://alanorth.github.io/cgspace-notes/tags/notes/ + 2019-03-26T19:41:33+02:00 + 0 + + https://alanorth.github.io/cgspace-notes/categories/notes/ 2018-03-09T22:10:33+02:00 0 - - https://alanorth.github.io/cgspace-notes/tags/notes/ - 2019-03-26T18:25:05+02:00 - 0 - - https://alanorth.github.io/cgspace-notes/posts/ - 2019-03-26T18:25:05+02:00 + 2019-03-26T19:41:33+02:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2019-03-26T18:25:05+02:00 + 2019-03-26T19:41:33+02:00 0