From e74be8ab0a457258d2532ea9da2dc0dbd2c626b3 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Sun, 21 Oct 2018 08:06:40 +0300 Subject: [PATCH] Update notes for 2018-10-20 --- content/posts/2018-10.md | 42 +++++++++++++++++++++++++++++++++ docs/2018-10/index.html | 50 +++++++++++++++++++++++++++++++++++++--- docs/sitemap.xml | 10 ++++---- 3 files changed, 94 insertions(+), 8 deletions(-) diff --git a/content/posts/2018-10.md b/content/posts/2018-10.md index 2baf57274..cd8d2ceb5 100644 --- a/content/posts/2018-10.md +++ b/content/posts/2018-10.md @@ -446,5 +446,47 @@ ERROR: Error CREATEing SolrCore 'statistics': Unable to create core [statistics] - Apparently a bunch of variable types were removed in [Solr 5](https://issues.apache.org/jira/browse/SOLR-5936) - So for now it's actually a huge pain in the ass to run the tests for my dspace-statistics-api +- Linode sent a message that the CPU usage was high on CGSpace (linode18) last night +- According to the nginx logs around that time it was 5.9.6.51 (MegaIndex) again: + +``` +# zcat --force /var/log/nginx/*.log /var/log/nginx/*.log.1 | grep -E "20/Oct/2018:(14|15|16)" | awk '{print $1}' | sort + | uniq -c | sort -n | tail -n 10 + 249 207.46.13.179 + 250 157.55.39.173 + 301 54.166.207.223 + 303 157.55.39.213 + 310 66.249.64.95 + 362 34.218.226.147 + 381 66.249.64.93 + 415 35.237.175.180 + 1205 66.249.64.91 + 1227 5.9.6.51 +``` + +- This bot is only using the XMLUI and it does *not* seem to be re-using its sessions: + +``` +# grep -c 5.9.6.51 /var/log/nginx/*.log +/var/log/nginx/access.log:9323 +/var/log/nginx/error.log:0 +/var/log/nginx/library-access.log:0 +/var/log/nginx/oai.log:0 +/var/log/nginx/rest.log:0 +/var/log/nginx/statistics.log:0 +# grep -c -E 'session_id=[A-Z0-9]{32}:ip_addr=5.9.6.51' dspace.log.2018-10-20 | sort | uniq +8915 +``` + +- Last month I added "crawl" to the Tomcat Crawler Session Manager Valve's regular expression matching, and it seems to be working for MegaIndex's user agent: + +``` +$ http --print Hh 'https://dspacetest.cgiar.org/handle/10568/1' User-Agent:'"Mozilla/5.0 (compatible; MegaIndex.ru/2.0; +http://megaindex.com/crawler)"' +``` + +- So I'm not sure why this bot uses so many sessions — is it because it requests very slowly? + +## 2018-10-21 + diff --git a/docs/2018-10/index.html b/docs/2018-10/index.html index 82600f16c..aaff333fd 100644 --- a/docs/2018-10/index.html +++ b/docs/2018-10/index.html @@ -9,7 +9,7 @@ - + @@ -24,9 +24,9 @@ "@type": "BlogPosting", "headline": "October, 2018", "url": "https://alanorth.github.io/cgspace-notes/2018-10/", - "wordCount": "3376", + "wordCount": "3542", "datePublished": "2018-10-01T22:31:54+03:00", - "dateModified": "2018-10-18T23:57:22+03:00", + "dateModified": "2018-10-20T18:17:59+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -599,8 +599,52 @@ ERROR: Error CREATEing SolrCore 'statistics': Unable to create core [statistics] +
# zcat --force /var/log/nginx/*.log /var/log/nginx/*.log.1 | grep -E "20/Oct/2018:(14|15|16)" | awk '{print $1}' | sort
+ | uniq -c | sort -n | tail -n 10
+    249 207.46.13.179
+    250 157.55.39.173
+    301 54.166.207.223
+    303 157.55.39.213
+    310 66.249.64.95
+    362 34.218.226.147
+    381 66.249.64.93
+    415 35.237.175.180
+   1205 66.249.64.91
+   1227 5.9.6.51
+
+ + + +
# grep -c 5.9.6.51 /var/log/nginx/*.log
+/var/log/nginx/access.log:9323
+/var/log/nginx/error.log:0
+/var/log/nginx/library-access.log:0
+/var/log/nginx/oai.log:0
+/var/log/nginx/rest.log:0
+/var/log/nginx/statistics.log:0
+# grep -c -E 'session_id=[A-Z0-9]{32}:ip_addr=5.9.6.51' dspace.log.2018-10-20 | sort | uniq
+8915
+
+ + + +
$ http --print Hh 'https://dspacetest.cgiar.org/handle/10568/1' User-Agent:'"Mozilla/5.0 (compatible; MegaIndex.ru/2.0; +http://megaindex.com/crawler)"'
+
+ + + +

2018-10-21

+ diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 0aa05c3ca..d4d750bc9 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2018-10/ - 2018-10-18T23:57:22+03:00 + 2018-10-20T18:17:59+03:00 @@ -189,7 +189,7 @@ https://alanorth.github.io/cgspace-notes/ - 2018-10-18T23:57:22+03:00 + 2018-10-20T18:17:59+03:00 0 @@ -200,7 +200,7 @@ https://alanorth.github.io/cgspace-notes/tags/notes/ - 2018-10-18T23:57:22+03:00 + 2018-10-20T18:17:59+03:00 0 @@ -212,13 +212,13 @@ https://alanorth.github.io/cgspace-notes/posts/ - 2018-10-18T23:57:22+03:00 + 2018-10-20T18:17:59+03:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2018-10-18T23:57:22+03:00 + 2018-10-20T18:17:59+03:00 0