From 845ec58520d68757f268ea5bc397c9929771e7be Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Tue, 19 Feb 2019 16:34:52 -0800 Subject: [PATCH] Update notes for 2019-02-19 --- content/posts/2019-02.md | 46 ++++++++++++++++++++++++++++++ docs/2019-02/index.html | 60 ++++++++++++++++++++++++++++++++++++++-- docs/sitemap.xml | 10 +++---- 3 files changed, 108 insertions(+), 8 deletions(-) diff --git a/content/posts/2019-02.md b/content/posts/2019-02.md index 3a12be7ed..dcaa14ae8 100644 --- a/content/posts/2019-02.md +++ b/content/posts/2019-02.md @@ -932,5 +932,51 @@ $ tidy -xml -utf8 -iq -m -w 0 dspace/config/controlled-vocabularies/cg-creator-i ![Usage stats](/cgspace-notes/2019/02/usage-stats.png) - I need to follow up with the DSpace developers and Atmire to see how they classify which requests are bots so we can try to estimate the impact caused by these users and perhaps try to update the list to make the stats more accurate +- I found one IP address in Nigeria that has an Android user agent and has requested a bitstream from [10568/96140](https://cgspace.cgiar.org/handle/10568/96140) almost 200 times: + +``` +# grep 41.190.30.105 /var/log/nginx/access.log | grep -c 'acgg_progress_report.pdf' +185 +``` + +- Wow, and another IP in Nigeria made a bunch more yesterday from the same user agent: + +``` +# grep 41.190.3.229 /var/log/nginx/access.log.1 | grep -c 'acgg_progress_report.pdf' +346 +``` + +- In the last two days alone there were 1,000 requests for this PDF, mostly from Nigeria! + +``` +# zcat --force /var/log/nginx/{access,error,library-access}.log /var/log/nginx/{access,error,library-access}.log.1 | grep acgg_progress_report.pdf | grep -v 'upstream response is buffered' | awk '{print $1}' | sort | uniq -c | sort -n + 1 139.162.146.60 + 1 157.55.39.159 + 1 196.188.127.94 + 1 196.190.127.16 + 1 197.183.33.222 + 1 66.249.66.221 + 2 104.237.146.139 + 2 175.158.209.61 + 2 196.190.63.120 + 2 196.191.127.118 + 2 213.55.99.121 + 2 82.145.223.103 + 3 197.250.96.248 + 4 196.191.127.125 + 4 197.156.77.24 + 5 105.112.75.237 + 185 41.190.30.105 + 346 41.190.3.229 + 503 41.190.31.73 +``` + +- That is so weird, they are all using this Android user agent: + +``` +Mozilla/5.0 (Linux; Android 7.0; TECNO Camon CX Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/33.0.0.0 Mobile Safari/537.36 +``` + +- I wrote a quick and dirty Python script called `resolve-addresses.py` to resolve IP addresses to their owning organization's name, ASN, and country using the [IPAPI.co API](https://ipapi.co) diff --git a/docs/2019-02/index.html b/docs/2019-02/index.html index 75b32090c..270b7a560 100644 --- a/docs/2019-02/index.html +++ b/docs/2019-02/index.html @@ -42,7 +42,7 @@ sys 0m1.979s - + @@ -89,9 +89,9 @@ sys 0m1.979s "@type": "BlogPosting", "headline": "February, 2019", "url": "https://alanorth.github.io/cgspace-notes/2019-02/", - "wordCount": "5236", + "wordCount": "5473", "datePublished": "2019-02-01T21:37:30+02:00", - "dateModified": "2019-02-18T16:30:34-08:00", + "dateModified": "2019-02-19T12:42:33-08:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -1212,6 +1212,60 @@ $ tidy -xml -utf8 -iq -m -w 0 dspace/config/controlled-vocabularies/cg-creator-i

Usage stats

+ + +
# grep 41.190.30.105 /var/log/nginx/access.log | grep -c 'acgg_progress_report.pdf'
+185
+
+ + + +
# grep 41.190.3.229 /var/log/nginx/access.log.1 | grep -c 'acgg_progress_report.pdf'
+346
+
+ + + +
# zcat --force /var/log/nginx/{access,error,library-access}.log /var/log/nginx/{access,error,library-access}.log.1 | grep acgg_progress_report.pdf | grep -v 'upstream response is buffered' | awk '{print $1}' | sort | uniq -c | sort -n
+      1 139.162.146.60
+      1 157.55.39.159
+      1 196.188.127.94
+      1 196.190.127.16
+      1 197.183.33.222
+      1 66.249.66.221
+      2 104.237.146.139
+      2 175.158.209.61
+      2 196.190.63.120
+      2 196.191.127.118
+      2 213.55.99.121
+      2 82.145.223.103
+      3 197.250.96.248
+      4 196.191.127.125
+      4 197.156.77.24
+      5 105.112.75.237
+    185 41.190.30.105
+    346 41.190.3.229
+    503 41.190.31.73
+
+ + + +
Mozilla/5.0 (Linux; Android 7.0; TECNO Camon CX Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/33.0.0.0 Mobile Safari/537.36
+
+ + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index a71195d76..2d0bec815 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2019-02/ - 2019-02-18T16:30:34-08:00 + 2019-02-19T12:42:33-08:00 @@ -209,7 +209,7 @@ https://alanorth.github.io/cgspace-notes/ - 2019-02-18T16:30:34-08:00 + 2019-02-19T12:42:33-08:00 0 @@ -220,7 +220,7 @@ https://alanorth.github.io/cgspace-notes/tags/notes/ - 2019-02-18T16:30:34-08:00 + 2019-02-19T12:42:33-08:00 0 @@ -232,13 +232,13 @@ https://alanorth.github.io/cgspace-notes/posts/ - 2019-02-18T16:30:34-08:00 + 2019-02-19T12:42:33-08:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2019-02-18T16:30:34-08:00 + 2019-02-19T12:42:33-08:00 0