diff --git a/content/posts/2021-02.md b/content/posts/2021-02.md index 8a751e6ea..ad3640b69 100644 --- a/content/posts/2021-02.md +++ b/content/posts/2021-02.md @@ -401,4 +401,62 @@ $ curl -XDELETE 'http://localhost:9200/openrxv-items-2021-02-15' - Call with Abdullah from CodeObia to discuss community and collection statistics reporting +## 2021-02-16 + +- Linode emailed me to say that CGSpace (linode18) had high CPU usage this afternoon +- I looked in the nginx logs and found a few heavy users: + - 45.146.165.203 in Russia with user agent `Opera/9.80 (Windows NT 6.1; U; cs) Presto/2.2.15 Version/10.00` + - 130.255.161.231 in Sweden with user agent `Mozilla/5.0 (Macintosh; Intel Mac OS X 11.1; rv:84.0) Gecko/20100101 Firefox/84.0` +- They are definitely bots posing as users, as I see they have created more than six thousand DSpace sessions today: + +```console +$ cat dspace.log.2021-02-16 | grep -E 'session_id=[A-Z0-9]{32}:ip_addr=45.146.165.203' | sort | uniq | wc -l +4007 +$ cat dspace.log.2021-02-16 | grep -E 'session_id=[A-Z0-9]{32}:ip_addr=130.255.161.231' | sort | uniq | wc -l +2128 +``` + +- Ah, actually 45.146.165.203 is making SQL injection attempts with requests like this: + +```console +"http://cgspace.cgiar.org:80/bitstream/handle/10568/238/Res_report_no3.pdf;jsessionid=7311DD88B30EEF9A8F526FF89378C2C5%' AND 4313=CONCAT(CHAR(113)+CHAR(98)+CHAR(106)+CHAR(112)+CHAR(113),(SELECT (CASE WHEN (4313=4313) THEN CHAR(49) ELSE CHAR(48) END)),CHAR(113)+CHAR(106)+CHAR(98)+CHAR(112)+CHAR(113)) AND 'XzQO%'='XzQO" +``` + +- I purged the hits from these two using my `check-spider-ip-hits.sh`: + +```console +$ ./ilri/check-spider-ip-hits.sh -f /tmp/ips -p +Purging 4005 hits from 45.146.165.203 in statistics +Purging 3493 hits from 130.255.161.231 in statistics + +Total number of bot hits purged: 7498 +``` + +- Ugh, I looked in Solr for the top IPs in 2021-01 and found a few more of these Russian IPs so I purged them too: + +```console +$ ./ilri/check-spider-ip-hits.sh -f /tmp/ips -p +Purging 27163 hits 
from 45.146.164.176 in statistics +Purging 19556 hits from 45.146.165.105 in statistics +Purging 15927 hits from 45.146.165.83 in statistics +Purging 8085 hits from 45.146.165.104 in statistics + +Total number of bot hits purged: 70731 +``` + +- My god, and 64.39.99.15 is from Qualys, the domain scanning security people, who are making queries trying to see if we are vulnerable or something (?) + - Looking in Solr I see a few different IPs with DNS like `sn003.s02.iad01.qualys.com.` so I will purge their requests too: + +```console +$ ./ilri/check-spider-ip-hits.sh -f /tmp/ips -p +Purging 3 hits from 130.255.161.231 in statistics +Purging 16773 hits from 64.39.99.15 in statistics +Purging 6976 hits from 64.39.99.13 in statistics +Purging 13 hits from 64.39.99.63 in statistics +Purging 12 hits from 64.39.99.65 in statistics +Purging 12 hits from 64.39.99.94 in statistics + +Total number of bot hits purged: 23789 +``` + diff --git a/docs/2021-02/index.html b/docs/2021-02/index.html index 115d3312e..ebfd3018c 100644 --- a/docs/2021-02/index.html +++ b/docs/2021-02/index.html @@ -32,7 +32,7 @@ $ curl -s 'http://localhost:9200/openrxv-items-temp/_count?q=*&pretty - + @@ -70,9 +70,9 @@ $ curl -s 'http://localhost:9200/openrxv-items-temp/_count?q=*&pretty "@type": "BlogPosting", "headline": "February, 2021", "url": "https://alanorth.github.io/cgspace-notes/2021-02/", - "wordCount": "2397", + "wordCount": "2725", "datePublished": "2021-02-01T10:13:54+02:00", - "dateModified": "2021-02-14T20:00:24+02:00", + "dateModified": "2021-02-16T12:56:10+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -553,7 +553,60 @@ $ curl -XDELETE 'http://localhost:9200/openrxv-items-2021-02-15' - +

2021-02-16

+ +
$ cat dspace.log.2021-02-16 | grep -E 'session_id=[A-Z0-9]{32}:ip_addr=45.146.165.203' | sort | uniq | wc -l
+4007
+$ cat dspace.log.2021-02-16 | grep -E 'session_id=[A-Z0-9]{32}:ip_addr=130.255.161.231' | sort | uniq | wc -l
+2128
+
+
"http://cgspace.cgiar.org:80/bitstream/handle/10568/238/Res_report_no3.pdf;jsessionid=7311DD88B30EEF9A8F526FF89378C2C5%' AND 4313=CONCAT(CHAR(113)+CHAR(98)+CHAR(106)+CHAR(112)+CHAR(113),(SELECT (CASE WHEN (4313=4313) THEN CHAR(49) ELSE CHAR(48) END)),CHAR(113)+CHAR(106)+CHAR(98)+CHAR(112)+CHAR(113)) AND 'XzQO%'='XzQO"
+
+
$ ./ilri/check-spider-ip-hits.sh -f /tmp/ips -p
+Purging 4005 hits from 45.146.165.203 in statistics
+Purging 3493 hits from 130.255.161.231 in statistics
+
+Total number of bot hits purged: 7498
+
+
$ ./ilri/check-spider-ip-hits.sh -f /tmp/ips -p
+Purging 27163 hits from 45.146.164.176 in statistics
+Purging 19556 hits from 45.146.165.105 in statistics
+Purging 15927 hits from 45.146.165.83 in statistics
+Purging 8085 hits from 45.146.165.104 in statistics
+
+Total number of bot hits purged: 70731
+
+
$ ./ilri/check-spider-ip-hits.sh -f /tmp/ips -p
+Purging 3 hits from 130.255.161.231 in statistics
+Purging 16773 hits from 64.39.99.15 in statistics
+Purging 6976 hits from 64.39.99.13 in statistics
+Purging 13 hits from 64.39.99.63 in statistics
+Purging 12 hits from 64.39.99.65 in statistics
+Purging 12 hits from 64.39.99.94 in statistics
+
+Total number of bot hits purged: 23789
+
diff --git a/docs/categories/index.html b/docs/categories/index.html index c4d6be288..e5f86ca60 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 608ae9498..261dbe18a 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index fe27da5c5..9fcee9b5f 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index 2acc54df6..13a214a4b 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index cd533e284..5f54579db 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index 0a0ae3f96..e0e6a1dd4 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 2232d12a1..37e2136e4 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index 13d25f8ae..f3589301a 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index f3c687f77..44ccd96f8 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index 465bf11ad..080f0b72e 100644 --- a/docs/page/4/index.html 
+++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index 8ddcd3e7a..7c084dffe 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index c93960e28..be74ad51a 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index c09dab313..fe492a84e 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index 5c2ce51bf..a718b17cd 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index 9c61d1e4c..2a71d56ef 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index c69dd0dec..e31b91ae3 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index 994795a8c..a8e76da02 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 3496188be..f483bf209 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index d1bd163a0..74ac694fc 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index 34a54eef9..bf890dbff 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 
9f5948cf4..16bd1ffc1 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,27 +4,27 @@ https://alanorth.github.io/cgspace-notes/categories/ - 2021-02-14T20:00:24+02:00 + 2021-02-16T12:56:10+02:00 https://alanorth.github.io/cgspace-notes/ - 2021-02-14T20:00:24+02:00 + 2021-02-16T12:56:10+02:00 https://alanorth.github.io/cgspace-notes/2021-02/ - 2021-02-14T20:00:24+02:00 + 2021-02-16T12:56:10+02:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2021-02-14T20:00:24+02:00 + 2021-02-16T12:56:10+02:00 https://alanorth.github.io/cgspace-notes/posts/ - 2021-02-14T20:00:24+02:00 + 2021-02-16T12:56:10+02:00