diff --git a/content/posts/2021-11.md b/content/posts/2021-11.md index 6f31babdd..bc14bd91c 100644 --- a/content/posts/2021-11.md +++ b/content/posts/2021-11.md @@ -36,4 +36,41 @@ $ ./run.sh -s http://localhost:8081/solr/statistics-2019 -a import -o statistics - I checked on CGSpace's and I can't find them there either, but I see them in Solr when I query in the admin UI - I need to debug that, but it doesn't seem to be related to the sharding... +## 2021-11-04 + +- I spent a little bit of time debugging the Solr bug with the statistics-2019 shard but couldn't reproduce it for the few items I tested + - So that's good, it seems the sharding worked +- Linode alerted me to high CPU usage on CGSpace (linode18) yesterday + - Looking at the Solr hits from yesterday I see 91.213.50.11 making 2,300 requests + - According to AbuseIPDB.com this is owned by Registrarus LLC (registrarus.ru) and it has been reported for malicious activity by several users + - The ASN is 50340 (SELECTEL-MSK, RU) + - They are attempting SQL injection: + +```console +91.213.50.11 - - [03/Nov/2021:06:47:20 +0100] "HEAD /bitstream/handle/10568/106239/U19ArtSimonikovaChromosomeInthomNodev.pdf?sequence=1%60%20WHERE%206158%3D6158%20AND%204894%3D4741--%20kIlq&isAllowed=y HTTP/1.1" 200 0 "https://cgspace.cgiar.org:443/bitstream/handle/10568/106239/U19ArtSimonikovaChromosomeInthomNodev.pdf" "Mozilla/5.0 (X11; U; Linux i686; en-CA; rv:1.8.0.10) Gecko/20070223 Fedora/1.5.0.10-1.fc5 Firefox/1.5.0.10" +``` + +- Another is in China, and they grabbed 1,200 PDFs from the REST API in under an hour: + +```console +# zgrep 222.129.53.160 /var/log/nginx/rest.log.2.gz | wc -l +1178 +``` + +- I will continue to split the Solr statistics back into year-shards on DSpace Test (linode26) + - Today I did all 2018 stats... + - I want to see if there is a noticeable change in JVM memory, Solr response time, etc + +## 2021-11-07 + +- Update all Docker containers on AReS and rebuild OpenRXV: + +```console +$ docker images | grep -v ^REPO | sed 's/ \+/:/g' | cut -d: -f1,2 | xargs -L1 docker pull +$ docker-compose build +``` + +- Then restart the server and start a fresh harvest +- Continue splitting the Solr statistics into yearly shards on DSpace Test (doing 2017 today) + diff --git a/docs/2021-11/index.html b/docs/2021-11/index.html index 75ac9c104..0213996ac 100644 --- a/docs/2021-11/index.html +++ b/docs/2021-11/index.html @@ -18,7 +18,7 @@ $ zstd statistics-2019.json - + @@ -42,9 +42,9 @@ $ zstd statistics-2019.json "@type": "BlogPosting", "headline": "November, 2021", "url": "https://alanorth.github.io/cgspace-notes/2021-11/", - "wordCount": "238", + "wordCount": "468", "datePublished": "2021-11-02T22:27:07+02:00", - "dateModified": "2021-11-01T10:49:21+02:00", + "dateModified": "2021-11-03T15:56:15+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -149,6 +149,46 @@ $ ./run.sh -s http://localhost:8081/solr/statistics-2019 -a import -o statistics +

2021-11-04

+ +
91.213.50.11 - - [03/Nov/2021:06:47:20 +0100] "HEAD /bitstream/handle/10568/106239/U19ArtSimonikovaChromosomeInthomNodev.pdf?sequence=1%60%20WHERE%206158%3D6158%20AND%204894%3D4741--%20kIlq&isAllowed=y HTTP/1.1" 200 0 "https://cgspace.cgiar.org:443/bitstream/handle/10568/106239/U19ArtSimonikovaChromosomeInthomNodev.pdf" "Mozilla/5.0 (X11; U; Linux i686; en-CA; rv:1.8.0.10) Gecko/20070223 Fedora/1.5.0.10-1.fc5 Firefox/1.5.0.10"
+
+
# zgrep 222.129.53.160 /var/log/nginx/rest.log.2.gz | wc -l
+1178
+
+

2021-11-07

+ +
$ docker images | grep -v ^REPO | sed 's/ \+/:/g' | cut -d: -f1,2 | xargs -L1 docker pull
+$ docker-compose build
+
diff --git a/docs/categories/index.html b/docs/categories/index.html index b172252f8..3c509cb31 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 2068c03fb..c2db6fed6 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index a55d9c86a..cc704f181 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index 8fd006ff7..0b1528376 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index 38d341bc2..7d43fabc4 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index 766206779..020c2d635 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html index 3f594d8ff..2a516bea5 100644 --- a/docs/categories/notes/page/6/index.html +++ b/docs/categories/notes/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index d6497a8ef..620e5f1a9 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index 908260e67..30fc08c50 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index d11c95475..6d1aab47d 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index 38c254a80..d4eb6b8cf 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index 4ca901ee7..7041dd162 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index 6f7e1fecf..52fcf4caa 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index 274a322ec..5937bff4f 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/8/index.html b/docs/page/8/index.html index ca5460e33..7a20079a0 100644 --- a/docs/page/8/index.html +++ b/docs/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index 06b3378ef..dc16cd239 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index 5d412973c..583d72272 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index 89369601e..bc6677106 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index b6b54cfb8..628881e6b 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 360347e8c..0e94f3bb3 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 69fbce8ad..a65b5ca7a 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index a95b1aa3e..59b81f955 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html index 0b5cf2c4c..21b9089d4 100644 --- a/docs/posts/page/8/index.html +++ b/docs/posts/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 2102d49f2..b23dce96d 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,19 +3,19 @@ xmlns:xhtml="http://www.w3.org/1999/xhtml"> https://alanorth.github.io/cgspace-notes/categories/ - 2021-11-01T10:49:21+02:00 + 2021-11-03T15:56:15+02:00 https://alanorth.github.io/cgspace-notes/ - 2021-11-01T10:49:21+02:00 + 2021-11-03T15:56:15+02:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2021-11-01T10:49:21+02:00 + 2021-11-03T15:56:15+02:00 https://alanorth.github.io/cgspace-notes/2021-11/ - 2021-11-01T10:49:21+02:00 + 2021-11-03T15:56:15+02:00 https://alanorth.github.io/cgspace-notes/posts/ - 2021-11-01T10:49:21+02:00 + 2021-11-03T15:56:15+02:00 https://alanorth.github.io/cgspace-notes/2021-10/ 2021-11-01T10:48:13+02:00