diff --git a/docs/2019-01/index.html b/docs/2019-01/index.html
index 5b219b04f..fabb895b3 100644
--- a/docs/2019-01/index.html
+++ b/docs/2019-01/index.html
@@ -27,7 +27,7 @@ I don’t see anything interesting in the web server logs around that time t
-
+
@@ -62,7 +62,7 @@ I don’t see anything interesting in the web server logs around that time t
"url": "https://alanorth.github.io/cgspace-notes/2019-01/",
"wordCount": "5531",
"datePublished": "2019-01-02T09:48:30+02:00",
- "dateModified": "2020-10-19T15:23:30+03:00",
+ "dateModified": "2022-03-22T22:03:59+03:00",
"author": {
"@type": "Person",
"name": "Alan Orth"
diff --git a/docs/2022-03/index.html b/docs/2022-03/index.html
index 68094e82d..4c8bb6611 100644
--- a/docs/2022-03/index.html
+++ b/docs/2022-03/index.html
@@ -19,7 +19,7 @@ $ csvjoin -c id /tmp/2022-03-01-tac-batch4-701-980.csv /tmp/tac4-filenames.csv &
-
+
@@ -44,9 +44,9 @@ $ csvjoin -c id /tmp/2022-03-01-tac-batch4-701-980.csv /tmp/tac4-filenames.csv &
"@type": "BlogPosting",
"headline": "March, 2022",
"url": "https://alanorth.github.io/cgspace-notes/2022-03/",
- "wordCount": "684",
+ "wordCount": "1011",
"datePublished": "2022-03-01T16:46:54+03:00",
- "dateModified": "2022-03-16T18:32:01+03:00",
+ "dateModified": "2022-03-22T22:03:45+03:00",
"author": {
"@type": "Person",
"name": "Alan Orth"
@@ -258,6 +258,49 @@ isNotNull(value.match('821'))
$ csvjoin -c id /tmp/2022-03-22-tac-duplicates.csv /tmp/tac-filenames.csv > /tmp/tac-final-duplicates.csv
- I sent the resulting 76 items to Gaia to check
+- UptimeRobot said that CGSpace was down
+
+- I looked and found many locks belonging to the REST API application:
+
+
+
+$ psql -c 'SELECT * FROM pg_locks pl LEFT JOIN pg_stat_activity psa ON pl.pid = psa.pid;' | grep -o -E '(dspaceWeb|dspaceApi)' | sort | uniq -c | sort -n
+ 301 dspaceWeb
+ 2390 dspaceApi
+
+- Looking at nginx’s logs, I found the top addresses making requests today:
+
+# awk '{print $1}' /var/log/nginx/rest.log | sort | uniq -c | sort -h
+ 1977 45.5.184.2
+ 3167 70.32.90.172
+ 4754 54.195.118.125
+ 5411 205.186.128.185
+ 6826 137.184.159.211
+
+- 137.184.159.211 is on DigitalOcean using this user agent: GuzzleHttp/6.3.3 curl/7.81.0 PHP/7.4.28
+
+- I blocked this IP in nginx and the load went down immediately
+
+
+- 205.186.128.185 is on Media Temple, but it’s OK because it’s the CCAFS publications importer bot
+- 54.195.118.125 is on Amazon, but is also a CCAFS publications importer bot apparently (perhaps a test server)
+- 70.32.90.172 is on Media Temple and has no user agent
+- What is surprising to me is that we already have an nginx rule to return HTTP 403 for requests without a user agent
+
+- I verified it works as expected with an empty user agent:
+
+
+
+$ curl -H User-Agent:'' 'https://dspacetest.cgiar.org/rest/handle/10568/34799?expand=all'
+Due to abuse we no longer permit requests without a user agent. Please specify a descriptive user agent, for example containing the word 'bot', if you are accessing the site programmatically. For more information see here: https://dspacetest.cgiar.org/page/about.
+
+- I note that the nginx log shows ‘-’ for a request with an empty user agent, which is indistinguishable from a request that sends a literal ‘-’ as its user agent; for example, these requests were successful:
+
+70.32.90.172 - - [22/Mar/2022:11:59:10 +0100] "GET /rest/handle/10568/34374?expand=all HTTP/1.0" 200 10671 "-" "-"
+70.32.90.172 - - [22/Mar/2022:11:59:14 +0100] "GET /rest/handle/10568/34795?expand=all HTTP/1.0" 200 11394 "-" "-"
+
+- I can only assume that these requests used a literal ‘-’ as the user agent, so I will have to add an nginx rule to block those too
+- Otherwise, I see from my notes that 70.32.90.172 is the wle.cgiar.org REST API harvester… I should ask Macaroni Bros about that
diff --git a/docs/categories/index.html b/docs/categories/index.html
index 05010b205..98112439d 100644
--- a/docs/categories/index.html
+++ b/docs/categories/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html
index 7193af5ff..02f023cf9 100644
--- a/docs/categories/notes/index.html
+++ b/docs/categories/notes/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html
index d1ca6876b..f344847ab 100644
--- a/docs/categories/notes/page/2/index.html
+++ b/docs/categories/notes/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html
index e6224b266..b14ccc418 100644
--- a/docs/categories/notes/page/3/index.html
+++ b/docs/categories/notes/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html
index 42844c14f..82dfab110 100644
--- a/docs/categories/notes/page/4/index.html
+++ b/docs/categories/notes/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html
index e0996f4cb..0169b8ed6 100644
--- a/docs/categories/notes/page/5/index.html
+++ b/docs/categories/notes/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html
index cff8fc0f8..bc77d8bbd 100644
--- a/docs/categories/notes/page/6/index.html
+++ b/docs/categories/notes/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/index.html b/docs/index.html
index cd5a72f38..067e31fc1 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/2/index.html b/docs/page/2/index.html
index 0786d85ff..723b90ffa 100644
--- a/docs/page/2/index.html
+++ b/docs/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/3/index.html b/docs/page/3/index.html
index f9dfefa30..4c3bd1fdc 100644
--- a/docs/page/3/index.html
+++ b/docs/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/4/index.html b/docs/page/4/index.html
index 0077ba098..19b5b2c3b 100644
--- a/docs/page/4/index.html
+++ b/docs/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/5/index.html b/docs/page/5/index.html
index 5564ee902..416929058 100644
--- a/docs/page/5/index.html
+++ b/docs/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/6/index.html b/docs/page/6/index.html
index 7fc4a4333..49739475c 100644
--- a/docs/page/6/index.html
+++ b/docs/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/7/index.html b/docs/page/7/index.html
index eff4597fd..d7bf11f2a 100644
--- a/docs/page/7/index.html
+++ b/docs/page/7/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/8/index.html b/docs/page/8/index.html
index beac025f3..074ad70aa 100644
--- a/docs/page/8/index.html
+++ b/docs/page/8/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/index.html b/docs/posts/index.html
index 82397ee67..9ad24afbe 100644
--- a/docs/posts/index.html
+++ b/docs/posts/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html
index f86186648..5f9162fbe 100644
--- a/docs/posts/page/2/index.html
+++ b/docs/posts/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html
index 02ac6421d..24ad874f4 100644
--- a/docs/posts/page/3/index.html
+++ b/docs/posts/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html
index e7ac5bf91..32a5289b9 100644
--- a/docs/posts/page/4/index.html
+++ b/docs/posts/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html
index 19ef91097..c3c1847f9 100644
--- a/docs/posts/page/5/index.html
+++ b/docs/posts/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html
index f7897b6e2..899d4fda9 100644
--- a/docs/posts/page/6/index.html
+++ b/docs/posts/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html
index f5742bbf1..82ee26d38 100644
--- a/docs/posts/page/7/index.html
+++ b/docs/posts/page/7/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html
index fcd7986ee..cbfeb490e 100644
--- a/docs/posts/page/8/index.html
+++ b/docs/posts/page/8/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index 23611dfb4..71e1e0338 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -3,19 +3,19 @@
xmlns:xhtml="http://www.w3.org/1999/xhtml">
https://alanorth.github.io/cgspace-notes/categories/
- 2022-03-16T18:32:01+03:00
+ 2022-03-22T22:03:59+03:00
https://alanorth.github.io/cgspace-notes/
- 2022-03-16T18:32:01+03:00
+ 2022-03-22T22:03:59+03:00
https://alanorth.github.io/cgspace-notes/2022-03/
- 2022-03-16T18:32:01+03:00
+ 2022-03-22T22:03:45+03:00
https://alanorth.github.io/cgspace-notes/categories/notes/
- 2022-03-16T18:32:01+03:00
+ 2022-03-22T22:03:59+03:00
https://alanorth.github.io/cgspace-notes/posts/
- 2022-03-16T18:32:01+03:00
+ 2022-03-22T22:03:59+03:00
https://alanorth.github.io/cgspace-notes/2022-02/
2022-03-01T17:17:27+03:00
@@ -141,7 +141,7 @@
2019-10-28T13:39:25+02:00
https://alanorth.github.io/cgspace-notes/2019-01/
- 2020-10-19T15:23:30+03:00
+ 2022-03-22T22:03:59+03:00
https://alanorth.github.io/cgspace-notes/2018-12/
2019-10-28T13:39:25+02:00