diff --git a/content/posts/2021-10.md b/content/posts/2021-10.md index f9109890f..7773fa6c4 100644 --- a/content/posts/2021-10.md +++ b/content/posts/2021-10.md @@ -87,7 +87,7 @@ $ csvgrep -c asn -m 14618 /tmp/mozilla-4.0-ips.csv | csvcut -c ip | sed 1d | tee 290382 GET /handle/10568/83389 ``` -- Before I purge all those I will ask someone Samuel Stacey from the System office to hopefully get an insight... +- Before I purge all those I will ask someone Samuel Stacey from the System Office to hopefully get an insight... - Meeting with Michael Victor, Peter, Jane, and Abenet about the future of repositories in the One CGIAR - Meeting with Michelle from Altmetric about their new CSV upload system - I sent her some examples of Handles that have DOIs, but no linked score (yet) to see if an association will be created when she uploads them @@ -107,4 +107,17 @@ $ ./ilri/agrovoc-lookup.py -i /tmp/agrovoc-sorted.txt -o /tmp/agrovoc-matches.cs $ csvgrep -c 'number of matches' -m '0' /tmp/agrovoc-matches.csv | csvcut -c 1 > /tmp/invalid-agrovoc.csv ``` +## 2021-10-05 + +- Sam put me in touch with Dodi from the System Office web team and he confirmed that the Amazon requests are not theirs + - I added `Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)` to the list of bad bots in nginx + - I purged all the Amazon IPs using this user agent, as well as the few other IPs I identified yesterday + +```console +$ ./ilri/check-spider-ip-hits.sh -f /tmp/robot-ips.txt -p +... + +Total number of bot hits purged: 465119 +``` + diff --git a/docs/2021-10/index.html b/docs/2021-10/index.html index 5e6507691..259c15b42 100644 --- a/docs/2021-10/index.html +++ b/docs/2021-10/index.html @@ -25,7 +25,7 @@ So we have 1879/7100 (26.46%) matching already - + @@ -56,9 +56,9 @@ So we have 1879/7100 (26.46%) matching already "@type": "BlogPosting", "headline": "October, 2021", "url": "https://alanorth.github.io/cgspace-notes/2021-10/", - "wordCount": "697", + "wordCount": "771", "datePublished": "2021-10-01T11:14:07+03:00", - "dateModified": "2021-10-01T11:14:07+03:00", + "dateModified": "2021-10-04T19:40:13+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -216,7 +216,7 @@ $ csvgrep -c asn -m 14618 /tmp/mozilla-4.0-ips.csv | csvcut -c ip | sed 1d | tee 1607 GET /handle/10568/103816 290382 GET /handle/10568/83389
$ csvcut -c 'dcterms.subject[en_US]' ~/Downloads/2021-10-03-non-IWMI-publications.csv | sed -e 1d -e 's/||/\n/g' -e 's/"//g' | sort -u > /tmp/agrovoc.txt
$ ./ilri/agrovoc-lookup.py -i /tmp/agrovoc-sorted.txt -o /tmp/agrovoc-matches.csv
$ csvgrep -c 'number of matches' -m '0' /tmp/agrovoc-matches.csv | csvcut -c 1 > /tmp/invalid-agrovoc.csv
+
Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)
to the list of bad bots in nginx$ ./ilri/check-spider-ip-hits.sh -f /tmp/robot-ips.txt -p
+...
+
+Total number of bot hits purged: 465119
diff --git a/docs/categories/index.html b/docs/categories/index.html
index 8785f87b2..b9ffd53f1 100644
--- a/docs/categories/index.html
+++ b/docs/categories/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html
index c7631b7e0..e1f8309e6 100644
--- a/docs/categories/notes/index.html
+++ b/docs/categories/notes/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html
index aac9597c0..7f414a96d 100644
--- a/docs/categories/notes/page/2/index.html
+++ b/docs/categories/notes/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html
index 817a68bac..19e82d031 100644
--- a/docs/categories/notes/page/3/index.html
+++ b/docs/categories/notes/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html
index 04b0dbb7c..8d7b784f9 100644
--- a/docs/categories/notes/page/4/index.html
+++ b/docs/categories/notes/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html
index eaeeaef20..856fd2e25 100644
--- a/docs/categories/notes/page/5/index.html
+++ b/docs/categories/notes/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html
index a8a3b195c..891cb0853 100644
--- a/docs/categories/notes/page/6/index.html
+++ b/docs/categories/notes/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/index.html b/docs/index.html
index 8c6e3ccb6..d6eecce46 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/2/index.html b/docs/page/2/index.html
index bc8a4a685..5bad837e1 100644
--- a/docs/page/2/index.html
+++ b/docs/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/3/index.html b/docs/page/3/index.html
index 4d73d8598..053cc7900 100644
--- a/docs/page/3/index.html
+++ b/docs/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/4/index.html b/docs/page/4/index.html
index b154d2afd..8f3c08e83 100644
--- a/docs/page/4/index.html
+++ b/docs/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/5/index.html b/docs/page/5/index.html
index 277b232cf..c8aa8f95f 100644
--- a/docs/page/5/index.html
+++ b/docs/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/6/index.html b/docs/page/6/index.html
index 74ff5ba2e..a4e5d204f 100644
--- a/docs/page/6/index.html
+++ b/docs/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/7/index.html b/docs/page/7/index.html
index 56102c578..66288e6c3 100644
--- a/docs/page/7/index.html
+++ b/docs/page/7/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/8/index.html b/docs/page/8/index.html
index 2d73f68bd..ead11d5f5 100644
--- a/docs/page/8/index.html
+++ b/docs/page/8/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/index.html b/docs/posts/index.html
index 043934fb5..90bf70758 100644
--- a/docs/posts/index.html
+++ b/docs/posts/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html
index 41d6e79a2..ea2b27f1b 100644
--- a/docs/posts/page/2/index.html
+++ b/docs/posts/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html
index e2f0cd5ca..22e21b841 100644
--- a/docs/posts/page/3/index.html
+++ b/docs/posts/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html
index 32a24cabf..a2d377914 100644
--- a/docs/posts/page/4/index.html
+++ b/docs/posts/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html
index ab4d0831c..72732ad07 100644
--- a/docs/posts/page/5/index.html
+++ b/docs/posts/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html
index d2e59e458..dc1f3d3bc 100644
--- a/docs/posts/page/6/index.html
+++ b/docs/posts/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html
index 5fb7ea43d..364136673 100644
--- a/docs/posts/page/7/index.html
+++ b/docs/posts/page/7/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html
index 32ae104c8..9d7454065 100644
--- a/docs/posts/page/8/index.html
+++ b/docs/posts/page/8/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index f65add679..07b567c3f 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -3,19 +3,19 @@
xmlns:xhtml="http://www.w3.org/1999/xhtml">