diff --git a/content/posts/2024-01.md b/content/posts/2024-01.md index 4e08eecb0..44f958f41 100644 --- a/content/posts/2024-01.md +++ b/content/posts/2024-01.md @@ -67,5 +67,43 @@ stream { - I noticed tons of logs from pm2 in the systemd journal, so I disabled those in the systemd unit because they are available from pm2's log directory anyway - I also noticed the same for Solr, so I disabled stdout for that systemd unit as well - I spent a lot of time bringing back the nginx rate limits we used in DSpace 6 and it seems to have helped +- I see some client doing weird HEAD requests to search pages: + +``` +47.76.35.19 - - [07/Jan/2024:00:00:02 +0100] "HEAD /search/?f.accessRights=Open+Access%2Cequals&f.actionArea=Resilient+Agrifood+Systems%2Cequals&f.author=Burkart%2C+Stefan%2Cequals&f.country=Kenya%2Cequals&f.impactArea=Climate+adaptation+and+mitigation%2Cequals&f.itemtype=Brief%2Cequals&f.publisher=CGIAR+System+Organization%2Cequals&f.region=Asia%2Cequals&f.sdg=SDG+12+-+Responsible+consumption+and+production%2Cequals&f.sponsorship=CGIAR+Trust+Fund%2Cequals&f.subject=environmental+factors%2Cequals&spc.page=1 HTTP/1.1" 499 0 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.2504.63 Safari/537.36" +``` + +- I will add their network blocks (AS45102) and regenerate my list of bot networks: + +```console +$ wget https://asn.ipinfo.app/api/text/list/AS16276 \ + https://asn.ipinfo.app/api/text/list/AS23576 \ + https://asn.ipinfo.app/api/text/list/AS24940 \ + https://asn.ipinfo.app/api/text/list/AS13238 \ + https://asn.ipinfo.app/api/text/list/AS14061 \ + https://asn.ipinfo.app/api/text/list/AS12876 \ + https://asn.ipinfo.app/api/text/list/AS55286 \ + https://asn.ipinfo.app/api/text/list/AS203020 \ + https://asn.ipinfo.app/api/text/list/AS204287 \ + https://asn.ipinfo.app/api/text/list/AS50245 \ + https://asn.ipinfo.app/api/text/list/AS6939 \ + https://asn.ipinfo.app/api/text/list/AS45102 \ + 
https://asn.ipinfo.app/api/text/list/AS21859 +$ cat AS* | sort | uniq | wc -l +4897 +$ cat AS* | ~/go/bin/mapcidr -a > /tmp/networks.txt +$ wc -l /tmp/networks.txt +2017 /tmp/networks.txt +``` + +- I'm surprised to see the number of networks reduced from my current ones... hmmm. +- I will also update my list of Bing networks: + +```console +$ ./ilri/bing-networks-to-ips.sh +$ ~/go/bin/mapcidr -a < /tmp/bing-ips.txt > /tmp/bing-networks.txt +$ wc -l /tmp/bing-networks.txt +250 /tmp/bing-networks.txt +``` diff --git a/docs/2024-01/index.html b/docs/2024-01/index.html index 5f93f6ca9..c953e45ce 100644 --- a/docs/2024-01/index.html +++ b/docs/2024-01/index.html @@ -22,7 +22,7 @@ Work on IFPRI ISNAR archive cleanup - + @@ -50,9 +50,9 @@ Work on IFPRI ISNAR archive cleanup "@type": "BlogPosting", "headline": "January, 2024", "url": "https://alanorth.github.io/cgspace-notes/2024-01/", - "wordCount": "433", + "wordCount": "571", "datePublished": "2024-01-02T10:08:00+03:00", - "dateModified": "2024-01-06T17:46:07+03:00", + "dateModified": "2024-01-07T20:43:02+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -202,8 +202,39 @@ Work on IFPRI ISNAR archive cleanup
  • I spent a lot of time bringing back the nginx rate limits we used in DSpace 6 and it seems to have helped
  • +
  • I see some client doing weird HEAD requests to search pages:
  • - +
    47.76.35.19 - - [07/Jan/2024:00:00:02 +0100] "HEAD /search/?f.accessRights=Open+Access%2Cequals&f.actionArea=Resilient+Agrifood+Systems%2Cequals&f.author=Burkart%2C+Stefan%2Cequals&f.country=Kenya%2Cequals&f.impactArea=Climate+adaptation+and+mitigation%2Cequals&f.itemtype=Brief%2Cequals&f.publisher=CGIAR+System+Organization%2Cequals&f.region=Asia%2Cequals&f.sdg=SDG+12+-+Responsible+consumption+and+production%2Cequals&f.sponsorship=CGIAR+Trust+Fund%2Cequals&f.subject=environmental+factors%2Cequals&spc.page=1 HTTP/1.1" 499 0 "-" "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.2504.63 Safari/537.36"
    +
  • I will add their network blocks (AS45102) and regenerate my list of bot networks:
    +
    $ wget https://asn.ipinfo.app/api/text/list/AS16276 \
    +     https://asn.ipinfo.app/api/text/list/AS23576 \
    +     https://asn.ipinfo.app/api/text/list/AS24940 \
    +     https://asn.ipinfo.app/api/text/list/AS13238 \
    +     https://asn.ipinfo.app/api/text/list/AS14061 \
    +     https://asn.ipinfo.app/api/text/list/AS12876 \
    +     https://asn.ipinfo.app/api/text/list/AS55286 \
    +     https://asn.ipinfo.app/api/text/list/AS203020 \
    +     https://asn.ipinfo.app/api/text/list/AS204287 \
    +     https://asn.ipinfo.app/api/text/list/AS50245 \
    +     https://asn.ipinfo.app/api/text/list/AS6939 \
    +     https://asn.ipinfo.app/api/text/list/AS45102 \
    +     https://asn.ipinfo.app/api/text/list/AS21859
    +$ cat AS* | sort | uniq | wc -l
    +4897
    +$ cat AS* | ~/go/bin/mapcidr -a > /tmp/networks.txt
    +$ wc -l /tmp/networks.txt
    +2017 /tmp/networks.txt
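    +
  • I'm surprised to see the number of networks reduced from my current ones... hmmm.
  • I will also update my list of Bing networks:
    +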
    $ ./ilri/bing-networks-to-ips.sh
    +$ ~/go/bin/mapcidr -a < /tmp/bing-ips.txt > /tmp/bing-networks.txt
    +$ wc -l /tmp/bing-networks.txt
    +250 /tmp/bing-networks.txt
    +
    diff --git a/docs/categories/index.html b/docs/categories/index.html index f4b5b1689..732058a84 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 4bc61692c..a07b1ab1b 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index c68472bab..667d0c06a 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index 062795655..d21397d24 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index 4c4446381..f49d0ad18 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index 9434640a5..4094ba1ff 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html index 7e9cb4c55..9c743f59a 100644 --- a/docs/categories/notes/page/6/index.html +++ b/docs/categories/notes/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/7/index.html b/docs/categories/notes/page/7/index.html index 5f95e8ef3..f2442b6c2 100644 --- a/docs/categories/notes/page/7/index.html +++ b/docs/categories/notes/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/8/index.html b/docs/categories/notes/page/8/index.html index 068f5e24d..b1caf7207 100644 
--- a/docs/categories/notes/page/8/index.html +++ b/docs/categories/notes/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index b694b5973..a921b461f 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/10/index.html b/docs/page/10/index.html index 50229e347..a746d552d 100644 --- a/docs/page/10/index.html +++ b/docs/page/10/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/11/index.html b/docs/page/11/index.html index c3604fca4..09547b753 100644 --- a/docs/page/11/index.html +++ b/docs/page/11/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index b5fe8bf22..900aaa9c3 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index c66342954..15360d2c7 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index ba79d98f6..f460fc4d7 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index b0764c0e3..1027b422d 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index 0dda8f937..64ac41cb5 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index 8513a5296..de975a4fd 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/8/index.html b/docs/page/8/index.html index 1a2e8cda4..fdac68c0c 100644 --- a/docs/page/8/index.html +++ b/docs/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/9/index.html b/docs/page/9/index.html index 2762fe10e..6857e9f85 100644 --- a/docs/page/9/index.html +++ b/docs/page/9/index.html @@ 
-10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index 7f5a69e03..003a2e41a 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/10/index.html b/docs/posts/page/10/index.html index b8cfb0b5d..597f541d9 100644 --- a/docs/posts/page/10/index.html +++ b/docs/posts/page/10/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/11/index.html b/docs/posts/page/11/index.html index 0e6245250..cc1e8bb8a 100644 --- a/docs/posts/page/11/index.html +++ b/docs/posts/page/11/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index 4e586a2dc..95c4dc032 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index 75b691e0b..73ab3026c 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index bddac5b61..f95dbc3a8 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index f1be390f0..3708b7aba 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 3d55e72b7..03d48820a 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index 79c2926f2..2887e0c5d 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html index a92373333..daa7f5f93 100644 --- a/docs/posts/page/8/index.html +++ b/docs/posts/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git 
a/docs/posts/page/9/index.html b/docs/posts/page/9/index.html index 1c32bbafb..52908d89e 100644 --- a/docs/posts/page/9/index.html +++ b/docs/posts/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index cee0b4fa4..a9f58abfd 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,19 +3,19 @@ xmlns:xhtml="http://www.w3.org/1999/xhtml"> https://alanorth.github.io/cgspace-notes/categories/ - 2024-01-06T17:46:07+03:00 + 2024-01-07T20:43:02+03:00 https://alanorth.github.io/cgspace-notes/ - 2024-01-06T17:46:07+03:00 + 2024-01-07T20:43:02+03:00 https://alanorth.github.io/cgspace-notes/2024-01/ - 2024-01-06T17:46:07+03:00 + 2024-01-07T20:43:02+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2024-01-06T17:46:07+03:00 + 2024-01-07T20:43:02+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2024-01-06T17:46:07+03:00 + 2024-01-07T20:43:02+03:00 https://alanorth.github.io/cgspace-notes/2023-12/ 2023-12-29T12:08:57+03:00