diff --git a/content/posts/2022-07.md b/content/posts/2022-07.md index 17db51db4..8ab9be97e 100644 --- a/content/posts/2022-07.md +++ b/content/posts/2022-07.md @@ -335,5 +335,23 @@ geo $ua { - This allows me to accomplish the original goal while still only using one bot-networks.conf file for the `limit_req_zone` and the user agent mapping that we pass to Tomcat - Unfortunately this means I will have hundreds of thousands of requests in Solr with a literal `$http_user_agent` - I might try to purge some by enumerating all the networks in my block file and running them through `check-spider-ip-hits.sh` +- I extracted all the IPs/subnets from `bot-networks.conf` and prepared them so I could enumerate their IPs + - I had to add `/32` to all single IPs, which I did with this crazy vim invocation: + +```console +:g!/\/\d\+$/s/^\(\d\+\.\d\+\.\d\+\.\d\+\)$/\1\/32/ +``` + +- Explanation: + - `g!`: global, lines *not* matching (the opposite of `g`) + - `/\/\d\+$/`: pattern matching `/` with one or more digits at the end of the line + - `s/^\(\d\+\.\d\+\.\d\+\.\d\+\)$/\1\/32/`: for lines not matching above, capture the IPv4 address and add `/32` at the end +- Then I ran the list through prips to enumerate the IPs, using sed to drop the first and last address of each range (note that a `/32` yields a single line, so its only address is dropped too): + +```console +$ while read -r line; do prips "$line" | sed -e '1d; $d'; done < /tmp/bot-networks.conf > /tmp/bot-ips.txt +$ wc -l /tmp/bot-ips.txt +1946968 /tmp/bot-ips.txt +``` diff --git a/docs/2022-07/index.html b/docs/2022-07/index.html index c9ec84202..848c43d96 100644 --- a/docs/2022-07/index.html +++ b/docs/2022-07/index.html @@ -19,7 +19,7 @@ Also, the trgm functions I’ve used before are case insensitive, but Levens - + @@ -44,9 +44,9 @@ Also, the trgm functions I’ve used before are case insensitive, but Levens "@type": "BlogPosting", "headline": "July, 2022", "url": "https://alanorth.github.io/cgspace-notes/2022-07/", - "wordCount": "2156", + "wordCount": "2266", "datePublished": "2022-07-02T14:07:36+03:00", - "dateModified": "2022-07-17T22:45:16+03:00", 
+ "dateModified": "2022-07-18T12:32:23+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -501,8 +501,27 @@ Also, the trgm functions I’ve used before are case insensitive, but Levens
  • I might try to purge some by enumerating all the networks in my block file and running them through check-spider-ip-hits.sh
  • +
  • I extracted all the IPs/subnets from bot-networks.conf and prepared them so I could enumerate their IPs + - +
  • + +
    :g!/\/\d\+$/s/^\(\d\+\.\d\+\.\d\+\.\d\+\)$/\1\/32/
    +
    +
    $ while read -r line; do prips "$line" | sed -e '1d; $d'; done < /tmp/bot-networks.conf > /tmp/bot-ips.txt
    +$ wc -l /tmp/bot-ips.txt
    +1946968 /tmp/bot-ips.txt
    +
    diff --git a/docs/categories/index.html b/docs/categories/index.html index 6002bcddb..ae4e4602a 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 369b23af0..b1a7e0175 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index 71f31f177..0c6845648 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index de25155c0..0f3351aad 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index aa245dc46..1634ece89 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index 380a03c86..144711a18 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html index b3eba0fd7..84ae6d816 100644 --- a/docs/categories/notes/page/6/index.html +++ b/docs/categories/notes/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/7/index.html b/docs/categories/notes/page/7/index.html index 081af3025..796797dfd 100644 --- a/docs/categories/notes/page/7/index.html +++ b/docs/categories/notes/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index b14ac3726..e2429bec3 100644 --- a/docs/index.html +++ b/docs/index.html @@ 
-10,7 +10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index 7bb02089f..dd3ba8e09 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index 8c25e8ef7..1d2510117 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index 70d129ee2..426b28c44 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index a6f5c7bff..4d459354c 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index b0719b969..133cd5689 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index fc65a45ea..9a843246e 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/8/index.html b/docs/page/8/index.html index 410cea6b1..631aa884a 100644 --- a/docs/page/8/index.html +++ b/docs/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/9/index.html b/docs/page/9/index.html index c4b902cc5..2cccf680d 100644 --- a/docs/page/9/index.html +++ b/docs/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index 3ce7aa806..75887b5cb 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index 58011fee2..66b20da96 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index b09a58363..385d2ff11 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git 
a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index 3887f7c06..46f827716 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index be099cd2d..4f114ddff 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 38a07eb9a..555e110f4 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index ecbbf5746..9dd0bed47 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html index 700fe5ba5..a3ddbd76d 100644 --- a/docs/posts/page/8/index.html +++ b/docs/posts/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/9/index.html b/docs/posts/page/9/index.html index 826087e8a..9c79413b9 100644 --- a/docs/posts/page/9/index.html +++ b/docs/posts/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index a903298c9..32e40f5ce 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,19 +3,19 @@ xmlns:xhtml="http://www.w3.org/1999/xhtml"> https://alanorth.github.io/cgspace-notes/categories/ - 2022-07-17T22:45:16+03:00 + 2022-07-18T12:32:23+03:00 https://alanorth.github.io/cgspace-notes/ - 2022-07-17T22:45:16+03:00 + 2022-07-18T12:32:23+03:00 https://alanorth.github.io/cgspace-notes/2022-07/ - 2022-07-17T22:45:16+03:00 + 2022-07-18T12:32:23+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2022-07-17T22:45:16+03:00 + 2022-07-18T12:32:23+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2022-07-17T22:45:16+03:00 + 2022-07-18T12:32:23+03:00 https://alanorth.github.io/cgspace-notes/2022-06/ 2022-07-04T09:25:14+03:00