diff --git a/content/posts/2022-07.md b/content/posts/2022-07.md
index 17db51db4..8ab9be97e 100644
--- a/content/posts/2022-07.md
+++ b/content/posts/2022-07.md
@@ -335,5 +335,23 @@ geo $ua {
- This allows me to accomplish the original goal while still only using one bot-networks.conf file for the `limit_req_zone` and the user agent mapping that we pass to Tomcat
- Unfortunately this means I will have hundreds of thousands of requests in Solr with a literal `$http_user_agent`
- I might try to purge some by enumerating all the networks in my block file and running them through `check-spider-ip-hits.sh`
+- I extracted all the IPs/subnets from `bot-networks.conf` and prepared them so I could enumerate their IPs
+ - I had to add `/32` to all single IPs, which I did with this crazy vim invocation:
+
+```console
+:g!/\/\d\+$/s/^\(\d\+\.\d\+\.\d\+\.\d\+\)$/\1\/32/
+```
+
+- Explanation:
+ - `g!`: global, lines *not* matching (the opposite of `g`)
+ - `/\/\d\+$/`: pattern matching a `/` followed by one or more digits at the end of the line (i.e. an existing CIDR suffix)
+ - `s/^\(\d\+\.\d\+\.\d\+\.\d\+\)$/\1\/32/`: on the lines selected by `g!` (those without a CIDR suffix), capture the bare IPv4 address and append `/32`
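+- Then I ran the list through prips to enumerate the IPs, using `sed` to drop the first and last address (network and broadcast) of each range; note that this also drops the lone address of every `/32`: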
+
+```console
+$ while read -r line; do prips "$line" | sed -e '1d; $d'; done < /tmp/bot-networks.conf > /tmp/bot-ips.txt
+$ wc -l /tmp/bot-ips.txt
+1946968 /tmp/bot-ips.txt
+```
diff --git a/docs/2022-07/index.html b/docs/2022-07/index.html
index c9ec84202..848c43d96 100644
--- a/docs/2022-07/index.html
+++ b/docs/2022-07/index.html
@@ -19,7 +19,7 @@ Also, the trgm functions I’ve used before are case insensitive, but Levens
-
+
@@ -44,9 +44,9 @@ Also, the trgm functions I’ve used before are case insensitive, but Levens
"@type": "BlogPosting",
"headline": "July, 2022",
"url": "https://alanorth.github.io/cgspace-notes/2022-07/",
- "wordCount": "2156",
+ "wordCount": "2266",
"datePublished": "2022-07-02T14:07:36+03:00",
- "dateModified": "2022-07-17T22:45:16+03:00",
+ "dateModified": "2022-07-18T12:32:23+03:00",
"author": {
"@type": "Person",
"name": "Alan Orth"
@@ -501,8 +501,27 @@ Also, the trgm functions I’ve used before are case insensitive, but Levens
I might try to purge some by enumerating all the networks in my block file and running them through check-spider-ip-hits.sh
+I extracted all the IPs/subnets from bot-networks.conf
and prepared them so I could enumerate their IPs
+
+- I had to add
/32
to all single IPs, which I did with this crazy vim invocation:
-
+
+
+:g!/\/\d\+$/s/^\(\d\+\.\d\+\.\d\+\.\d\+\)$/\1\/32/
+
+- Explanation:
+
+g!
: global, lines not matching (the opposite of g
)
+/\/\d\+$/
, pattern matching /
with one or more digits at the end of the line
+s/^\(\d\+\.\d\+\.\d\+\.\d\+\)$/\1\/32/
, for lines not matching above, capture the IPv4 address and add /32
at the end
+
+
+- Then I ran the list through prips to enumerate the IPs:
+
+$ while read -r line; do prips "$line" | sed -e '1d; $d'; done < /tmp/bot-networks.conf > /tmp/bot-ips.txt
+$ wc -l /tmp/bot-ips.txt
+1946968 /tmp/bot-ips.txt
+
diff --git a/docs/categories/index.html b/docs/categories/index.html
index 6002bcddb..ae4e4602a 100644
--- a/docs/categories/index.html
+++ b/docs/categories/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html
index 369b23af0..b1a7e0175 100644
--- a/docs/categories/notes/index.html
+++ b/docs/categories/notes/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html
index 71f31f177..0c6845648 100644
--- a/docs/categories/notes/page/2/index.html
+++ b/docs/categories/notes/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html
index de25155c0..0f3351aad 100644
--- a/docs/categories/notes/page/3/index.html
+++ b/docs/categories/notes/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html
index aa245dc46..1634ece89 100644
--- a/docs/categories/notes/page/4/index.html
+++ b/docs/categories/notes/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html
index 380a03c86..144711a18 100644
--- a/docs/categories/notes/page/5/index.html
+++ b/docs/categories/notes/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html
index b3eba0fd7..84ae6d816 100644
--- a/docs/categories/notes/page/6/index.html
+++ b/docs/categories/notes/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/7/index.html b/docs/categories/notes/page/7/index.html
index 081af3025..796797dfd 100644
--- a/docs/categories/notes/page/7/index.html
+++ b/docs/categories/notes/page/7/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/index.html b/docs/index.html
index b14ac3726..e2429bec3 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/2/index.html b/docs/page/2/index.html
index 7bb02089f..dd3ba8e09 100644
--- a/docs/page/2/index.html
+++ b/docs/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/3/index.html b/docs/page/3/index.html
index 8c25e8ef7..1d2510117 100644
--- a/docs/page/3/index.html
+++ b/docs/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/4/index.html b/docs/page/4/index.html
index 70d129ee2..426b28c44 100644
--- a/docs/page/4/index.html
+++ b/docs/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/5/index.html b/docs/page/5/index.html
index a6f5c7bff..4d459354c 100644
--- a/docs/page/5/index.html
+++ b/docs/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/6/index.html b/docs/page/6/index.html
index b0719b969..133cd5689 100644
--- a/docs/page/6/index.html
+++ b/docs/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/7/index.html b/docs/page/7/index.html
index fc65a45ea..9a843246e 100644
--- a/docs/page/7/index.html
+++ b/docs/page/7/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/8/index.html b/docs/page/8/index.html
index 410cea6b1..631aa884a 100644
--- a/docs/page/8/index.html
+++ b/docs/page/8/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/9/index.html b/docs/page/9/index.html
index c4b902cc5..2cccf680d 100644
--- a/docs/page/9/index.html
+++ b/docs/page/9/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/index.html b/docs/posts/index.html
index 3ce7aa806..75887b5cb 100644
--- a/docs/posts/index.html
+++ b/docs/posts/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html
index 58011fee2..66b20da96 100644
--- a/docs/posts/page/2/index.html
+++ b/docs/posts/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html
index b09a58363..385d2ff11 100644
--- a/docs/posts/page/3/index.html
+++ b/docs/posts/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html
index 3887f7c06..46f827716 100644
--- a/docs/posts/page/4/index.html
+++ b/docs/posts/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html
index be099cd2d..4f114ddff 100644
--- a/docs/posts/page/5/index.html
+++ b/docs/posts/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html
index 38a07eb9a..555e110f4 100644
--- a/docs/posts/page/6/index.html
+++ b/docs/posts/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html
index ecbbf5746..9dd0bed47 100644
--- a/docs/posts/page/7/index.html
+++ b/docs/posts/page/7/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html
index 700fe5ba5..a3ddbd76d 100644
--- a/docs/posts/page/8/index.html
+++ b/docs/posts/page/8/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/9/index.html b/docs/posts/page/9/index.html
index 826087e8a..9c79413b9 100644
--- a/docs/posts/page/9/index.html
+++ b/docs/posts/page/9/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index a903298c9..32e40f5ce 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -3,19 +3,19 @@
xmlns:xhtml="http://www.w3.org/1999/xhtml">
https://alanorth.github.io/cgspace-notes/categories/
- 2022-07-17T22:45:16+03:00
+ 2022-07-18T12:32:23+03:00
https://alanorth.github.io/cgspace-notes/
- 2022-07-17T22:45:16+03:00
+ 2022-07-18T12:32:23+03:00
https://alanorth.github.io/cgspace-notes/2022-07/
- 2022-07-17T22:45:16+03:00
+ 2022-07-18T12:32:23+03:00
https://alanorth.github.io/cgspace-notes/categories/notes/
- 2022-07-17T22:45:16+03:00
+ 2022-07-18T12:32:23+03:00
https://alanorth.github.io/cgspace-notes/posts/
- 2022-07-17T22:45:16+03:00
+ 2022-07-18T12:32:23+03:00
https://alanorth.github.io/cgspace-notes/2022-06/
2022-07-04T09:25:14+03:00