diff --git a/content/post/2017-10.md b/content/post/2017-10.md index 9dc067dc9..afa158e1b 100644 --- a/content/post/2017-10.md +++ b/content/post/2017-10.md @@ -338,3 +338,18 @@ WARNING: [SetPropertiesRule]{Server/Service/Engine/Host/Valve} Setting property ``` # goaccess /var/log/nginx/access.log --log-format=COMBINED ``` + +- According to Uptime Robot CGSpace went down and up a few times +- I had a look at goaccess and I saw that CORE was actively indexing +- Also, PostgreSQL connections were at 91 (with the max being 60 per web app, hmmm) +- I'm really starting to get annoyed with these guys, and thinking about blocking their IP address for a few days to see if CGSpace becomes more stable +- Actually, come to think of it, they aren't even obeying `robots.txt`, because we actually disallow `/discover` and `/search-filter` URLs but they are hitting those massively: + +``` +# grep "CORE/0.6" /var/log/nginx/access.log | grep -o -E "GET /(discover|search-filter)" | sort -n | uniq -c | sort -rn + 158058 GET /discover + 14260 GET /search-filter +``` + +- I tested a URL of pattern `/discover` in Google's webmaster tools and it was indeed identified as blocked +- I will send feedback to the CORE bot team diff --git a/public/2017-10/index.html b/public/2017-10/index.html index 329d75553..0a0ddfd6e 100644 --- a/public/2017-10/index.html +++ b/public/2017-10/index.html @@ -28,7 +28,7 @@ Add Katherine Lutz to the groups for content sumission and edit steps of the CGI - + @@ -66,9 +66,9 @@ Add Katherine Lutz to the groups for content sumission and edit steps of the CGI "@type": "BlogPosting", "headline": "October, 2017", "url": "https://alanorth.github.io/cgspace-notes/2017-10/", - "wordCount": "2468", + "wordCount": "2613", "datePublished": "2017-10-01T08:07:54+03:00", - "dateModified": "2017-10-31T13:35:56+02:00", + "dateModified": "2017-10-31T15:38:27+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -522,6 +522,24 @@ session_id=6C30F10B4351A4ED83EC6ED50AFD6B6A
# goaccess /var/log/nginx/access.log --log-format=COMBINED
 
+ + +
# grep "CORE/0.6" /var/log/nginx/access.log | grep -o -E "GET /(discover|search-filter)" | sort -n | uniq -c | sort -rn 
+ 158058 GET /discover
+  14260 GET /search-filter
+
+ + + diff --git a/public/robots.txt b/public/robots.txt index b730f0cd9..33d937c67 100644 --- a/public/robots.txt +++ b/public/robots.txt @@ -28,7 +28,7 @@ Disallow: /cgspace-notes/2015-12/ Disallow: /cgspace-notes/2015-11/ Disallow: /cgspace-notes/ Disallow: /cgspace-notes/categories/ -Disallow: /cgspace-notes/categories/notes/ Disallow: /cgspace-notes/tags/notes/ +Disallow: /cgspace-notes/categories/notes/ Disallow: /cgspace-notes/post/ Disallow: /cgspace-notes/tags/ diff --git a/public/sitemap.xml b/public/sitemap.xml index 4d6b07bc1..124bdf3ad 100644 --- a/public/sitemap.xml +++ b/public/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2017-10/ - 2017-10-31T13:35:56+02:00 + 2017-10-31T15:38:27+02:00 @@ -129,7 +129,7 @@ https://alanorth.github.io/cgspace-notes/ - 2017-10-31T13:35:56+02:00 + 2017-10-31T15:38:27+02:00 0 @@ -138,27 +138,27 @@ 0 + + https://alanorth.github.io/cgspace-notes/tags/notes/ + 2017-10-31T15:38:27+02:00 + 0 + + https://alanorth.github.io/cgspace-notes/categories/notes/ 2017-09-28T12:00:49+03:00 0 - - https://alanorth.github.io/cgspace-notes/tags/notes/ - 2017-10-31T13:35:56+02:00 - 0 - - https://alanorth.github.io/cgspace-notes/post/ - 2017-10-31T13:35:56+02:00 + 2017-10-31T15:38:27+02:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2017-10-31T13:35:56+02:00 + 2017-10-31T15:38:27+02:00 0