From db726df881eb8565f043903e8e1e3975ab0444ef Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Tue, 31 Oct 2017 15:38:27 +0200 Subject: [PATCH] Update notes for 2017-10-31 --- content/post/2017-10.md | 16 ++++++++++++++++ public/2017-10/index.html | 22 +++++++++++++++++++--- public/sitemap.xml | 10 +++++----- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/content/post/2017-10.md b/content/post/2017-10.md index 9b662f3d4..9dc067dc9 100644 --- a/content/post/2017-10.md +++ b/content/post/2017-10.md @@ -322,3 +322,19 @@ WARNING: [SetPropertiesRule]{Server/Service/Engine/Host/Valve} Setting property - Very nice, Linode alerted that CGSpace had high CPU usage at 2AM again - Ask on the dspace-tech mailing list if it's possible to use an existing item as a template for a new item +- To follow up on the CORE bot traffic, there were almost 300,000 requests yesterday: + +``` +# grep "CORE/0.6" /var/log/nginx/access.log.1 | awk '{print $1}' | sort -n | uniq -c | sort -h + 139109 137.108.70.6 + 139253 137.108.70.7 +``` + +- I've emailed the CORE people to ask if they can update the repository information from CGIAR Library to CGSpace +- Also, I asked if they could perhaps use the `sitemap.xml`, OAI-PMH, or REST APIs to index us more efficiently, because they mostly seem to be crawling the nearly endless Discovery facets +- I added [GoAccess](https://goaccess.io/) to the list of packages to install in the DSpace role of the [Ansible infrastructure scripts](https://github.com/ilri/rmg-ansible-public) +- It makes it very easy to analyze nginx logs from the command line, to see where traffic is coming from: + +``` +# goaccess /var/log/nginx/access.log --log-format=COMBINED +``` diff --git a/public/2017-10/index.html b/public/2017-10/index.html index 0d94bbef6..329d75553 100644 --- a/public/2017-10/index.html +++ b/public/2017-10/index.html @@ -28,7 +28,7 @@ Add Katherine Lutz to the groups for content sumission and edit steps of the CGI - + @@ -66,9 +66,9 @@ Add 
Katherine Lutz to the groups for content sumission and edit steps of the CGI "@type": "BlogPosting", "headline": "October, 2017", "url": "https://alanorth.github.io/cgspace-notes/2017-10/", - "wordCount": "2340", + "wordCount": "2468", "datePublished": "2017-10-01T08:07:54+03:00", - "dateModified": "2017-10-31T11:35:24+02:00", + "dateModified": "2017-10-31T13:35:56+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -504,8 +504,24 @@ session_id=6C30F10B4351A4ED83EC6ED50AFD6B6A +
# grep "CORE/0.6" /var/log/nginx/access.log.1 | awk '{print $1}' | sort -n | uniq -c | sort -h
+ 139109 137.108.70.6
+ 139253 137.108.70.7
+
+ + + +
# goaccess /var/log/nginx/access.log --log-format=COMBINED
+
+ diff --git a/public/sitemap.xml b/public/sitemap.xml index 2e4e22f44..4d6b07bc1 100644 --- a/public/sitemap.xml +++ b/public/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2017-10/ - 2017-10-31T11:35:24+02:00 + 2017-10-31T13:35:56+02:00 @@ -129,7 +129,7 @@ https://alanorth.github.io/cgspace-notes/ - 2017-10-31T11:35:24+02:00 + 2017-10-31T13:35:56+02:00 0 @@ -146,19 +146,19 @@ https://alanorth.github.io/cgspace-notes/tags/notes/ - 2017-10-31T11:35:24+02:00 + 2017-10-31T13:35:56+02:00 0 https://alanorth.github.io/cgspace-notes/post/ - 2017-10-31T11:35:24+02:00 + 2017-10-31T13:35:56+02:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2017-10-31T11:35:24+02:00 + 2017-10-31T13:35:56+02:00 0