From 7d675f93f8abb176169e2e7ddcd383c5a9459ac3 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Sat, 12 Aug 2017 18:32:45 +0300 Subject: [PATCH] Add notes for 2017-08-12 --- content/post/2017-08.md | 21 +++++++++++++++++++++ public/2017-08/index.html | 27 ++++++++++++++++++++++++--- public/sitemap.xml | 10 +++++----- 3 files changed, 50 insertions(+), 8 deletions(-) diff --git a/content/post/2017-08.md b/content/post/2017-08.md index 84bd52515..562b5595c 100644 --- a/content/post/2017-08.md +++ b/content/post/2017-08.md @@ -94,3 +94,24 @@ dspace#= \copy (select distinct text_value, count(*) from metadatavalue where me ## 2017-08-12 - I sent a message to the mailing list about the duplicate content issue with `/rest` and `/bitstream` URLs +- Looking at the logs for the REST API on `/rest`, it looks like there is someone hammering on it, doing testing or something... + +``` +# awk '{print $1}' /var/log/nginx/rest.log.1 | sort -n | uniq -c | sort -h | tail -n 5 + 140 66.249.66.91 + 404 66.249.66.90 + 1479 50.116.102.77 + 9794 45.5.184.196 + 85736 70.32.83.92 +``` + +- The top offender is 70.32.83.92 which is actually the same IP as ccafs.cgiar.org, so I will email the Macaroni Bros to see if they can test on DSpace Test instead +- I've enabled logging of `/oai` requests on nginx as well so we can potentially determine bad actors here (also to see if anyone is actually using OAI!) 
+ +``` + # log oai requests + location /oai { + access_log /var/log/nginx/oai.log; + proxy_pass http://tomcat_http; + } +``` diff --git a/public/2017-08/index.html b/public/2017-08/index.html index f2869d7a8..4c69a3169 100644 --- a/public/2017-08/index.html +++ b/public/2017-08/index.html @@ -37,7 +37,7 @@ Then I cleaned up the author authorities and HTML characters in OpenRefine and s - + @@ -85,9 +85,9 @@ Then I cleaned up the author authorities and HTML characters in OpenRefine and s "@type": "BlogPosting", "headline": "August, 2017", "url": "https://alanorth.github.io/cgspace-notes/2017-08/", - "wordCount": "1207", + "wordCount": "1327", "datePublished": "2017-08-01T11:51:52+03:00", - "dateModified": "2017-08-12T08:40:59+03:00", + "dateModified": "2017-08-12T09:29:02+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -268,8 +268,29 @@ Then I cleaned up the author authorities and HTML characters in OpenRefine and s +
# awk '{print $1}' /var/log/nginx/rest.log.1 | sort -n | uniq -c | sort -h | tail -n 5
+    140 66.249.66.91
+    404 66.249.66.90
+   1479 50.116.102.77
+   9794 45.5.184.196
+  85736 70.32.83.92
+
+ + + +
    # log oai requests
+    location /oai {
+        access_log /var/log/nginx/oai.log;
+        proxy_pass http://tomcat_http;
+    }
+
+ diff --git a/public/sitemap.xml b/public/sitemap.xml index 8cc9ab3d9..af49ecfb6 100644 --- a/public/sitemap.xml +++ b/public/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2017-08/ - 2017-08-12T08:40:59+03:00 + 2017-08-12T09:29:02+03:00 @@ -114,7 +114,7 @@ https://alanorth.github.io/cgspace-notes/ - 2017-08-12T08:40:59+03:00 + 2017-08-12T09:29:02+03:00 0 @@ -125,19 +125,19 @@ https://alanorth.github.io/cgspace-notes/tags/notes/ - 2017-08-12T08:40:59+03:00 + 2017-08-12T09:29:02+03:00 0 https://alanorth.github.io/cgspace-notes/post/ - 2017-08-12T08:40:59+03:00 + 2017-08-12T09:29:02+03:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2017-08-12T08:40:59+03:00 + 2017-08-12T09:29:02+03:00 0