diff --git a/content/post/2017-08.md b/content/post/2017-08.md index 84bd52515..562b5595c 100644 --- a/content/post/2017-08.md +++ b/content/post/2017-08.md @@ -94,3 +94,24 @@ dspace#= \copy (select distinct text_value, count(*) from metadatavalue where me ## 2017-08-12 - I sent a message to the mailing list about the duplicate content issue with `/rest` and `/bitstream` URLs +- Looking at the logs for the REST API on `/rest`, it looks like there is someone hammering doing testing or something on it... + +``` +# awk '{print $1}' /var/log/nginx/rest.log.1 | sort -n | uniq -c | sort -h | tail -n 5 + 140 66.249.66.91 + 404 66.249.66.90 + 1479 50.116.102.77 + 9794 45.5.184.196 + 85736 70.32.83.92 +``` + +- The top offender is 70.32.83.92 which is actually the same IP as ccafs.cgiar.org, so I will email the Macaroni Bros to see if they can test on DSpace Test instead +- I've enabled logging of `/oai` requests on nginx as well so we can potentially determine bad actors here (also to see if anyone is actually using OAI!) + +``` + # log oai requests + location /oai { + access_log /var/log/nginx/oai.log; + proxy_pass http://tomcat_http; + } +``` diff --git a/public/2017-08/index.html b/public/2017-08/index.html index f2869d7a8..4c69a3169 100644 --- a/public/2017-08/index.html +++ b/public/2017-08/index.html @@ -37,7 +37,7 @@ Then I cleaned up the author authorities and HTML characters in OpenRefine and s - + @@ -85,9 +85,9 @@ Then I cleaned up the author authorities and HTML characters in OpenRefine and s "@type": "BlogPosting", "headline": "August, 2017", "url": "https://alanorth.github.io/cgspace-notes/2017-08/", - "wordCount": "1207", + "wordCount": "1327", "datePublished": "2017-08-01T11:51:52+03:00", - "dateModified": "2017-08-12T08:40:59+03:00", + "dateModified": "2017-08-12T09:29:02+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -268,8 +268,29 @@ Then I cleaned up the author authorities and HTML characters in OpenRefine and s
/rest
and /bitstream
URLs/rest
, it looks like someone is hammering it, doing testing or something…# awk '{print $1}' /var/log/nginx/rest.log.1 | sort -n | uniq -c | sort -h | tail -n 5
+ 140 66.249.66.91
+ 404 66.249.66.90
+ 1479 50.116.102.77
+ 9794 45.5.184.196
+ 85736 70.32.83.92
+
+
+/oai
requests on nginx as well so we can potentially determine bad actors here (also to see if anyone is actually using OAI!) # log oai requests
+ location /oai {
+ access_log /var/log/nginx/oai.log;
+ proxy_pass http://tomcat_http;
+ }
+
+
diff --git a/public/sitemap.xml b/public/sitemap.xml
index 8cc9ab3d9..af49ecfb6 100644
--- a/public/sitemap.xml
+++ b/public/sitemap.xml
@@ -4,7 +4,7 @@