diff --git a/content/post/2017-11.md b/content/post/2017-11.md index 571e92e63..772e67324 100644 --- a/content/post/2017-11.md +++ b/content/post/2017-11.md @@ -364,3 +364,15 @@ $ grep -E '2017-11-07 0[2-8]' dspace.log.2017-11-07 | grep -o -E 'ip_addr=[0-9.] # grep -E "07/Nov/2017:1[234]:" /var/log/nginx/access.log | grep 104.196.152.243 | grep -c pdf 0 ``` + +- About CIAT, I think I need to encourage them to specify a user agent string for their requests, because they are not reusing their Tomcat session and they are creating thousands of sessions per day +- All CIAT requests vs unique ones: + +``` +$ grep -Io -E 'session_id=[A-Z0-9]{32}:ip_addr=104.196.152.243' dspace.log.2017-11-07 | wc -l +3506 +$ grep -Io -E 'session_id=[A-Z0-9]{32}:ip_addr=104.196.152.243' dspace.log.2017-11-07 | sort | uniq | wc -l +3506 +``` + +- I emailed CIAT about the session issue, user agent issue, and told them they should not scrape the HTML contents of communities, and should use the REST API instead diff --git a/public/2017-11/index.html b/public/2017-11/index.html index 900f97e96..a5111a0d4 100644 --- a/public/2017-11/index.html +++ b/public/2017-11/index.html @@ -38,7 +38,7 @@ COPY 54701 - + @@ -86,9 +86,9 @@ COPY 54701 "@type": "BlogPosting", "headline": "November, 2017", "url": "https://alanorth.github.io/cgspace-notes/2017-11/", - "wordCount": "1905", + "wordCount": "1997", "datePublished": "2017-11-02T09:37:54+02:00", - "dateModified": "2017-11-07T14:50:01+02:00", + "dateModified": "2017-11-07T17:03:49+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -552,6 +552,21 @@ $ grep 104.196.152.243 dspace.log.2017-11-01 | grep -o -E 'session_id=[A-Z0-9]{3 0 + + +
$ grep -Io -E 'session_id=[A-Z0-9]{32}:ip_addr=104.196.152.243' dspace.log.2017-11-07 | wc -l
+3506
+$ grep -Io -E 'session_id=[A-Z0-9]{32}:ip_addr=104.196.152.243' dspace.log.2017-11-07 | sort | uniq | wc -l
+3506
+
+ + + diff --git a/public/robots.txt b/public/robots.txt index 51a461e57..b90b33492 100644 --- a/public/robots.txt +++ b/public/robots.txt @@ -29,7 +29,7 @@ Disallow: /cgspace-notes/2015-12/ Disallow: /cgspace-notes/2015-11/ Disallow: /cgspace-notes/ Disallow: /cgspace-notes/categories/ -Disallow: /cgspace-notes/tags/notes/ Disallow: /cgspace-notes/categories/notes/ +Disallow: /cgspace-notes/tags/notes/ Disallow: /cgspace-notes/post/ Disallow: /cgspace-notes/tags/ diff --git a/public/sitemap.xml b/public/sitemap.xml index ce470f6f2..403e64e7d 100644 --- a/public/sitemap.xml +++ b/public/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2017-11/ - 2017-11-07T14:50:01+02:00 + 2017-11-07T17:03:49+02:00 @@ -134,7 +134,7 @@ https://alanorth.github.io/cgspace-notes/ - 2017-11-07T14:50:01+02:00 + 2017-11-07T17:03:49+02:00 0 @@ -143,27 +143,27 @@ 0 - - https://alanorth.github.io/cgspace-notes/tags/notes/ - 2017-11-07T14:50:01+02:00 - 0 - - https://alanorth.github.io/cgspace-notes/categories/notes/ 2017-09-28T12:00:49+03:00 0 + + https://alanorth.github.io/cgspace-notes/tags/notes/ + 2017-11-07T17:03:49+02:00 + 0 + + https://alanorth.github.io/cgspace-notes/post/ - 2017-11-07T14:50:01+02:00 + 2017-11-07T17:03:49+02:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2017-11-07T14:50:01+02:00 + 2017-11-07T17:03:49+02:00 0