diff --git a/content/post/2017-07.md b/content/post/2017-07.md index 4b0dcf554..cc1af5929 100644 --- a/content/post/2017-07.md +++ b/content/post/2017-07.md @@ -139,3 +139,11 @@ delete from metadatavalue where resource_type_id=2 and metadata_field_id=235 and - Now just waiting to run them on CGSpace, and then apply the modified input forms after Macaroni Bros give me an updated list - Temporarily increase the nginx upload limit to 200MB for Sisay to upload the CIAT presentations +- Looking at CGSpace activity page, there are 52 Baidu bots concurrently crawling our website (I copied the activity page to a text file and grepped it)! + +``` +$ grep 180.76. /tmp/status | awk '{print $5}' | sort | uniq | wc -l +52 +``` + +- From looking at the `dspace.log` I see they are all using the same session, which means our Crawler Session Manager Valve is working diff --git a/public/2017-07/index.html b/public/2017-07/index.html index eb1e377da..a0b590c6b 100644 --- a/public/2017-07/index.html +++ b/public/2017-07/index.html @@ -27,7 +27,7 @@ We can use PostgreSQL’s extended output format (-x) plus sed to format the - + @@ -73,9 +73,9 @@ We can use PostgreSQL’s extended output format (-x) plus sed to format the "@type": "BlogPosting", "headline": "July, 2017", "url": "https://alanorth.github.io/cgspace-notes/2017-07/", - "wordCount": "1086", + "wordCount": "1151", "datePublished": "2017-07-01T18:03:52+03:00", - "dateModified": "2017-07-30T14:18:23+03:00", + "dateModified": "2017-07-31T12:06:21+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -313,6 +313,15 @@ delete from metadatavalue where resource_type_id=2 and metadata_field_id=235 and + +
$ grep 180.76. /tmp/status | awk '{print $5}' | sort | uniq | wc -l
+52
+
+ + diff --git a/public/sitemap.xml b/public/sitemap.xml index 61f449f2b..f16967d98 100644 --- a/public/sitemap.xml +++ b/public/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2017-07/ - 2017-07-30T14:18:23+03:00 + 2017-07-31T12:06:21+03:00 @@ -109,7 +109,7 @@ https://alanorth.github.io/cgspace-notes/ - 2017-07-30T14:18:23+03:00 + 2017-07-31T12:06:21+03:00 0 @@ -120,19 +120,19 @@ https://alanorth.github.io/cgspace-notes/tags/notes/ - 2017-07-30T14:18:23+03:00 + 2017-07-31T12:06:21+03:00 0 https://alanorth.github.io/cgspace-notes/post/ - 2017-07-30T14:18:23+03:00 + 2017-07-31T12:06:21+03:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2017-07-30T14:18:23+03:00 + 2017-07-31T12:06:21+03:00 0