From 9798063ec88c898dd97ac7d4c242392d0ef5ea3e Mon Sep 17 00:00:00 2001
From: Alan Orth
Date: Sat, 9 Nov 2019 21:41:15 +0200
Subject: [PATCH] Update notes for 2019-11-09
---
content/posts/2019-11.md | 15 +++++++++++
docs/2019-11/index.html | 57 +++++++++++++++++++++++-----------------
docs/sitemap.xml | 10 +++----
3 files changed, 53 insertions(+), 29 deletions(-)
diff --git a/content/posts/2019-11.md b/content/posts/2019-11.md
index dd8db36ff..b35e9df56 100644
--- a/content/posts/2019-11.md
+++ b/content/posts/2019-11.md
@@ -246,4 +246,19 @@ $ curl -s 'http://localhost:8081/solr/statistics/select?facet=true&facet.field=d
- `User-Agent:Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) IKU/7.0.5.9226;IKUCID/IKU;`
- I filed [an issue](https://github.com/atmire/COUNTER-Robots/issues/27) on the COUNTER-Robots project to see if they agree to add `User-Agent:` to the list of robot user agents
+## 2019-11-09
+
+- Deploy the latest `5_x-prod` branch on CGSpace (linode19)
+ - This includes the updated CCAFS phase II project tags and the updated spider user agents
+- Run all system updates on CGSpace and reboot the server
+ - After rebooting it seems that all Solr statistics cores came back up fine...
+- I did some work to clean up my bot processing script and removed about 2 million hits from the statistics cores on CGSpace
+ - The script is called `check-spider-hits.sh`
+ - After a bunch of tests and checks I ran it for each statistics shard like so:
+
+```
+$ for shard in statistics statistics-2018 statistics-2017 statistics-2016 statistics-2015 statistics-2014 statistics-2013 statistics-2012 statistics-2011 statistics-2010; do ./check-spider-hits.sh -s $shard -p yes; done
+```
+
diff --git a/docs/2019-11/index.html b/docs/2019-11/index.html
index 4edf477d9..283c1139a 100644
--- a/docs/2019-11/index.html
+++ b/docs/2019-11/index.html
@@ -34,7 +34,7 @@ Let’s see how many of the REST API requests were for bitstreams (because t
-
+
@@ -73,9 +73,9 @@ Let’s see how many of the REST API requests were for bitstreams (because t
"@type": "BlogPosting",
"headline": "November, 2019",
"url": "https:\/\/alanorth.github.io\/cgspace-notes\/2019-11\/",
- "wordCount": "1442",
+ "wordCount": "1573",
"datePublished": "2019-11-04T12:20:30+02:00",
- "dateModified": "2019-11-08T09:27:13+02:00",
+ "dateModified": "2019-11-08T18:29:25+02:00",
"author": {
"@type": "Person",
"name": "Alan Orth"
@@ -390,29 +390,38 @@ $ http –print b ‘36512
760
-
+
+
+
+- That answers Peter's question about why the stats jumped in October...
+
+## 2019-11-08
+
+- I saw a bunch of user agents that have the literal string `User-Agent` in their user agent HTTP header, for example:
+ - `User-Agent: Drupal (+http://drupal.org/)`
+ - `User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31`
+ - `User-Agent:Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) IKU/7.0.5.9226;IKUCID/IKU;`
+ - `User-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)`
+ - `User-Agent:User-Agent:Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; GTB7.5; .NET4.0C)IKU/6.7.6.12189;IKUCID/IKU;IKU/6.7.6.12189;IKUCID/IKU;`
+ - `User-Agent:Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) IKU/7.0.5.9226;IKUCID/IKU;`
+- I filed [an issue](https://github.com/atmire/COUNTER-Robots/issues/27) on the COUNTER-Robots project to see if they agree to add `User-Agent:` to the list of robot user agents
+
+## 2019-11-09
+
+- Deploy the latest `5_x-prod` branch on CGSpace (linode19)
+ - This includes the updated CCAFS phase II project tags and the updated spider user agents
+- Run all system updates on CGSpace and reboot the server
+ - After rebooting it seems that all Solr statistics cores came back up fine...
+- I did some work to clean up my bot processing script and removed about 2 million hits from the statistics cores on CGSpace
+ - The script is called `check-spider-hits.sh`
+ - After a bunch of tests and checks I ran it for each statistics shard like so:
+
+
+
+$ for shard in statistics statistics-2018 statistics-2017 statistics-2016 statistics-2015 statistics-2014 statistics-2013 statistics-2012 statistics-2011 statistics-2010; do ./check-spider-hits.sh -s $shard -p yes; done
```
-
-- That answers Peter’s question about why the stats jumped in October…
-
-
-2019-11-08
-
-
-- I saw a bunch of user agents that have the literal string
User-Agent
in their user agent HTTP header, for example:
-
-
-User-Agent: Drupal (+http://drupal.org/)
-User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31
-User-Agent:Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) IKU/7.0.5.9226;IKUCID/IKU;
-User-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)
-User-Agent:User-Agent:Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; GTB7.5; .NET4.0C)IKU/6.7.6.12189;IKUCID/IKU;IKU/6.7.6.12189;IKUCID/IKU;
-User-Agent:Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) IKU/7.0.5.9226;IKUCID/IKU;
-
-- I filed an issue on the COUNTER-Robots project to see if they agree to add
User-Agent:
to the list of robot user agents
-
-
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index 9c93fa048..22bc87782 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -4,27 +4,27 @@
https://alanorth.github.io/cgspace-notes/categories/
- 2019-11-08T09:27:13+02:00
+ 2019-11-08T18:29:25+02:00
https://alanorth.github.io/cgspace-notes/
- 2019-11-08T09:27:13+02:00
+ 2019-11-08T18:29:25+02:00
https://alanorth.github.io/cgspace-notes/categories/notes/
- 2019-11-08T09:27:13+02:00
+ 2019-11-08T18:29:25+02:00
https://alanorth.github.io/cgspace-notes/2019-11/
- 2019-11-08T09:27:13+02:00
+ 2019-11-08T18:29:25+02:00
https://alanorth.github.io/cgspace-notes/posts/
- 2019-11-08T09:27:13+02:00
+ 2019-11-08T18:29:25+02:00