From d3203216c55bd8c228051dda9944e847367a4689 Mon Sep 17 00:00:00 2001
From: Alan Orth
Date: Mon, 1 Apr 2019 17:02:54 +0300
Subject: [PATCH] Update notes for 2019-04-01
---
content/posts/2019-03.md | 4 +-
content/posts/2019-04.md | 20 +++++++++
docs/2019-03/index.html | 10 ++---
docs/2019-04/index.html | 90 ++++++++++++++++++++++++++++++++++++--
docs/categories/index.html | 28 ++++++++++++
docs/index.html | 28 ++++++++++++
docs/index.xml | 30 ++++++++++++-
docs/posts/index.html | 28 ++++++++++++
docs/posts/index.xml | 30 ++++++++++++-
docs/sitemap.xml | 2 +-
docs/tags/index.html | 28 ++++++++++++
docs/tags/notes/index.html | 28 ++++++++++++
docs/tags/notes/index.xml | 30 ++++++++++++-
13 files changed, 342 insertions(+), 14 deletions(-)
diff --git a/content/posts/2019-03.md b/content/posts/2019-03.md
index e44968d6a..cf0cda7ab 100644
--- a/content/posts/2019-03.md
+++ b/content/posts/2019-03.md
@@ -990,11 +990,11 @@ $ grep -I -c 45.5.184.72 dspace.log.2019-03-26
![linode18 CPU usage after migration](/cgspace-notes/2019/03/cpu-week-migrated.png)
- It is frustrating to see that the load spikes for our own legitimate load on the server were *very* aggravated and drawn out by the contention for CPU on this host
-- We had almost 4.2 million hits this month according to the web server logs:
+- We had 4.2 million hits this month according to the web server logs:
```
# time zcat --force /var/log/nginx/* | grep -cE "[0-9]{1,2}/Mar/2019"
-4170986
+4218841
real 0m26.609s
user 0m31.657s
diff --git a/content/posts/2019-04.md b/content/posts/2019-04.md
index 7bfe02148..2038631aa 100644
--- a/content/posts/2019-04.md
+++ b/content/posts/2019-04.md
@@ -7,6 +7,26 @@ tags: ["Notes"]
## 2019-04-01
+- Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+ - They asked if we had plans to enable RDF support in CGSpace
+- There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+ - I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!
+
+```
+# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+```
+
+- In the last two weeks there have been 47,000 downloads of this *same exact PDF* by these three IP addresses
+- Apply country and region corrections and deletions on DSpace Test and CGSpace:
+
+```
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+```
+
## 2019-04-02
diff --git a/docs/2019-03/index.html b/docs/2019-03/index.html
index ba6f6f93d..343e47d95 100644
--- a/docs/2019-03/index.html
+++ b/docs/2019-03/index.html
@@ -25,7 +25,7 @@ I think I will need to ask Udana to re-copy and paste the abstracts with more ca
-
+
@@ -55,9 +55,9 @@ I think I will need to ask Udana to re-copy and paste the abstracts with more ca
"@type": "BlogPosting",
"headline": "March, 2019",
"url": "https://alanorth.github.io/cgspace-notes/2019-03/",
- "wordCount": "7106",
+ "wordCount": "7105",
"datePublished": "2019-03-01T12:16:30+01:00",
- "dateModified": "2019-03-31T17:35:28+03:00",
+ "dateModified": "2019-04-01T09:02:18+03:00",
"author": {
"@type": "Person",
"name": "Alan Orth"
@@ -1298,11 +1298,11 @@ $ ./delete-metadata-values.py -i /tmp/2019-03-26-AGROVOC-79-deletions.csv -db ds
- It is frustrating to see that the load spikes for our own legitimate load on the server were very aggravated and drawn out by the contention for CPU on this host
-- We had almost 4.2 million hits this month according to the web server logs:
+- We had 4.2 million hits this month according to the web server logs:
# time zcat --force /var/log/nginx/* | grep -cE "[0-9]{1,2}/Mar/2019"
-4170986
+4218841
real 0m26.609s
user 0m31.657s
diff --git a/docs/2019-04/index.html b/docs/2019-04/index.html
index 089f24f9a..76b3c3a38 100644
--- a/docs/2019-04/index.html
+++ b/docs/2019-04/index.html
@@ -6,7 +6,35 @@
-
+
@@ -14,7 +42,35 @@
-
+
@@ -25,7 +81,7 @@
"@type": "BlogPosting",
"headline": "April, 2019",
"url": "https://alanorth.github.io/cgspace-notes/2019-04/",
- "wordCount": "2",
+ "wordCount": "188",
"datePublished": "2019-04-01T09:00:43+03:00",
"dateModified": "2019-04-01T09:01:43+03:00",
"author": {
@@ -97,6 +153,34 @@
2019-04-01
+
+- Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+
+- They asked if we had plans to enable RDF support in CGSpace
+
+- There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+
+- I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!
+
+
+
+# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+
+
+
+- In the last two weeks there have been 47,000 downloads of this same exact PDF by these three IP addresses
+- Apply country and region corrections and deletions on DSpace Test and CGSpace:
+
+
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
+
2019-04-02
diff --git a/docs/categories/index.html b/docs/categories/index.html
index 86b078cf0..220c1c4e3 100644
--- a/docs/categories/index.html
+++ b/docs/categories/index.html
@@ -100,6 +100,34 @@
2019-04-01
+
+
+- Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+
+- They asked if we had plans to enable RDF support in CGSpace
+
+- There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+
+- I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!
+
+
+
+# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+
+
+
+- In the last two weeks there have been 47,000 downloads of this same exact PDF by these three IP addresses
+- Apply country and region corrections and deletions on DSpace Test and CGSpace:
+
+
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
Read more →
diff --git a/docs/index.html b/docs/index.html
index 82fc9b3b3..1ecfd9461 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -102,6 +102,34 @@
2019-04-01
+
+
+- Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+
+- They asked if we had plans to enable RDF support in CGSpace
+
+- There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+
+- I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!
+
+
+
+# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+
+
+
+- In the last two weeks there have been 47,000 downloads of this same exact PDF by these three IP addresses
+- Apply country and region corrections and deletions on DSpace Test and CGSpace:
+
+
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
Read more →
diff --git a/docs/index.xml b/docs/index.xml
index 6b0478b51..2eef14b50 100644
--- a/docs/index.xml
+++ b/docs/index.xml
@@ -17,7 +17,35 @@
Mon, 01 Apr 2019 09:00:43 +0300
https://alanorth.github.io/cgspace-notes/2019-04/
- <h2 id="2019-04-01">2019-04-01</h2>
+ <h2 id="2019-04-01">2019-04-01</h2>
+
+<ul>
+<li>Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+<ul>
+<li>They asked if we had plans to enable RDF support in CGSpace</li>
+</ul></li>
+<li>There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+<ul>
+<li>I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!</li>
+</ul></li>
+</ul>
+
+<pre><code># cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+</code></pre>
+
+<ul>
+<li>In the last two weeks there have been 47,000 downloads of this <em>same exact PDF</em> by these three IP addresses</li>
+<li>Apply country and region corrections and deletions on DSpace Test and CGSpace:</li>
+</ul>
+
+<pre><code>$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+</code></pre>
-
diff --git a/docs/posts/index.html b/docs/posts/index.html
index 31f4e9680..181beeeb9 100644
--- a/docs/posts/index.html
+++ b/docs/posts/index.html
@@ -102,6 +102,34 @@
2019-04-01
+
+
+- Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+
+- They asked if we had plans to enable RDF support in CGSpace
+
+- There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+
+- I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!
+
+
+
+# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+
+
+
+- In the last two weeks there have been 47,000 downloads of this same exact PDF by these three IP addresses
+- Apply country and region corrections and deletions on DSpace Test and CGSpace:
+
+
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
Read more →
diff --git a/docs/posts/index.xml b/docs/posts/index.xml
index 7db9c2c87..44f852384 100644
--- a/docs/posts/index.xml
+++ b/docs/posts/index.xml
@@ -17,7 +17,35 @@
Mon, 01 Apr 2019 09:00:43 +0300
https://alanorth.github.io/cgspace-notes/2019-04/
- <h2 id="2019-04-01">2019-04-01</h2>
+ <h2 id="2019-04-01">2019-04-01</h2>
+
+<ul>
+<li>Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+<ul>
+<li>They asked if we had plans to enable RDF support in CGSpace</li>
+</ul></li>
+<li>There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+<ul>
+<li>I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!</li>
+</ul></li>
+</ul>
+
+<pre><code># cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+</code></pre>
+
+<ul>
+<li>In the last two weeks there have been 47,000 downloads of this <em>same exact PDF</em> by these three IP addresses</li>
+<li>Apply country and region corrections and deletions on DSpace Test and CGSpace:</li>
+</ul>
+
+<pre><code>$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+</code></pre>
-
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index 0b5be6adb..023f87fed 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -9,7 +9,7 @@
https://alanorth.github.io/cgspace-notes/2019-03/
- 2019-03-31T17:35:28+03:00
+ 2019-04-01T09:02:18+03:00
diff --git a/docs/tags/index.html b/docs/tags/index.html
index 96fba5cd8..530c99caf 100644
--- a/docs/tags/index.html
+++ b/docs/tags/index.html
@@ -102,6 +102,34 @@
2019-04-01
+
+
+- Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+
+- They asked if we had plans to enable RDF support in CGSpace
+
+- There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+
+- I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!
+
+
+
+# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+
+
+
+- In the last two weeks there have been 47,000 downloads of this same exact PDF by these three IP addresses
+- Apply country and region corrections and deletions on DSpace Test and CGSpace:
+
+
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
Read more →
diff --git a/docs/tags/notes/index.html b/docs/tags/notes/index.html
index 5d6b03dc3..d3d7cb160 100644
--- a/docs/tags/notes/index.html
+++ b/docs/tags/notes/index.html
@@ -87,6 +87,34 @@
2019-04-01
+
+
+- Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+
+- They asked if we had plans to enable RDF support in CGSpace
+
+- There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+
+- I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!
+
+
+
+# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+
+
+
+- In the last two weeks there have been 47,000 downloads of this same exact PDF by these three IP addresses
+- Apply country and region corrections and deletions on DSpace Test and CGSpace:
+
+
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
Read more →
diff --git a/docs/tags/notes/index.xml b/docs/tags/notes/index.xml
index 4a29bd7c1..a25a4d4ed 100644
--- a/docs/tags/notes/index.xml
+++ b/docs/tags/notes/index.xml
@@ -17,7 +17,35 @@
Mon, 01 Apr 2019 09:00:43 +0300
https://alanorth.github.io/cgspace-notes/2019-04/
- <h2 id="2019-04-01">2019-04-01</h2>
+ <h2 id="2019-04-01">2019-04-01</h2>
+
+<ul>
+<li>Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+<ul>
+<li>They asked if we had plans to enable RDF support in CGSpace</li>
+</ul></li>
+<li>There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+<ul>
+<li>I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!</li>
+</ul></li>
+</ul>
+
+<pre><code># cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+</code></pre>
+
+<ul>
+<li>In the last two weeks there have been 47,000 downloads of this <em>same exact PDF</em> by these three IP addresses</li>
+<li>Apply country and region corrections and deletions on DSpace Test and CGSpace:</li>
+</ul>
+
+<pre><code>$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+</code></pre>
-