diff --git a/content/posts/2019-03.md b/content/posts/2019-03.md index e44968d6a..cf0cda7ab 100644 --- a/content/posts/2019-03.md +++ b/content/posts/2019-03.md @@ -990,11 +990,11 @@ $ grep -I -c 45.5.184.72 dspace.log.2019-03-26 ![linode18 CPU usage after migration](/cgspace-notes/2019/03/cpu-week-migrated.png) - It is frustrating to see that the load spikes for our own legitimate load on the server were *very* aggravated and drawn out by the contention for CPU on this host -- We had almost 4.2 million hits this month according to the web server logs: +- We had 4.2 million hits this month according to the web server logs: ``` # time zcat --force /var/log/nginx/* | grep -cE "[0-9]{1,2}/Mar/2019" -4170986 +4218841 real 0m26.609s user 0m31.657s diff --git a/content/posts/2019-04.md b/content/posts/2019-04.md index 7bfe02148..2038631aa 100644 --- a/content/posts/2019-04.md +++ b/content/posts/2019-04.md @@ -7,6 +7,26 @@ tags: ["Notes"] ## 2019-04-01 +- Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc + - They asked if we had plans to enable RDF support in CGSpace +- There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today + - I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200! 
+ +``` +# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5 + 4432 200 +``` + +- In the last two weeks there have been 47,000 downloads of this *same exact PDF* by these three IP addresses +- Apply country and region corrections and deletions on DSpace Test and CGSpace: + +``` +$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d +$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d +$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d +$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d +``` + ## 2019-04-02 diff --git a/docs/2019-03/index.html b/docs/2019-03/index.html index ba6f6f93d..343e47d95 100644 --- a/docs/2019-03/index.html +++ b/docs/2019-03/index.html @@ -25,7 +25,7 @@ I think I will need to ask Udana to re-copy and paste the abstracts with more ca - + @@ -55,9 +55,9 @@ I think I will need to ask Udana to re-copy and paste the abstracts with more ca "@type": "BlogPosting", "headline": "March, 2019", "url": "https://alanorth.github.io/cgspace-notes/2019-03/", - "wordCount": "7106", + "wordCount": "7105", "datePublished": "2019-03-01T12:16:30+01:00", - "dateModified": "2019-03-31T17:35:28+03:00", + "dateModified": "2019-04-01T09:02:18+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -1298,11 +1298,11 @@ $ ./delete-metadata-values.py -i /tmp/2019-03-26-AGROVOC-79-deletions.csv -db ds
# time zcat --force /var/log/nginx/* | grep -cE "[0-9]{1,2}/Mar/2019"
-4170986
+4218841
real 0m26.609s
user 0m31.657s
diff --git a/docs/2019-04/index.html b/docs/2019-04/index.html
index 089f24f9a..76b3c3a38 100644
--- a/docs/2019-04/index.html
+++ b/docs/2019-04/index.html
@@ -6,7 +6,35 @@
-
+
@@ -14,7 +42,35 @@
-
+
@@ -25,7 +81,7 @@
"@type": "BlogPosting",
"headline": "April, 2019",
"url": "https://alanorth.github.io/cgspace-notes/2019-04/",
- "wordCount": "2",
+ "wordCount": "188",
"datePublished": "2019-04-01T09:00:43+03:00",
"dateModified": "2019-04-01T09:01:43+03:00",
"author": {
@@ -97,6 +153,34 @@
2019-04-01
+
+- Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+
+- They asked if we had plans to enable RDF support in CGSpace
+
+- There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+
+- I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!
+
+
+
+# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+
+
+
+- In the last two weeks there have been 47,000 downloads of this same exact PDF by these three IP addresses
+- Apply country and region corrections and deletions on DSpace Test and CGSpace:
+
+
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
+
2019-04-02
diff --git a/docs/categories/index.html b/docs/categories/index.html
index 86b078cf0..220c1c4e3 100644
--- a/docs/categories/index.html
+++ b/docs/categories/index.html
@@ -100,6 +100,34 @@
2019-04-01
+
+
+- Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+
+- They asked if we had plans to enable RDF support in CGSpace
+
+- There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+
+- I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!
+
+
+
+# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+
+
+
+- In the last two weeks there have been 47,000 downloads of this same exact PDF by these three IP addresses
+- Apply country and region corrections and deletions on DSpace Test and CGSpace:
+
+
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
Read more →
diff --git a/docs/index.html b/docs/index.html
index 82fc9b3b3..1ecfd9461 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -102,6 +102,34 @@
2019-04-01
+
+
+- Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+
+- They asked if we had plans to enable RDF support in CGSpace
+
+- There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+
+- I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!
+
+
+
+# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+
+
+
+- In the last two weeks there have been 47,000 downloads of this same exact PDF by these three IP addresses
+- Apply country and region corrections and deletions on DSpace Test and CGSpace:
+
+
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
Read more →
diff --git a/docs/index.xml b/docs/index.xml
index 6b0478b51..2eef14b50 100644
--- a/docs/index.xml
+++ b/docs/index.xml
@@ -17,7 +17,35 @@
Mon, 01 Apr 2019 09:00:43 +0300
https://alanorth.github.io/cgspace-notes/2019-04/
- <h2 id="2019-04-01">2019-04-01</h2>
+ <h2 id="2019-04-01">2019-04-01</h2>
+
+<ul>
+<li>Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+<ul>
+<li>They asked if we had plans to enable RDF support in CGSpace</li>
+</ul></li>
+<li>There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+<ul>
+<li>I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!</li>
+</ul></li>
+</ul>
+
+<pre><code># cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+</code></pre>
+
+<ul>
+<li>In the last two weeks there have been 47,000 downloads of this <em>same exact PDF</em> by these three IP addresses</li>
+<li>Apply country and region corrections and deletions on DSpace Test and CGSpace:</li>
+</ul>
+
+<pre><code>$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+</code></pre>
-
diff --git a/docs/posts/index.html b/docs/posts/index.html
index 31f4e9680..181beeeb9 100644
--- a/docs/posts/index.html
+++ b/docs/posts/index.html
@@ -102,6 +102,34 @@
2019-04-01
+
+
+- Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+
+- They asked if we had plans to enable RDF support in CGSpace
+
+- There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+
+- I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!
+
+
+
+# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+
+
+
+- In the last two weeks there have been 47,000 downloads of this same exact PDF by these three IP addresses
+- Apply country and region corrections and deletions on DSpace Test and CGSpace:
+
+
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
Read more →
diff --git a/docs/posts/index.xml b/docs/posts/index.xml
index 7db9c2c87..44f852384 100644
--- a/docs/posts/index.xml
+++ b/docs/posts/index.xml
@@ -17,7 +17,35 @@
Mon, 01 Apr 2019 09:00:43 +0300
https://alanorth.github.io/cgspace-notes/2019-04/
- <h2 id="2019-04-01">2019-04-01</h2>
+ <h2 id="2019-04-01">2019-04-01</h2>
+
+<ul>
+<li>Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+<ul>
+<li>They asked if we had plans to enable RDF support in CGSpace</li>
+</ul></li>
+<li>There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+<ul>
+<li>I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!</li>
+</ul></li>
+</ul>
+
+<pre><code># cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+</code></pre>
+
+<ul>
+<li>In the last two weeks there have been 47,000 downloads of this <em>same exact PDF</em> by these three IP addresses</li>
+<li>Apply country and region corrections and deletions on DSpace Test and CGSpace:</li>
+</ul>
+
+<pre><code>$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+</code></pre>
-
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index 0b5be6adb..023f87fed 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -9,7 +9,7 @@
https://alanorth.github.io/cgspace-notes/2019-03/
- 2019-03-31T17:35:28+03:00
+ 2019-04-01T09:02:18+03:00
diff --git a/docs/tags/index.html b/docs/tags/index.html
index 96fba5cd8..530c99caf 100644
--- a/docs/tags/index.html
+++ b/docs/tags/index.html
@@ -102,6 +102,34 @@
2019-04-01
+
+
+- Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+
+- They asked if we had plans to enable RDF support in CGSpace
+
+- There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+
+- I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!
+
+
+
+# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+
+
+
+- In the last two weeks there have been 47,000 downloads of this same exact PDF by these three IP addresses
+- Apply country and region corrections and deletions on DSpace Test and CGSpace:
+
+
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
Read more →
diff --git a/docs/tags/notes/index.html b/docs/tags/notes/index.html
index 5d6b03dc3..d3d7cb160 100644
--- a/docs/tags/notes/index.html
+++ b/docs/tags/notes/index.html
@@ -87,6 +87,34 @@
2019-04-01
+
+
+- Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+
+- They asked if we had plans to enable RDF support in CGSpace
+
+- There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+
+- I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!
+
+
+
+# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+
+
+
+- In the last two weeks there have been 47,000 downloads of this same exact PDF by these three IP addresses
+- Apply country and region corrections and deletions on DSpace Test and CGSpace:
+
+
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
Read more →
diff --git a/docs/tags/notes/index.xml b/docs/tags/notes/index.xml
index 4a29bd7c1..a25a4d4ed 100644
--- a/docs/tags/notes/index.xml
+++ b/docs/tags/notes/index.xml
@@ -17,7 +17,35 @@
Mon, 01 Apr 2019 09:00:43 +0300
https://alanorth.github.io/cgspace-notes/2019-04/
- <h2 id="2019-04-01">2019-04-01</h2>
+ <h2 id="2019-04-01">2019-04-01</h2>
+
+<ul>
+<li>Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc
+
+<ul>
+<li>They asked if we had plans to enable RDF support in CGSpace</li>
+</ul></li>
+<li>There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today
+
+<ul>
+<li>I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!</li>
+</ul></li>
+</ul>
+
+<pre><code># cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+ 4432 200
+</code></pre>
+
+<ul>
+<li>In the last two weeks there have been 47,000 downloads of this <em>same exact PDF</em> by these three IP addresses</li>
+<li>Apply country and region corrections and deletions on DSpace Test and CGSpace:</li>
+</ul>
+
+<pre><code>$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+</code></pre>
-