diff --git a/content/posts/2019-03.md b/content/posts/2019-03.md index e44968d6a..cf0cda7ab 100644 --- a/content/posts/2019-03.md +++ b/content/posts/2019-03.md @@ -990,11 +990,11 @@ $ grep -I -c 45.5.184.72 dspace.log.2019-03-26 ![linode18 CPU usage after migration](/cgspace-notes/2019/03/cpu-week-migrated.png) - It is frustrating to see that the load spikes for own own legitimate load on the server were *very* aggravated and drawn out by the contention for CPU on this host -- We had almost 4.2 million hits this month according to the web server logs: +- We had 4.2 million hits this month according to the web server logs: ``` # time zcat --force /var/log/nginx/* | grep -cE "[0-9]{1,2}/Mar/2019" -4170986 +4218841 real 0m26.609s user 0m31.657s diff --git a/content/posts/2019-04.md b/content/posts/2019-04.md index 7bfe02148..2038631aa 100644 --- a/content/posts/2019-04.md +++ b/content/posts/2019-04.md @@ -7,6 +7,26 @@ tags: ["Notes"] ## 2019-04-01 +- Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc + - They asked if we had plans to enable RDF support in CGSpace +- There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today + - I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200! + +``` +# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5 + 4432 200 +``` + +- In the last two weeks there have been 47,000 downloads of this *same exact PDF* by these three IP addresses +- Apply country and region corrections and deletions on DSpace Test and CGSpace: + +``` +$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d +$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d +$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d +$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d +``` + ## 2019-04-02 diff --git a/docs/2019-03/index.html b/docs/2019-03/index.html index ba6f6f93d..343e47d95 100644 --- a/docs/2019-03/index.html +++ b/docs/2019-03/index.html @@ -25,7 +25,7 @@ I think I will need to ask Udana to re-copy and paste the abstracts with more ca - + @@ -55,9 +55,9 @@ I think I will need to ask Udana to re-copy and paste the abstracts with more ca "@type": "BlogPosting", "headline": "March, 2019", "url": "https://alanorth.github.io/cgspace-notes/2019-03/", - "wordCount": "7106", + "wordCount": "7105", "datePublished": "2019-03-01T12:16:30+01:00", - "dateModified": "2019-03-31T17:35:28+03:00", + "dateModified": "2019-04-01T09:02:18+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -1298,11 +1298,11 @@ $ ./delete-metadata-values.py -i /tmp/2019-03-26-AGROVOC-79-deletions.csv -db ds
# time zcat --force /var/log/nginx/* | grep -cE "[0-9]{1,2}/Mar/2019"
-4170986
+4218841
 
 real    0m26.609s
 user    0m31.657s
diff --git a/docs/2019-04/index.html b/docs/2019-04/index.html
index 089f24f9a..76b3c3a38 100644
--- a/docs/2019-04/index.html
+++ b/docs/2019-04/index.html
@@ -6,7 +6,35 @@
 
 
 
-
+
 
 
 
@@ -14,7 +42,35 @@
 
 
 
-
+
 
 
 
@@ -25,7 +81,7 @@
   "@type": "BlogPosting",
   "headline": "April, 2019",
   "url": "https://alanorth.github.io/cgspace-notes/2019-04/",
-  "wordCount": "2",
+  "wordCount": "188",
   "datePublished": "2019-04-01T09:00:43+03:00",
   "dateModified": "2019-04-01T09:01:43+03:00",
   "author": {
@@ -97,6 +153,34 @@
   
   

2019-04-01

+ + +
# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+   4432 200
+
+ + + +
$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
+

2019-04-02

diff --git a/docs/categories/index.html b/docs/categories/index.html index 86b078cf0..220c1c4e3 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -100,6 +100,34 @@

2019-04-01

+ + + +
# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+   4432 200
+
+ + + +
$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
Read more → diff --git a/docs/index.html b/docs/index.html index 82fc9b3b3..1ecfd9461 100644 --- a/docs/index.html +++ b/docs/index.html @@ -102,6 +102,34 @@

2019-04-01

+ + + +
# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+   4432 200
+
+ + + +
$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
Read more → diff --git a/docs/index.xml b/docs/index.xml index 6b0478b51..2eef14b50 100644 --- a/docs/index.xml +++ b/docs/index.xml @@ -17,7 +17,35 @@ Mon, 01 Apr 2019 09:00:43 +0300 https://alanorth.github.io/cgspace-notes/2019-04/ - <h2 id="2019-04-01">2019-04-01</h2> + <h2 id="2019-04-01">2019-04-01</h2> + +<ul> +<li>Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc + +<ul> +<li>They asked if we had plans to enable RDF support in CGSpace</li> +</ul></li> +<li>There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today + +<ul> +<li>I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!</li> +</ul></li> +</ul> + +<pre><code># cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5 + 4432 200 +</code></pre> + +<ul> +<li>In the last two weeks there have been 47,000 downloads of this <em>same exact PDF</em> by these three IP addresses</li> +<li>Apply country and region corrections and deletions on DSpace Test and CGSpace:</li> +</ul> + +<pre><code>$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d +$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d +$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d +$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d +</code></pre> diff --git a/docs/posts/index.html b/docs/posts/index.html index 31f4e9680..181beeeb9 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -102,6 +102,34 @@

2019-04-01

+ +
    +
  • Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc + +
      +
    • They asked if we had plans to enable RDF support in CGSpace
    • +
  • +
  • There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today + +
      +
    • I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!
    • +
  • +
+ +
# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+   4432 200
+
+ +
    +
  • In the last two weeks there have been 47,000 downloads of this same exact PDF by these three IP addresses
  • +
  • Apply country and region corrections and deletions on DSpace Test and CGSpace:
  • +
+ +
$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
Read more → diff --git a/docs/posts/index.xml b/docs/posts/index.xml index 7db9c2c87..44f852384 100644 --- a/docs/posts/index.xml +++ b/docs/posts/index.xml @@ -17,7 +17,35 @@ Mon, 01 Apr 2019 09:00:43 +0300 https://alanorth.github.io/cgspace-notes/2019-04/ - <h2 id="2019-04-01">2019-04-01</h2> + <h2 id="2019-04-01">2019-04-01</h2> + +<ul> +<li>Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc + +<ul> +<li>They asked if we had plans to enable RDF support in CGSpace</li> +</ul></li> +<li>There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today + +<ul> +<li>I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!</li> +</ul></li> +</ul> + +<pre><code># cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5 + 4432 200 +</code></pre> + +<ul> +<li>In the last two weeks there have been 47,000 downloads of this <em>same exact PDF</em> by these three IP addresses</li> +<li>Apply country and region corrections and deletions on DSpace Test and CGSpace:</li> +</ul> + +<pre><code>$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d +$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d +$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d +$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d +</code></pre>
diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 0b5be6adb..023f87fed 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -9,7 +9,7 @@ https://alanorth.github.io/cgspace-notes/2019-03/ - 2019-03-31T17:35:28+03:00 + 2019-04-01T09:02:18+03:00 diff --git a/docs/tags/index.html b/docs/tags/index.html index 96fba5cd8..530c99caf 100644 --- a/docs/tags/index.html +++ b/docs/tags/index.html @@ -102,6 +102,34 @@

2019-04-01

+ +
    +
  • Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc + +
      +
    • They asked if we had plans to enable RDF support in CGSpace
    • +
  • +
  • There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today + +
      +
    • I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!
    • +
  • +
+ +
# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+   4432 200
+
+ +
    +
  • In the last two weeks there have been 47,000 downloads of this same exact PDF by these three IP addresses
  • +
  • Apply country and region corrections and deletions on DSpace Test and CGSpace:
  • +
+ +
$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
Read more → diff --git a/docs/tags/notes/index.html b/docs/tags/notes/index.html index 5d6b03dc3..d3d7cb160 100644 --- a/docs/tags/notes/index.html +++ b/docs/tags/notes/index.html @@ -87,6 +87,34 @@

2019-04-01

+ +
    +
  • Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc + +
      +
    • They asked if we had plans to enable RDF support in CGSpace
    • +
  • +
  • There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today + +
      +
    • I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!
    • +
  • +
+ +
# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5
+   4432 200
+
+ +
    +
  • In the last two weeks there have been 47,000 downloads of this same exact PDF by these three IP addresses
  • +
  • Apply country and region corrections and deletions on DSpace Test and CGSpace:
  • +
+ +
$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d
+$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d
+$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d
+
Read more → diff --git a/docs/tags/notes/index.xml b/docs/tags/notes/index.xml index 4a29bd7c1..a25a4d4ed 100644 --- a/docs/tags/notes/index.xml +++ b/docs/tags/notes/index.xml @@ -17,7 +17,35 @@ Mon, 01 Apr 2019 09:00:43 +0300 https://alanorth.github.io/cgspace-notes/2019-04/ - <h2 id="2019-04-01">2019-04-01</h2> + <h2 id="2019-04-01">2019-04-01</h2> + +<ul> +<li>Meeting with AgroKnow to discuss CGSpace, ILRI data, AReS, GARDIAN, etc + +<ul> +<li>They asked if we had plans to enable RDF support in CGSpace</li> +</ul></li> +<li>There have been 4,400 more downloads of the CTA Spore publication from those strange Amazon IP addresses today + +<ul> +<li>I suspected that some might not be successful, because the stats show less, but today they were all HTTP 200!</li> +</ul></li> +</ul> + +<pre><code># cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep 'Spore-192-EN-web.pdf' | grep -E '(18.196.196.108|18.195.78.144|18.195.218.6)' | awk '{print $9}' | sort | uniq -c | sort -n | tail -n 5 + 4432 200 +</code></pre> + +<ul> +<li>In the last two weeks there have been 47,000 downloads of this <em>same exact PDF</em> by these three IP addresses</li> +<li>Apply country and region corrections and deletions on DSpace Test and CGSpace:</li> +</ul> + +<pre><code>$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-9-countries.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.country -m 228 -t ACTION -d +$ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u dspace -p 'fuuu' -f cg.coverage.region -m 231 -t action -d +$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d +$ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d +</code></pre>