From a13c5e93b6b45cafd20dd9bbe7370d441bd75065 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Wed, 13 Sep 2017 16:42:17 +0300 Subject: [PATCH] Add notes for 2017-09-13 --- content/post/2017-09.md | 46 ++++++++++++++++++++++++++++++++ public/2017-09/index.html | 56 ++++++++++++++++++++++++++++++++++++--- public/sitemap.xml | 10 +++---- 3 files changed, 104 insertions(+), 8 deletions(-) diff --git a/content/post/2017-09.md b/content/post/2017-09.md index 6fb603609..f7ece7be0 100644 --- a/content/post/2017-09.md +++ b/content/post/2017-09.md @@ -173,8 +173,54 @@ $ sudo tcpdump -i en0 -w without-cached-xsd.dump dst port 80 and 'tcp[32:4] = 0x ``` - If this continues I will definitely need to figure out who is responsible for this scraper and add their user agent to the session crawler valve regex +- A search for "API scraper" user agent on Google returns a `robots.txt` with a comment that this is the Yewno bot: http://www.escholarship.org/robots.txt - Also, in looking at the DSpace logs I noticed a warning from OAI that I should look into: ``` WARN org.dspace.xoai.services.impl.xoai.DSpaceRepositoryConfiguration @ { OAI 2.0 :: DSpace } Not able to retrieve the dspace.oai.url property from oai.cfg. 
Falling back to request address ``` + +- Looking at the spreadsheet with deletions and corrections that CCAFS sent last week +- It appears they want to delete a lot of metadata, which I'm not sure they realize the implications of: + +``` +dspace=# select text_value, count(text_value) from metadatavalue where resource_type_id=2 and metadata_field_id in (134, 235) and text_value in ('EA_PAR','FP1_CSAEvidence','FP2_CRMWestAfrica','FP3_Gender','FP4_Baseline','FP4_CCPAG','FP4_CCPG','FP4_CIATLAM IMPACT','FP4_ClimateData','FP4_ClimateModels','FP4_GenderPolicy','FP4_GenderToolbox','FP4_Livestock','FP4_PolicyEngagement','FP_GII','SA_Biodiversity','SA_CSV','SA_GHGMeasurement','SEA_mitigationSAMPLES','SEA_UpscalingInnovation','WA_Partnership','WA_SciencePolicyExchange') group by text_value; + text_value | count +--------------------------+------- + FP4_ClimateModels | 6 + FP1_CSAEvidence | 7 + SEA_UpscalingInnovation | 7 + FP4_Baseline | 69 + WA_Partnership | 1 + WA_SciencePolicyExchange | 6 + SA_GHGMeasurement | 2 + SA_CSV | 7 + EA_PAR | 18 + FP4_Livestock | 7 + FP4_GenderPolicy | 4 + FP2_CRMWestAfrica | 12 + FP4_ClimateData | 24 + FP4_CCPAG | 2 + SEA_mitigationSAMPLES | 2 + SA_Biodiversity | 1 + FP4_PolicyEngagement | 20 + FP3_Gender | 9 + FP4_GenderToolbox | 3 +(19 rows) +``` + +- I sent CCAFS people an email to ask if they really want to remove these 200+ tags +- She responded yes, so I'll at least need to do these deletes in PostgreSQL: + +``` +dspace=# delete from metadatavalue where resource_type_id=2 and metadata_field_id in (134, 235) and text_value in ('EA_PAR','FP1_CSAEvidence','FP2_CRMWestAfrica','FP3_Gender','FP4_Baseline','FP4_CCPAG','FP4_CCPG','FP4_CIATLAM IMPACT','FP4_ClimateData','FP4_ClimateModels','FP4_GenderPolicy','FP4_GenderToolbox','FP4_Livestock','FP4_PolicyEngagement','FP_GII','SA_Biodiversity','SA_CSV','SA_GHGMeasurement','SEA_mitigationSAMPLES','SEA_UpscalingInnovation','WA_Partnership','WA_SciencePolicyExchange','FP_GII'); +DELETE 207 +``` + +- 
When we discussed this in late July there were some other renames they had requested, but I don't see them in the current spreadsheet so I will have to follow that up +- Create and merge pull request to shut up the Ehcache update check ([#337](https://github.com/ilri/DSpace/pull/337)) +- It looks like there was a previous attempt to disable these update checks that was merged in DSpace 4.0 (although it only affects XMLUI): https://jira.duraspace.org/browse/DS-1492 +- I commented there suggesting that we disable it globally +- I merged the changes to the CCAFS project tags ([#336](https://github.com/ilri/DSpace/pull/336)) but still need to finalize the metadata deletions/renames +- I merged the CGIAR Library theme changes ([#338](https://github.com/ilri/DSpace/pull/338)) to the `5_x-prod` branch in preparation for next week's migration +- I emailed the Handle administrators (hdladmin@cnri.reston.va.us) to ask them what the process is for changing their prefix to be resolved by our resolver diff --git a/public/2017-09/index.html b/public/2017-09/index.html index f0446cc90..f56d55d69 100644 --- a/public/2017-09/index.html +++ b/public/2017-09/index.html @@ -25,7 +25,7 @@ Ask Sisay to clean up the WLE approvers a bit, as Marianne’s user account - + @@ -61,9 +61,9 @@ Ask Sisay to clean up the WLE approvers a bit, as Marianne’s user account "@type": "BlogPosting", "headline": "September, 2017", "url": "https://alanorth.github.io/cgspace-notes/2017-09/", - "wordCount": "1241", + "wordCount": "1566", "datePublished": "2017-09-07T16:54:52+07:00", - "dateModified": "2017-09-12T16:57:19+03:00", + "dateModified": "2017-09-13T09:53:54+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -319,12 +319,62 @@ dspace.log.2017-09-10:0
WARN  org.dspace.xoai.services.impl.xoai.DSpaceRepositoryConfiguration @ { OAI 2.0 :: DSpace } Not able to retrieve the dspace.oai.url property from oai.cfg. Falling back to request address
 
+ + +
dspace=# select text_value, count(text_value) from metadatavalue where resource_type_id=2 and metadata_field_id in (134, 235) and text_value in ('EA_PAR','FP1_CSAEvidence','FP2_CRMWestAfrica','FP3_Gender','FP4_Baseline','FP4_CCPAG','FP4_CCPG','FP4_CIATLAM IMPACT','FP4_ClimateData','FP4_ClimateModels','FP4_GenderPolicy','FP4_GenderToolbox','FP4_Livestock','FP4_PolicyEngagement','FP_GII','SA_Biodiversity','SA_CSV','SA_GHGMeasurement','SEA_mitigationSAMPLES','SEA_UpscalingInnovation','WA_Partnership','WA_SciencePolicyExchange') group by text_value;                                                                                                                                                                                                                  
+        text_value        | count                              
+--------------------------+-------                             
+ FP4_ClimateModels        |     6                              
+ FP1_CSAEvidence          |     7                              
+ SEA_UpscalingInnovation  |     7                              
+ FP4_Baseline             |    69                              
+ WA_Partnership           |     1                              
+ WA_SciencePolicyExchange |     6                              
+ SA_GHGMeasurement        |     2                              
+ SA_CSV                   |     7                              
+ EA_PAR                   |    18                              
+ FP4_Livestock            |     7                              
+ FP4_GenderPolicy         |     4                              
+ FP2_CRMWestAfrica        |    12                              
+ FP4_ClimateData          |    24                              
+ FP4_CCPAG                |     2                              
+ SEA_mitigationSAMPLES    |     2                              
+ SA_Biodiversity          |     1                              
+ FP4_PolicyEngagement     |    20                              
+ FP3_Gender               |     9                              
+ FP4_GenderToolbox        |     3                              
+(19 rows)
+
+ + + +
dspace=# delete from metadatavalue where resource_type_id=2 and metadata_field_id in (134, 235) and text_value in ('EA_PAR','FP1_CSAEvidence','FP2_CRMWestAfrica','FP3_Gender','FP4_Baseline','FP4_CCPAG','FP4_CCPG','FP4_CIATLAM IMPACT','FP4_ClimateData','FP4_ClimateModels','FP4_GenderPolicy','FP4_GenderToolbox','FP4_Livestock','FP4_PolicyEngagement','FP_GII','SA_Biodiversity','SA_CSV','SA_GHGMeasurement','SEA_mitigationSAMPLES','SEA_UpscalingInnovation','WA_Partnership','WA_SciencePolicyExchange','FP_GII');
+DELETE 207
+
+ + + diff --git a/public/sitemap.xml b/public/sitemap.xml index fb7dd2ad1..d64aed29a 100644 --- a/public/sitemap.xml +++ b/public/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2017-09/ - 2017-09-12T16:57:19+03:00 + 2017-09-13T09:53:54+03:00 @@ -119,7 +119,7 @@ https://alanorth.github.io/cgspace-notes/ - 2017-09-12T16:57:19+03:00 + 2017-09-13T09:53:54+03:00 0 @@ -130,19 +130,19 @@ https://alanorth.github.io/cgspace-notes/tags/notes/ - 2017-09-12T16:57:19+03:00 + 2017-09-13T09:53:54+03:00 0 https://alanorth.github.io/cgspace-notes/post/ - 2017-09-12T16:57:19+03:00 + 2017-09-13T09:53:54+03:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2017-09-12T16:57:19+03:00 + 2017-09-13T09:53:54+03:00 0