mirror of
https://github.com/ilri/cgspace-java-helpers.git
synced 2024-12-22 21:22:22 +01:00
Add new NormalizeDOIs curation task
This commit is contained in:
parent
b15dd50c16
commit
7a91305742
@ -4,6 +4,7 @@ DSpace curation tasks and other Java-based helpers used on the [CGSpace](https:/
|
||||
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
|
||||
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
|
||||
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
|
||||
- **NormalizeDOIs**: normalize DOIs by stripping whitespace, lowercasing, and converting to https://doi.org/ format
|
||||
|
||||
Tested on DSpace 7.6. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
|
||||
|
||||
|
@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-only
|
||||
*/
|
||||
|
||||
package io.github.ilri.cgspace.ctasks;
|
||||
|
||||
import org.dspace.content.DSpaceObject;
|
||||
import org.dspace.content.Item;
|
||||
import org.dspace.content.MetadataValue;
|
||||
import org.dspace.core.Constants;
|
||||
import org.dspace.curate.AbstractCurationTask;
|
||||
import org.dspace.curate.Curator;
|
||||
import org.dspace.curate.Suspendable;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Attempt to normalize DOIs by stripping whitespace, lower casing, and
|
||||
* converting to <code>https://doi.org</code> format. The reason is that DOIs are case
|
||||
* insensitive and must be unique, which we can only guarantee if they are
|
||||
* normalized to the same format.
|
||||
*
|
||||
* See: <a href="https://www.crossref.org/documentation/member-setup/constructing-your-dois/">https://www.crossref.org/documentation/member-setup/constructing-your-dois/</a>
|
||||
*
|
||||
* TODO: set curation to failed if invalid DOI submitted (and configure to reject in workflow)
|
||||
* TODO: allow operation on communities and collections (currently only works on items)
|
||||
*
|
||||
* @author Alan Orth for the International Livestock Research Institute
|
||||
* @version 7.6.1.1
|
||||
* @since 7.6.1.1
|
||||
*/
|
||||
@Suspendable
|
||||
public class NormalizeDOIs extends AbstractCurationTask {
|
||||
@Override
|
||||
public int perform(DSpaceObject dso) throws IOException {
|
||||
if (dso.getType() == Constants.ITEM) {
|
||||
Item item = (Item) dso;
|
||||
String result;
|
||||
|
||||
// Keep track of whether we change metadata, and how many
|
||||
boolean metadataChanged = false;
|
||||
int count = 0;
|
||||
|
||||
// Hard coding the metadata field for now since I can't figure out how to read the taskProperty
|
||||
List<MetadataValue> itemDOIs = itemService.getMetadataByMetadataString(item, "cg.identifier.doi");
|
||||
|
||||
// skip items that don't have DOIs
|
||||
if (itemDOIs.isEmpty()) {
|
||||
setResult("No DOIs, skipping");
|
||||
return Curator.CURATE_SKIP;
|
||||
} else {
|
||||
for (MetadataValue itemDOI : itemDOIs) {
|
||||
String newDOI = getNormalizedDOI(itemDOI);
|
||||
|
||||
// Check if the normalized DOI is different than the original
|
||||
if (!newDOI.equals(itemDOI.getValue())) {
|
||||
itemDOI.setValue(newDOI);
|
||||
metadataChanged = true;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (metadataChanged) {
|
||||
result = "Normalized " + count + " DOI(s)";
|
||||
} else {
|
||||
result = "All DOIs already normalized";
|
||||
}
|
||||
report(result);
|
||||
setResult(result);
|
||||
|
||||
return Curator.CURATE_SUCCESS;
|
||||
} else {
|
||||
setResult("Object skipped");
|
||||
return Curator.CURATE_SKIP;
|
||||
}
|
||||
}
|
||||
|
||||
private static String getNormalizedDOI(MetadataValue itemDOI) {
|
||||
// 1. Convert to lowercase
|
||||
String newDOI = itemDOI.getValue().toLowerCase();
|
||||
// 2. Strip leading and trailing whitespace
|
||||
newDOI = newDOI.strip();
|
||||
// 3. Convert to HTTPS
|
||||
newDOI = newDOI.replace("http://", "https://");
|
||||
// 4. Prefer doi.org to dx.doi.org
|
||||
newDOI = newDOI.replace("dx.doi.org", "doi.org");
|
||||
// 5. Replace values like doi: 10.11648/j.jps.20140201.14
|
||||
newDOI = newDOI.replaceAll("^doi: 10\\.", "https://doi.org/10.");
|
||||
// 6. Replace values like 10.3390/foods12010115
|
||||
newDOI = newDOI.replaceAll("^10\\.", "https://doi.org/10.");
|
||||
|
||||
return newDOI;
|
||||
}
|
||||
}
|
@ -2,6 +2,7 @@
|
||||
DSpace curation tasks used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
|
||||
|
||||
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
|
||||
- **NormalizeDOIs**: normalize DOIs by stripping whitespace, lowercasing, and converting to https://doi.org/ format
|
||||
|
||||
Tested on DSpace 7.6. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
|
||||
|
||||
@ -39,6 +40,7 @@ Add the curation task to DSpace's `config/modules/curate.cfg`:
|
||||
```
|
||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger
|
||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger.force
|
||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.NormalizeDOIs = normalizedois
|
||||
```
|
||||
|
||||
And then add the following variables to your `local.cfg` or some other [configuration file that is included](https://wiki.lyrasis.org/display/DSDOC6x/Configuration+Reference#ConfigurationReference-IncludingotherPropertyFiles):
|
||||
|
Loading…
Reference in New Issue
Block a user