mirror of
https://github.com/ilri/cgspace-java-helpers.git
synced 2025-01-10 22:13:23 +01:00
Add new NormalizeDOIs curation task
This commit is contained in:
parent
b15dd50c16
commit
7a91305742
@ -4,6 +4,7 @@ DSpace curation tasks and other Java-based helpers used on the [CGSpace](https:/
|
|||||||
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
|
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
|
||||||
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
|
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
|
||||||
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
|
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
|
||||||
|
- **NormalizeDOIs**: normalize DOIs by stripping whitespace, lowercasing, and converting to https://doi.org/ format
|
||||||
|
|
||||||
Tested on DSpace 7.6. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
|
Tested on DSpace 7.6. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
|
||||||
|
|
||||||
|
@ -0,0 +1,97 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2024 Alan Orth
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: GPL-3.0-only
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.github.ilri.cgspace.ctasks;
|
||||||
|
|
||||||
|
import org.dspace.content.DSpaceObject;
|
||||||
|
import org.dspace.content.Item;
|
||||||
|
import org.dspace.content.MetadataValue;
|
||||||
|
import org.dspace.core.Constants;
|
||||||
|
import org.dspace.curate.AbstractCurationTask;
|
||||||
|
import org.dspace.curate.Curator;
|
||||||
|
import org.dspace.curate.Suspendable;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Attempt to normalize DOIs by stripping whitespace, lower casing, and
|
||||||
|
* converting to <code>https://doi.org</code> format. The reason is that DOIs are case
|
||||||
|
* insensitive and must be unique, which we can only guarantee if they are
|
||||||
|
* normalized to the same format.
|
||||||
|
*
|
||||||
|
* See: <a href="https://www.crossref.org/documentation/member-setup/constructing-your-dois/">https://www.crossref.org/documentation/member-setup/constructing-your-dois/</a>
|
||||||
|
*
|
||||||
|
* TODO: set curation to failed if invalid DOI submitted (and configure to reject in workflow)
|
||||||
|
* TODO: allow operation on communities and collections (currently only works on items)
|
||||||
|
*
|
||||||
|
* @author Alan Orth for the International Livestock Research Institute
|
||||||
|
* @version 7.6.1.1
|
||||||
|
* @since 7.6.1.1
|
||||||
|
*/
|
||||||
|
@Suspendable
|
||||||
|
public class NormalizeDOIs extends AbstractCurationTask {
|
||||||
|
@Override
|
||||||
|
public int perform(DSpaceObject dso) throws IOException {
|
||||||
|
if (dso.getType() == Constants.ITEM) {
|
||||||
|
Item item = (Item) dso;
|
||||||
|
String result;
|
||||||
|
|
||||||
|
// Keep track of whether we change metadata, and how many
|
||||||
|
boolean metadataChanged = false;
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
|
// Hard coding the metadata field for now since I can't figure out how to read the taskProperty
|
||||||
|
List<MetadataValue> itemDOIs = itemService.getMetadataByMetadataString(item, "cg.identifier.doi");
|
||||||
|
|
||||||
|
// skip items that don't have DOIs
|
||||||
|
if (itemDOIs.isEmpty()) {
|
||||||
|
setResult("No DOIs, skipping");
|
||||||
|
return Curator.CURATE_SKIP;
|
||||||
|
} else {
|
||||||
|
for (MetadataValue itemDOI : itemDOIs) {
|
||||||
|
String newDOI = getNormalizedDOI(itemDOI);
|
||||||
|
|
||||||
|
// Check if the normalized DOI is different than the original
|
||||||
|
if (!newDOI.equals(itemDOI.getValue())) {
|
||||||
|
itemDOI.setValue(newDOI);
|
||||||
|
metadataChanged = true;
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (metadataChanged) {
|
||||||
|
result = "Normalized " + count + " DOI(s)";
|
||||||
|
} else {
|
||||||
|
result = "All DOIs already normalized";
|
||||||
|
}
|
||||||
|
report(result);
|
||||||
|
setResult(result);
|
||||||
|
|
||||||
|
return Curator.CURATE_SUCCESS;
|
||||||
|
} else {
|
||||||
|
setResult("Object skipped");
|
||||||
|
return Curator.CURATE_SKIP;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String getNormalizedDOI(MetadataValue itemDOI) {
|
||||||
|
// 1. Convert to lowercase
|
||||||
|
String newDOI = itemDOI.getValue().toLowerCase();
|
||||||
|
// 2. Strip leading and trailing whitespace
|
||||||
|
newDOI = newDOI.strip();
|
||||||
|
// 3. Convert to HTTPS
|
||||||
|
newDOI = newDOI.replace("http://", "https://");
|
||||||
|
// 4. Prefer doi.org to dx.doi.org
|
||||||
|
newDOI = newDOI.replace("dx.doi.org", "doi.org");
|
||||||
|
// 5. Replace values like doi: 10.11648/j.jps.20140201.14
|
||||||
|
newDOI = newDOI.replaceAll("^doi: 10\\.", "https://doi.org/10.");
|
||||||
|
// 6. Replace values like 10.3390/foods12010115
|
||||||
|
newDOI = newDOI.replaceAll("^10\\.", "https://doi.org/10.");
|
||||||
|
|
||||||
|
return newDOI;
|
||||||
|
}
|
||||||
|
}
|
@ -2,6 +2,7 @@
|
|||||||
DSpace curation tasks used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
|
DSpace curation tasks used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
|
||||||
|
|
||||||
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
|
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
|
||||||
|
- **NormalizeDOIs**: normalize DOIs by stripping whitespace, lowercasing, and converting to https://doi.org/ format
|
||||||
|
|
||||||
Tested on DSpace 7.6. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
|
Tested on DSpace 7.6. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
|
||||||
|
|
||||||
@ -39,6 +40,7 @@ Add the curation task to DSpace's `config/modules/curate.cfg`:
|
|||||||
```
|
```
|
||||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger
|
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger
|
||||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger.force
|
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger.force
|
||||||
|
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.NormalizeDOIs = normalizedois
|
||||||
```
|
```
|
||||||
|
|
||||||
And then add the following variables to your `local.cfg` or some other [configuration file that is included](https://wiki.lyrasis.org/display/DSDOC6x/Configuration+Reference#ConfigurationReference-IncludingotherPropertyFiles):
|
And then add the following variables to your `local.cfg` or some other [configuration file that is included](https://wiki.lyrasis.org/display/DSDOC6x/Configuration+Reference#ConfigurationReference-IncludingotherPropertyFiles):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user