diff --git a/README.md b/README.md
index 30ceeb9..e6364f4 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,7 @@ DSpace curation tasks and other Java-based helpers used on the [CGSpace](https:/
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
+- **NormalizeDOIs**: normalize DOIs by stripping whitespace, lowercasing, and converting to https://doi.org/ format
Tested on DSpace 7.6. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
diff --git a/src/main/java/io/github/ilri/cgspace/ctasks/NormalizeDOIs.java b/src/main/java/io/github/ilri/cgspace/ctasks/NormalizeDOIs.java
new file mode 100644
index 0000000..99e6234
--- /dev/null
+++ b/src/main/java/io/github/ilri/cgspace/ctasks/NormalizeDOIs.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2024 Alan Orth
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+package io.github.ilri.cgspace.ctasks;
+
+import org.dspace.content.DSpaceObject;
+import org.dspace.content.Item;
+import org.dspace.content.MetadataValue;
+import org.dspace.core.Constants;
+import org.dspace.curate.AbstractCurationTask;
+import org.dspace.curate.Curator;
+import org.dspace.curate.Suspendable;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Locale;
+import java.util.regex.Pattern;
+
+/**
+ * Attempt to normalize DOIs by stripping whitespace, lower casing, and
+ * converting to https://doi.org format. The reason is that DOIs are case
+ * insensitive and must be unique, which we can only guarantee if they are
+ * normalized to the same format.
+ *
+ * See: https://www.crossref.org/documentation/member-setup/constructing-your-dois/
+ *
+ * TODO: set curation to failed if invalid DOI submitted (and configure to reject in workflow)
+ * TODO: allow operation on communities and collections (currently only works on items)
+ *
+ * @author Alan Orth for the International Livestock Research Institute
+ * @version 7.6.1.1
+ * @since 7.6.1.1
+ */
+@Suspendable
+public class NormalizeDOIs extends AbstractCurationTask {
+    /** Metadata field that holds the DOIs; hard coded until the task property can be read. */
+    private static final String DOI_FIELD = "cg.identifier.doi";
+
+    /** Prefix that a labelled or bare DOI is rewritten to start with. */
+    private static final String DOI_RESOLVER_PREFIX = "https://doi.org/10.";
+
+    /** Leading "doi:" label followed by any amount of whitespace, e.g. "doi: 10.11648/j.jps.20140201.14". */
+    private static final Pattern LABELLED_DOI = Pattern.compile("^doi:\\s*10\\.");
+
+    /** Bare DOI with no resolver in front, e.g. "10.3390/foods12010115". */
+    private static final Pattern BARE_DOI = Pattern.compile("^10\\.");
+
+    /**
+     * Normalize every value of the DOI metadata field on an item.
+     *
+     * @param dso the object being curated; anything that is not an item is skipped
+     * @return {@code Curator.CURATE_SKIP} when the object is not an item or has no DOIs,
+     *         otherwise {@code Curator.CURATE_SUCCESS}
+     * @throws IOException declared by the overridden method; not thrown directly by this code
+     */
+    @Override
+    public int perform(DSpaceObject dso) throws IOException {
+        if (dso.getType() != Constants.ITEM) {
+            setResult("Object skipped");
+            return Curator.CURATE_SKIP;
+        }
+
+        Item item = (Item) dso;
+        // itemService is not declared in this class; presumably inherited from AbstractCurationTask
+        List<MetadataValue> itemDOIs = itemService.getMetadataByMetadataString(item, DOI_FIELD);
+
+        // skip items that don't have DOIs
+        if (itemDOIs.isEmpty()) {
+            setResult("No DOIs, skipping");
+            return Curator.CURATE_SKIP;
+        }
+
+        // Number of values that were actually rewritten
+        int count = 0;
+        for (MetadataValue itemDOI : itemDOIs) {
+            String currentDOI = itemDOI.getValue();
+            // A value-less metadata entry has nothing to normalize
+            if (currentDOI == null) {
+                continue;
+            }
+
+            String newDOI = getNormalizedDOI(itemDOI);
+            // Only touch the value if normalization changed it
+            if (!newDOI.equals(currentDOI)) {
+                // NOTE(review): only setValue() is called here, no explicit item update;
+                // confirm the curation framework persists the change.
+                itemDOI.setValue(newDOI);
+                count++;
+            }
+        }
+
+        String result = count > 0
+                ? "Normalized " + count + " DOI(s)"
+                : "All DOIs already normalized";
+        report(result);
+        setResult(result);
+
+        return Curator.CURATE_SUCCESS;
+    }
+
+    /**
+     * Build the normalized form of a DOI metadata value without modifying it.
+     *
+     * @param itemDOI metadata value whose text is the DOI; its value must not be null
+     * @return the lowercased, stripped DOI in https://doi.org form where a known pattern matched
+     */
+    private static String getNormalizedDOI(MetadataValue itemDOI) {
+        // 1. Convert to lowercase; Locale.ROOT keeps the result independent of the JVM default
+        //    locale (a Turkish default would otherwise map "I" to a dotless "ı")
+        String newDOI = itemDOI.getValue().toLowerCase(Locale.ROOT);
+        // 2. Strip leading and trailing whitespace
+        newDOI = newDOI.strip();
+        // 3. Convert to HTTPS
+        newDOI = newDOI.replace("http://", "https://");
+        // 4. Prefer doi.org to dx.doi.org
+        newDOI = newDOI.replace("dx.doi.org", "doi.org");
+        // 5. Replace values like doi: 10.11648/j.jps.20140201.14 (any spacing after the colon)
+        newDOI = LABELLED_DOI.matcher(newDOI).replaceFirst(DOI_RESOLVER_PREFIX);
+        // 6. Replace values like 10.3390/foods12010115
+        newDOI = BARE_DOI.matcher(newDOI).replaceFirst(DOI_RESOLVER_PREFIX);
+
+        return newDOI;
+    }
+}
diff --git a/src/main/java/io/github/ilri/cgspace/ctasks/README.md b/src/main/java/io/github/ilri/cgspace/ctasks/README.md
index 996e6eb..8aa2d72 100644
--- a/src/main/java/io/github/ilri/cgspace/ctasks/README.md
+++ b/src/main/java/io/github/ilri/cgspace/ctasks/README.md
@@ -2,6 +2,7 @@
DSpace curation tasks used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
+- **NormalizeDOIs**: normalize DOIs by stripping whitespace, lowercasing, and converting to https://doi.org/ format
Tested on DSpace 7.6. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC5x/Curation+System).
@@ -39,6 +40,7 @@ Add the curation task to DSpace's `config/modules/curate.cfg`:
```
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger.force
+plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.NormalizeDOIs = normalizedois
```
And then add the following variables to your `local.cfg` or some other [configuration file that is included](https://wiki.lyrasis.org/display/DSDOC6x/Configuration+Reference#ConfigurationReference-IncludingotherPropertyFiles):