From 3a583c4f86b21db8517bdc8a7946199ad4cbd085 Mon Sep 17 00:00:00 2001
From: Alan Orth
Date: Wed, 26 Jun 2024 12:46:08 +0300
Subject: [PATCH] src/main/java: more DOI normalization

Normalize %2f to /.
---
 .../java/io/github/ilri/cgspace/ctasks/NormalizeDOIs.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/main/java/io/github/ilri/cgspace/ctasks/NormalizeDOIs.java b/src/main/java/io/github/ilri/cgspace/ctasks/NormalizeDOIs.java
index bcd05ee..5bf8bfe 100644
--- a/src/main/java/io/github/ilri/cgspace/ctasks/NormalizeDOIs.java
+++ b/src/main/java/io/github/ilri/cgspace/ctasks/NormalizeDOIs.java
@@ -29,7 +29,7 @@ import java.util.List;
  * TODO: allow operation on communities and collections (currently only works on items)
  *
  * @author Alan Orth for the International Livestock Research Institute
- * @version 7.6.1.2
+ * @version 7.6.1.3
  * @since 7.6.1.1
  */
 @Suspendable
@@ -88,6 +88,8 @@ public class NormalizeDOIs extends AbstractCurationTask {
  newDOI = newDOI.replace("dx.doi.org", "doi.org");
  // Prefer doi.org to www.doi.org
  newDOI = newDOI.replace("www.doi.org", "doi.org");
+ // Fix URL encoded slashes (%2f)
+ newDOI = newDOI.replace("%2f", "/");
  // Replace values like doi: 10.11648/j.jps.20140201.14
  newDOI = newDOI.replaceAll("^doi: 10\\.", "https://doi.org/10.");
  // Replace values like 10.3390/foods12010115