mirror of
https://github.com/ilri/cgspace-java-helpers.git
synced 2025-05-10 15:16:04 +02:00
Compare commits
50 Commits
v6.1
...
3c36452891
Author | SHA1 | Date | |
---|---|---|---|
3c36452891
|
|||
3a860dabe4
|
|||
5f44c9ea8a
|
|||
32a14c0ea5
|
|||
13d3dfb885
|
|||
1e7df1ce46
|
|||
443e5576ab
|
|||
8531992412
|
|||
27016f5f77
|
|||
3a583c4f86
|
|||
28668f76c9
|
|||
e0153fd38a
|
|||
12a606ac61
|
|||
692a62b454
|
|||
d4ca92066a
|
|||
5ad8c556e9
|
|||
77425c13bf
|
|||
5e0a456fb5
|
|||
9050caf37f
|
|||
639148dc19
|
|||
369f81d181
|
|||
7a91305742
|
|||
b15dd50c16
|
|||
0c35e81362
|
|||
2fb8d274c9
|
|||
169b063e9a
|
|||
0cb533b2c4
|
|||
ee6518035e
|
|||
14051984f3
|
|||
9faf657c59
|
|||
7fb78c2722
|
|||
6ef9f521bf
|
|||
1a345de36a
|
|||
eb66ccbd0d
|
|||
62138540ae
|
|||
c0d0e40321
|
|||
f2a637f0a8
|
|||
6e38a2f7e1
|
|||
f9d7e5f6a2
|
|||
9e965afdb7
|
|||
408a0e1c19
|
|||
ea9f669e9c
|
|||
546101bc92
|
|||
0a7cf7bf59
|
|||
8c0a8fbcd1
|
|||
c05a2e4f96
|
|||
cf2af393c0
|
|||
1f6ba4af67
|
|||
5ceaebaeae
|
|||
f3dcc6e261
|
12
.github/workflows/maven.yml
vendored
12
.github/workflows/maven.yml
vendored
@ -5,9 +5,9 @@ name: Build
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ dspace6 ]
|
||||
branches: [ dspace7 ]
|
||||
pull_request:
|
||||
branches: [ dspace6 ]
|
||||
branches: [ dspace7 ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
@ -15,11 +15,11 @@ jobs:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up JDK 8
|
||||
uses: actions/setup-java@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up JDK 17
|
||||
uses: actions/setup-java@v4
|
||||
with:
|
||||
java-version: 8
|
||||
java-version: 17
|
||||
distribution: 'temurin'
|
||||
cache: 'maven'
|
||||
- name: Build with Maven
|
||||
|
4
.idea/misc.xml
generated
4
.idea/misc.xml
generated
@ -1,11 +1,13 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ExternalStorageConfigurationManager" enabled="true" />
|
||||
<component name="MavenProjectsManager">
|
||||
<option name="originalFiles">
|
||||
<list>
|
||||
<option value="$PROJECT_DIR$/pom.xml" />
|
||||
</list>
|
||||
</option>
|
||||
<option name="workspaceImportForciblyTurnedOn" value="true" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_11" project-jdk-name="11" project-jdk-type="JavaSDK" />
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_11" project-jdk-name="17" project-jdk-type="JavaSDK" />
|
||||
</project>
|
30
CHANGELOG.md
30
CHANGELOG.md
@ -4,6 +4,36 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## Unreleased
|
||||
|
||||
## [7.6.1.3] - 2024-06-26
|
||||
### Updated
|
||||
- Add more formats to `NormalizeDOIs` curation task
|
||||
|
||||
## [7.6.1.2] - 2024-04-25
|
||||
### Changed
|
||||
- Remove reporting from curation tasks since "results" are enough
|
||||
|
||||
## [7.6.1.1] - 2024-04-23
|
||||
### Added
|
||||
- New `NormalizeDOIs` curation task
|
||||
|
||||
### Updated
|
||||
- Update dependencies in `pom.xml`
|
||||
|
||||
## [7.6.1] - 2024-01-02
|
||||
### Changed
|
||||
- Pin gson dependency to 2.9.0 to avoid dependency convergence issues with DSpace
|
||||
|
||||
## [7.6] - 2024-01-02
|
||||
### Updated
|
||||
- `iso_3166-1.json` from iso-codes 4.13.0-SNAPSHOT, which [adds common names for Iran, Laos, and Syria](https://salsa.debian.org/iso-codes-team/iso-codes/-/merge_requests/32)
|
||||
- DSpace 7.6 compatibility
|
||||
|
||||
## [6.2] - 2023-02-20
|
||||
### Updated
|
||||
- `iso_3166-1.json` from iso-codes 4.12.0, which updates the name for TR to "Türkiye"
|
||||
|
||||
## [6.1] - 2022-10-31
|
||||
### Updated
|
||||
- Update dependencies in `pom.xml`
|
||||
|
15
README.md
15
README.md
@ -4,8 +4,9 @@ DSpace curation tasks and other Java-based helpers used on the [CGSpace](https:/
|
||||
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
|
||||
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
|
||||
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
|
||||
- **NormalizeDOIs**: normalize DOIs by stripping whitespace, lowercasing, and converting to https://doi.org/ format
|
||||
|
||||
Tested on DSpace 6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC6x/Curation+System).
|
||||
Tested on DSpace 7.6.1. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
|
||||
|
||||
## Build and Install
|
||||
|
||||
@ -16,7 +17,7 @@ To use these curation tasks in a DSpace project add the following dependency to
|
||||
<dependency>
|
||||
<groupId>io.github.ilri.cgspace</groupId>
|
||||
<artifactId>cgspace-java-helpers</artifactId>
|
||||
<version>6.1-SNAPSHOT</version>
|
||||
<version>7.6.1.4-SNAPSHOT</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@ -32,18 +33,14 @@ $ mvn package
|
||||
Copy the resulting jar to the DSpace `lib` directory:
|
||||
|
||||
```console
|
||||
$ cp target/cgspace-java-helpers-6.1-SNAPSHOT.jar ~/dspace/lib/
|
||||
$ cp target/cgspace-java-helpers-7.6.1.4-SNAPSHOT.jar ~/dspace/lib/
|
||||
```
|
||||
|
||||
## Configuration
|
||||
Please refer to the appropriate README.md file:
|
||||
|
||||
- Curation Tasks: [src/main/java/io/github/ilri/cgspace/ctasks/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace6/src/main/java/io/github/ilri/cgspace/ctasks/README.md)
|
||||
- Scripts: [src/main/java/io/github/ilri/cgspace/scripts/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace6/src/main/java/io/github/ilri/cgspace/scripts/README.md)
|
||||
|
||||
## TODO
|
||||
|
||||
- Add a curation task to normalize DOIs to "https://doi.org" format
|
||||
- Curation Tasks: [src/main/java/io/github/ilri/cgspace/ctasks/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace7/src/main/java/io/github/ilri/cgspace/ctasks/README.md)
|
||||
- Scripts: [src/main/java/io/github/ilri/cgspace/scripts/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace7/src/main/java/io/github/ilri/cgspace/scripts/README.md)
|
||||
|
||||
## Notes
|
||||
This project was initially created according to the [Maven Getting Started Guide](https://maven.apache.org/guides/getting-started/):
|
||||
|
74
pom.xml
74
pom.xml
@ -6,50 +6,50 @@
|
||||
|
||||
<groupId>io.github.ilri.cgspace</groupId>
|
||||
<artifactId>cgspace-java-helpers</artifactId>
|
||||
<version>6.1</version>
|
||||
<version>7.6.1.4-SNAPSHOT</version>
|
||||
|
||||
<name>cgspace-java-helpers</name>
|
||||
<url>https://github.com/ilri/cgspace-java-helpers</url>
|
||||
<description>Curation tasks and helper scripts for the CGSpace institutional repository</description>
|
||||
|
||||
<developers>
|
||||
<developer>
|
||||
<name>Alan Orth</name>
|
||||
<email>maven@mjanja.mozmail.com</email>
|
||||
<organizationUrl>https://mjanja.ch</organizationUrl>
|
||||
</developer>
|
||||
</developers>
|
||||
|
||||
<licenses>
|
||||
<license>
|
||||
<name>GPL-3.0-only</name>
|
||||
<url>https://spdx.org/licenses/GPL-3.0-or-later.html</url>
|
||||
<url>https://spdx.org/licenses/GPL-3.0-only.html</url>
|
||||
</license>
|
||||
</licenses>
|
||||
|
||||
<!-- brings the sonatype snapshot repository and signing requirement on board -->
|
||||
<parent>
|
||||
<groupId>org.sonatype.oss</groupId>
|
||||
<artifactId>oss-parent</artifactId>
|
||||
<version>9</version>
|
||||
<relativePath />
|
||||
</parent>
|
||||
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<maven.compiler.source>1.8</maven.compiler.source>
|
||||
<maven.compiler.target>1.8</maven.compiler.target>
|
||||
<maven.compiler.release>11</maven.compiler.release>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.google.code.gson</groupId>
|
||||
<artifactId>gson</artifactId>
|
||||
<version>2.9.1</version>
|
||||
<version>2.9.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.dspace</groupId>
|
||||
<artifactId>dspace-api</artifactId>
|
||||
<version>6.3</version>
|
||||
<version>7.6.1</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<scm>
|
||||
<connection>scm:git:git://github.com/ilri/cgspace-java-helpers.git</connection>
|
||||
<developerConnection>scm:git:ssh://github.com:nanosai/cgspace-java-helpers.git</developerConnection>
|
||||
<url>http://github.com/ilri/cgspace-java-helpers</url>
|
||||
<developerConnection>scm:git:ssh://github.com:ilri/cgspace-java-helpers.git</developerConnection>
|
||||
<url>https://github.com/ilri/cgspace-java-helpers</url>
|
||||
</scm>
|
||||
|
||||
<distributionManagement>
|
||||
@ -69,32 +69,28 @@
|
||||
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
|
||||
<plugin>
|
||||
<artifactId>maven-clean-plugin</artifactId>
|
||||
<version>3.2.0</version>
|
||||
<version>3.3.2</version>
|
||||
</plugin>
|
||||
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
|
||||
<plugin>
|
||||
<artifactId>maven-resources-plugin</artifactId>
|
||||
<version>3.3.0</version>
|
||||
<version>3.3.1</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.10.1</version>
|
||||
<version>3.13.0</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<version>3.0.0-M7</version>
|
||||
<version>3.2.5</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<version>3.3.0</version>
|
||||
<version>3.4.1</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-install-plugin</artifactId>
|
||||
<version>3.0.1</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-deploy-plugin</artifactId>
|
||||
<version>3.0.0</version>
|
||||
<version>3.1.1</version>
|
||||
</plugin>
|
||||
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
|
||||
<plugin>
|
||||
@ -103,9 +99,33 @@
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-project-info-reports-plugin</artifactId>
|
||||
<version>3.4.1</version>
|
||||
<version>3.5.0</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.sonatype.plugins</groupId>
|
||||
<artifactId>nexus-staging-maven-plugin</artifactId>
|
||||
<version>1.7.0</version>
|
||||
<extensions>true</extensions>
|
||||
<configuration>
|
||||
<serverId>ossrh</serverId>
|
||||
<nexusUrl>https://oss.sonatype.org/</nexusUrl>
|
||||
<autoReleaseAfterClose>true</autoReleaseAfterClose>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
</build>
|
||||
|
||||
<repositories>
|
||||
<!-- Check Maven Central first (before other repos below) -->
|
||||
<repository>
|
||||
<id>maven-central</id>
|
||||
<url>https://repo.maven.apache.org/maven2</url>
|
||||
</repository>
|
||||
<!-- For Handle Server -->
|
||||
<repository>
|
||||
<id>handle.net</id>
|
||||
<url>https://handle.net/maven</url>
|
||||
</repository>
|
||||
</repositories>
|
||||
</project>
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
* SPDX-License-Identifier: GPL-3.0-only
|
||||
*/
|
||||
|
||||
package io.github.ilri.cgspace.ctasks;
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
* SPDX-License-Identifier: GPL-3.0-only
|
||||
*/
|
||||
|
||||
package io.github.ilri.cgspace.ctasks;
|
||||
@ -10,14 +10,14 @@ import javax.annotation.Nullable;
|
||||
|
||||
public class CountriesVocabulary {
|
||||
|
||||
class Country {
|
||||
private String name; // required
|
||||
private String common_name; // optional
|
||||
private String official_name; // optional
|
||||
private String cgspace_name; // optional
|
||||
private String numeric; // required Hmmmm need to cast this...
|
||||
private String alpha_2; // required
|
||||
private String alpha_3; // required
|
||||
static class Country {
|
||||
private final String name; // required
|
||||
private final String common_name; // optional
|
||||
private final String official_name; // optional
|
||||
private final String cgspace_name; // optional
|
||||
private final String numeric; // required Hmmmm need to cast this...
|
||||
private final String alpha_2; // required
|
||||
private final String alpha_3; // required
|
||||
|
||||
public Country(
|
||||
String name,
|
||||
|
@ -1,14 +1,15 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
* SPDX-License-Identifier: GPL-3.0-only
|
||||
*/
|
||||
|
||||
package io.github.ilri.cgspace.ctasks;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.dspace.authorize.AuthorizeException;
|
||||
import org.dspace.content.DSpaceObject;
|
||||
import org.dspace.content.Item;
|
||||
@ -23,7 +24,15 @@ import java.io.InputStreamReader;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata.
|
||||
*
|
||||
* @author Alan Orth for the International Livestock Research Institute
|
||||
* @version 7.6.1.2
|
||||
* @since 5.1
|
||||
*/
|
||||
public class CountryCodeTagger extends AbstractCurationTask {
|
||||
public class CountryCodeTaggerConfig {
|
||||
private final String isocodesJsonPath = "/io/github/ilri/cgspace/ctasks/iso_3166-1.json";
|
||||
@ -33,10 +42,10 @@ public class CountryCodeTagger extends AbstractCurationTask {
|
||||
private final String iso3166Alpha2Field = taskProperty("iso3166-alpha2.field");
|
||||
private final boolean forceupdate = taskBooleanProperty("forceupdate", false);
|
||||
|
||||
private Logger log = Logger.getLogger(CountryCodeTagger.class);
|
||||
private final Logger log = LogManager.getLogger();
|
||||
}
|
||||
|
||||
public class CountryCodeTaggerResult {
|
||||
public static class CountryCodeTaggerResult {
|
||||
private int status = Curator.CURATE_UNSET;
|
||||
private String result = null;
|
||||
|
||||
@ -75,7 +84,6 @@ public class CountryCodeTagger extends AbstractCurationTask {
|
||||
}
|
||||
|
||||
setResult(alpha2Result.getResult());
|
||||
report(alpha2Result.getResult());
|
||||
}
|
||||
|
||||
return alpha2Result.getStatus();
|
||||
@ -84,14 +92,13 @@ public class CountryCodeTagger extends AbstractCurationTask {
|
||||
public CountryCodeTaggerResult performAlpha2(Item item, CountryCodeTaggerConfig config)
|
||||
throws IOException, SQLException {
|
||||
CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
|
||||
String itemHandle = item.getHandle();
|
||||
|
||||
List<MetadataValue> itemCountries =
|
||||
itemService.getMetadataByMetadataString(item, config.iso3166Field);
|
||||
|
||||
// skip items that don't have country metadata
|
||||
if (itemCountries.size() == 0) {
|
||||
alpha2Result.setResult(itemHandle + ": no countries, skipping.");
|
||||
if (itemCountries.isEmpty()) {
|
||||
alpha2Result.setResult("No countries, skipping.");
|
||||
alpha2Result.setStatus(Curator.CURATE_SKIP);
|
||||
} else {
|
||||
Gson gson = new Gson();
|
||||
@ -101,7 +108,7 @@ public class CountryCodeTagger extends AbstractCurationTask {
|
||||
BufferedReader reader =
|
||||
new BufferedReader(
|
||||
new InputStreamReader(
|
||||
this.getClass().getResourceAsStream(config.isocodesJsonPath)));
|
||||
Objects.requireNonNull(this.getClass().getResourceAsStream(config.isocodesJsonPath))));
|
||||
ISO3166CountriesVocabulary isocodesCountriesJson =
|
||||
gson.fromJson(reader, ISO3166CountriesVocabulary.class);
|
||||
reader.close();
|
||||
@ -109,8 +116,8 @@ public class CountryCodeTagger extends AbstractCurationTask {
|
||||
reader =
|
||||
new BufferedReader(
|
||||
new InputStreamReader(
|
||||
this.getClass()
|
||||
.getResourceAsStream(config.cgspaceCountriesJsonPath)));
|
||||
Objects.requireNonNull(this.getClass()
|
||||
.getResourceAsStream(config.cgspaceCountriesJsonPath))));
|
||||
CGSpaceCountriesVocabulary cgspaceCountriesJson =
|
||||
gson.fromJson(reader, CGSpaceCountriesVocabulary.class);
|
||||
reader.close();
|
||||
@ -133,7 +140,7 @@ public class CountryCodeTagger extends AbstractCurationTask {
|
||||
List<MetadataValue> itemAlpha2CountryCodes =
|
||||
itemService.getMetadataByMetadataString(item, config.iso3166Alpha2Field);
|
||||
|
||||
if (itemAlpha2CountryCodes.size() == 0) {
|
||||
if (itemAlpha2CountryCodes.isEmpty()) {
|
||||
List<String> newAlpha2Codes = new ArrayList<String>();
|
||||
for (MetadataValue itemCountry : itemCountries) {
|
||||
// check ISO 3166-1 countries
|
||||
@ -157,7 +164,7 @@ public class CountryCodeTagger extends AbstractCurationTask {
|
||||
}
|
||||
}
|
||||
|
||||
if (newAlpha2Codes.size() > 0) {
|
||||
if (!newAlpha2Codes.isEmpty()) {
|
||||
try {
|
||||
itemService.addMetadata(
|
||||
Curator.curationContext(),
|
||||
@ -170,21 +177,20 @@ public class CountryCodeTagger extends AbstractCurationTask {
|
||||
itemService.update(Curator.curationContext(), item);
|
||||
} catch (SQLException | AuthorizeException sqle) {
|
||||
config.log.debug(sqle.getMessage());
|
||||
alpha2Result.setResult(itemHandle + ": error");
|
||||
alpha2Result.setResult("Error");
|
||||
alpha2Result.setStatus(Curator.CURATE_ERROR);
|
||||
}
|
||||
|
||||
alpha2Result.setResult(
|
||||
itemHandle
|
||||
+ ": added "
|
||||
"Added "
|
||||
+ newAlpha2Codes.size()
|
||||
+ " alpha2 country code(s)");
|
||||
} else {
|
||||
alpha2Result.setResult(itemHandle + ": no matching countries found");
|
||||
alpha2Result.setResult("No matching countries found");
|
||||
}
|
||||
alpha2Result.setStatus(Curator.CURATE_SUCCESS);
|
||||
} else {
|
||||
alpha2Result.setResult(itemHandle + ": item has country codes, skipping");
|
||||
alpha2Result.setResult("Item already has country codes, skipping unless forced");
|
||||
alpha2Result.setStatus(Curator.CURATE_SKIP);
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
* SPDX-License-Identifier: GPL-3.0-only
|
||||
*/
|
||||
|
||||
package io.github.ilri.cgspace.ctasks;
|
||||
|
100
src/main/java/io/github/ilri/cgspace/ctasks/NormalizeDOIs.java
Normal file
100
src/main/java/io/github/ilri/cgspace/ctasks/NormalizeDOIs.java
Normal file
@ -0,0 +1,100 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-only
|
||||
*/
|
||||
|
||||
package io.github.ilri.cgspace.ctasks;
|
||||
|
||||
import org.dspace.content.DSpaceObject;
|
||||
import org.dspace.content.Item;
|
||||
import org.dspace.content.MetadataValue;
|
||||
import org.dspace.core.Constants;
|
||||
import org.dspace.curate.AbstractCurationTask;
|
||||
import org.dspace.curate.Curator;
|
||||
import org.dspace.curate.Suspendable;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Attempt to normalize DOIs by stripping whitespace, lower casing, and
|
||||
* converting to <code>https://doi.org</code> format. The reason is that DOIs are case
|
||||
* insensitive and must be unique, which we can only guarantee if they are
|
||||
* normalized to the same format.
|
||||
*
|
||||
* See: <a href="https://www.crossref.org/documentation/member-setup/constructing-your-dois/">https://www.crossref.org/documentation/member-setup/constructing-your-dois/</a>
|
||||
*
|
||||
* TODO: set curation to failed if invalid DOI submitted (and configure to reject in workflow)
|
||||
* TODO: allow operation on communities and collections (currently only works on items)
|
||||
*
|
||||
* @author Alan Orth for the International Livestock Research Institute
|
||||
* @version 7.6.1.3
|
||||
* @since 7.6.1.1
|
||||
*/
|
||||
@Suspendable
|
||||
public class NormalizeDOIs extends AbstractCurationTask {
|
||||
@Override
|
||||
public int perform(DSpaceObject dso) throws IOException {
|
||||
if (dso.getType() == Constants.ITEM) {
|
||||
Item item = (Item) dso;
|
||||
String result;
|
||||
|
||||
// Keep track of whether we change metadata, and how many
|
||||
boolean metadataChanged = false;
|
||||
int count = 0;
|
||||
|
||||
// Hard coding the metadata field for now since I can't figure out how to read the taskProperty
|
||||
List<MetadataValue> itemDOIs = itemService.getMetadataByMetadataString(item, "cg.identifier.doi");
|
||||
|
||||
// skip items that don't have DOIs
|
||||
if (itemDOIs.isEmpty()) {
|
||||
setResult("No DOIs, skipping");
|
||||
return Curator.CURATE_SKIP;
|
||||
} else {
|
||||
for (MetadataValue itemDOI : itemDOIs) {
|
||||
String newDOI = getNormalizedDOI(itemDOI);
|
||||
|
||||
// Check if the normalized DOI is different than the original
|
||||
if (!newDOI.equals(itemDOI.getValue())) {
|
||||
itemDOI.setValue(newDOI);
|
||||
metadataChanged = true;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (metadataChanged) {
|
||||
result = "Normalized " + count + " DOI(s)";
|
||||
} else {
|
||||
result = "All DOIs already normalized";
|
||||
}
|
||||
setResult(result);
|
||||
|
||||
return Curator.CURATE_SUCCESS;
|
||||
} else {
|
||||
setResult("Object skipped");
|
||||
return Curator.CURATE_SKIP;
|
||||
}
|
||||
}
|
||||
|
||||
private static String getNormalizedDOI(MetadataValue itemDOI) {
|
||||
// Convert to lowercase
|
||||
String newDOI = itemDOI.getValue().toLowerCase();
|
||||
// Strip leading and trailing whitespace
|
||||
newDOI = newDOI.strip();
|
||||
// Convert to HTTPS
|
||||
newDOI = newDOI.replace("http://", "https://");
|
||||
// Prefer doi.org to dx.doi.org
|
||||
newDOI = newDOI.replace("dx.doi.org", "doi.org");
|
||||
// Prefer doi.org to www.doi.org
|
||||
newDOI = newDOI.replace("www.doi.org", "doi.org");
|
||||
// Fix URL encoded slashes (%2f)
|
||||
newDOI = newDOI.replace("%2f", "/");
|
||||
// Replace values like doi: 10.11648/j.jps.20140201.14
|
||||
newDOI = newDOI.replaceAll("^doi: 10\\.", "https://doi.org/10.");
|
||||
// Replace values like 10.3390/foods12010115
|
||||
newDOI = newDOI.replaceAll("^10\\.", "https://doi.org/10.");
|
||||
|
||||
return newDOI;
|
||||
}
|
||||
}
|
@ -2,8 +2,9 @@
|
||||
DSpace curation tasks used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
|
||||
|
||||
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
|
||||
- **NormalizeDOIs**: normalize DOIs by stripping whitespace, lowercasing, and converting to https://doi.org/ format
|
||||
|
||||
Tested on DSpace 6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC5x/Curation+System).
|
||||
Tested on DSpace 7.6.1. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
|
||||
|
||||
## Build and Install
|
||||
|
||||
@ -14,7 +15,7 @@ To use these curation tasks in a DSpace project add the following dependency to
|
||||
<dependency>
|
||||
<groupId>io.github.ilri.cgspace</groupId>
|
||||
<artifactId>cgspace-java-helpers</artifactId>
|
||||
<version>6.1-SNAPSHOT</version>
|
||||
<version>7.6.1.4-SNAPSHOT</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@ -30,15 +31,16 @@ $ mvn package
|
||||
Copy the resulting jar to the DSpace `lib` directory:
|
||||
|
||||
```
|
||||
$ cp target/cgspace-java-helpers-6.1-SNAPSHOT.jar ~/dspace/lib/
|
||||
$ cp target/cgspace-java-helpers-7.6.1.4-SNAPSHOT.jar ~/dspace/lib/
|
||||
```
|
||||
|
||||
## Configuration
|
||||
Add the curation task to DSpace's `config/modules/curate.cfg`:
|
||||
Add the curation task(s) to DSpace's `config/modules/curate.cfg`:
|
||||
|
||||
```
|
||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger
|
||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger.force
|
||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.NormalizeDOIs = normalizedois
|
||||
```
|
||||
|
||||
And then add the following variables to your `local.cfg` or some other [configuration file that is included](https://wiki.lyrasis.org/display/DSDOC6x/Configuration+Reference#ConfigurationReference-IncludingotherPropertyFiles):
|
||||
@ -60,7 +62,7 @@ countrycodetagger.iso3166-alpha2.field = cg.coverage.iso3166-alpha2
|
||||
Once the jar is installed and you have added appropriate configuration in `~/dspace/config/modules`:
|
||||
|
||||
```
|
||||
$ ~/dspace/bin/dspace curate -t countrycodetagger -i 10568/3 -r - -s object
|
||||
$ ~/dspace/bin/dspace curate -e eperson@repo.org -t countrycodetagger -i 10568/3 -r - -s object
|
||||
```
|
||||
|
||||
*Note*: it is very important to set the database transaction scope to something sensible (`object`) if you're curating a community or collection with more than a few hundred items.
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
* SPDX-License-Identifier: GPL-3.0-only
|
||||
*/
|
||||
|
||||
package io.github.ilri.cgspace.scripts;
|
||||
@ -138,7 +138,7 @@ public class FixJpgJpgThumbnails {
|
||||
for (Bitstream originalBitstream : originalBundleBitstreams) {
|
||||
String originalName = originalBitstream.getName();
|
||||
|
||||
long originalBitstreamBytes = originalBitstream.getSize();
|
||||
long originalBitstreamBytes = originalBitstream.getSizeBytes();
|
||||
|
||||
/*
|
||||
- check if the original file name is the same as the thumbnail name minus the extra ".jpg"
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
* SPDX-License-Identifier: GPL-3.0-only
|
||||
*/
|
||||
|
||||
package io.github.ilri.cgspace.scripts;
|
||||
|
@ -4,7 +4,7 @@ Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutiona
|
||||
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
|
||||
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
|
||||
|
||||
Tested on DSpace 6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC6x/Curation+System).
|
||||
Tested on DSpace 7.6.1. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
|
||||
|
||||
## Build and Install
|
||||
|
||||
@ -15,7 +15,7 @@ To use these curation tasks in a DSpace project add the following dependency to
|
||||
<dependency>
|
||||
<groupId>io.github.ilri.cgspace</groupId>
|
||||
<artifactId>cgspace-java-helpers</artifactId>
|
||||
<version>6.1-SNAPSHOT</version>
|
||||
<version>7.6.1.4-SNAPSHOT</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@ -31,7 +31,7 @@ $ mvn package
|
||||
Copy the resulting jar to the DSpace `lib` directory:
|
||||
|
||||
```console
|
||||
$ cp target/cgspace-java-helpers-6.1-SNAPSHOT.jar ~/dspace/lib/
|
||||
$ cp target/cgspace-java-helpers-7.6.1.4-SNAPSHOT.jar ~/dspace/lib/
|
||||
```
|
||||
|
||||
## Invocation
|
||||
|
@ -16,14 +16,6 @@
|
||||
"name": "Congo, The Democratic Republic of the",
|
||||
"numeric": "180"
|
||||
},
|
||||
{
|
||||
"alpha_2": "IR",
|
||||
"alpha_3": "IRN",
|
||||
"name": "Iran, Islamic Republic of",
|
||||
"cgspace_name": "Iran",
|
||||
"numeric": "364",
|
||||
"official_name": "Islamic Republic of Iran"
|
||||
},
|
||||
{
|
||||
"alpha_2": "KP",
|
||||
"alpha_3": "PRK",
|
||||
@ -33,13 +25,6 @@
|
||||
"numeric": "408",
|
||||
"official_name": "Democratic People's Republic of Korea"
|
||||
},
|
||||
{
|
||||
"alpha_2": "LA",
|
||||
"alpha_3": "LAO",
|
||||
"name": "Lao People's Democratic Republic",
|
||||
"cgspace_name": "Laos",
|
||||
"numeric": "418"
|
||||
},
|
||||
{
|
||||
"alpha_2": "FM",
|
||||
"alpha_3": "FSM",
|
||||
@ -54,13 +39,6 @@
|
||||
"name": "Russian Federation",
|
||||
"cgspace_name": "Russia",
|
||||
"numeric": "643"
|
||||
},
|
||||
{
|
||||
"alpha_2": "SY",
|
||||
"alpha_3": "SYR",
|
||||
"name": "Syrian Arab Republic",
|
||||
"cgspace_name": "Syria",
|
||||
"numeric": "760"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -821,6 +821,7 @@
|
||||
{
|
||||
"alpha_2": "IR",
|
||||
"alpha_3": "IRN",
|
||||
"common_name": "Iran",
|
||||
"flag": "🇮🇷",
|
||||
"name": "Iran, Islamic Republic of",
|
||||
"numeric": "364",
|
||||
@ -953,6 +954,7 @@
|
||||
{
|
||||
"alpha_2": "LA",
|
||||
"alpha_3": "LAO",
|
||||
"common_name": "Laos",
|
||||
"flag": "🇱🇦",
|
||||
"name": "Lao People's Democratic Republic",
|
||||
"numeric": "418"
|
||||
@ -1653,6 +1655,7 @@
|
||||
{
|
||||
"alpha_2": "SY",
|
||||
"alpha_3": "SYR",
|
||||
"common_name": "Syria",
|
||||
"flag": "🇸🇾",
|
||||
"name": "Syrian Arab Republic",
|
||||
"numeric": "760"
|
||||
@ -1746,9 +1749,9 @@
|
||||
"alpha_2": "TR",
|
||||
"alpha_3": "TUR",
|
||||
"flag": "🇹🇷",
|
||||
"name": "Turkey",
|
||||
"name": "Türkiye",
|
||||
"numeric": "792",
|
||||
"official_name": "Republic of Turkey"
|
||||
"official_name": "Republic of Türkiye"
|
||||
},
|
||||
{
|
||||
"alpha_2": "TV",
|
||||
|
Reference in New Issue
Block a user