mirror of
https://github.com/ilri/cgspace-java-helpers.git
synced 2025-07-05 14:03:37 +02:00
Compare commits
39 Commits
f0754ab419
...
v7.6
Author | SHA1 | Date | |
---|---|---|---|
9faf657c59
|
|||
7fb78c2722
|
|||
6ef9f521bf
|
|||
1a345de36a
|
|||
eb66ccbd0d
|
|||
62138540ae
|
|||
c0d0e40321
|
|||
f2a637f0a8
|
|||
6e38a2f7e1
|
|||
f9d7e5f6a2
|
|||
9e965afdb7
|
|||
408a0e1c19
|
|||
ea9f669e9c
|
|||
546101bc92
|
|||
0a7cf7bf59
|
|||
8c0a8fbcd1
|
|||
c05a2e4f96
|
|||
cf2af393c0
|
|||
1f6ba4af67
|
|||
5ceaebaeae
|
|||
f3dcc6e261
|
|||
3eddbc3e22
|
|||
dbf59f784c
|
|||
0ffa4c8d37
|
|||
970d0c074e
|
|||
6b2b899957
|
|||
dfaa234a90
|
|||
f46e81b8cd
|
|||
dbd8721579
|
|||
a234b39064
|
|||
80a336f94d
|
|||
5ebf4930cf
|
|||
8e01595cc1
|
|||
8b3aac610d
|
|||
c2d7535d01
|
|||
b396fba043
|
|||
38a9cc5188
|
|||
16db38967b
|
|||
2604dc3cce
|
16
.github/workflows/maven.yml
vendored
16
.github/workflows/maven.yml
vendored
@ -5,20 +5,22 @@ name: Build
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ dspace6 ]
|
||||
branches: [ dspace7 ]
|
||||
pull_request:
|
||||
branches: [ dspace6 ]
|
||||
branches: [ dspace7 ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up JDK 1.8
|
||||
uses: actions/setup-java@v1
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up JDK 17
|
||||
uses: actions/setup-java@v4
|
||||
with:
|
||||
java-version: 1.8
|
||||
java-version: 17
|
||||
distribution: 'temurin'
|
||||
cache: 'maven'
|
||||
- name: Build with Maven
|
||||
run: mvn -B package --file pom.xml
|
||||
|
4
.idea/misc.xml
generated
4
.idea/misc.xml
generated
@ -1,11 +1,13 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ExternalStorageConfigurationManager" enabled="true" />
|
||||
<component name="MavenProjectsManager">
|
||||
<option name="originalFiles">
|
||||
<list>
|
||||
<option value="$PROJECT_DIR$/pom.xml" />
|
||||
</list>
|
||||
</option>
|
||||
<option name="workspaceImportForciblyTurnedOn" value="true" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_11" project-jdk-name="11" project-jdk-type="JavaSDK" />
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_11" project-jdk-name="17" project-jdk-type="JavaSDK" />
|
||||
</project>
|
15
CHANGELOG.md
15
CHANGELOG.md
@ -4,12 +4,25 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## Unreleased
|
||||
## [7.6] - 2024-01-02
|
||||
### Updated
|
||||
- `iso_3166-1.json` from iso-codes 4.13.0-SNAPSHOT, which [adds common names for Iran, Laos, and Syria](https://salsa.debian.org/iso-codes-team/iso-codes/-/merge_requests/32)
|
||||
- DSpace 7.6 compatibility
|
||||
|
||||
## [6.2] - 2023-02-20
|
||||
### Updated
|
||||
- `iso_3166-1.json` from iso-codes 4.12.0, which updates the name for TR to "Türkiye"
|
||||
|
||||
## [6.1] - 2022-10-31
|
||||
### Updated
|
||||
- Update dependencies in `pom.xml`
|
||||
- `iso_3166-1.json` from iso-codes 4.11.0
|
||||
|
||||
### Changed
|
||||
- Java compiler and target from JDK 7 to JDK 8
|
||||
|
||||
### Added
|
||||
- New `FixLowQualityThumbnails` script to detect and remove more low-quality thumbnails
|
||||
|
||||
### Fixed
|
||||
- `FixJpgJpgThumbnails` and `FixLowQualityThumbnails` scripts not committing changes when operating on a site, community, or collection
|
||||
|
16
README.md
16
README.md
@ -5,7 +5,7 @@ DSpace curation tasks and other Java-based helpers used on the [CGSpace](https:/
|
||||
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
|
||||
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
|
||||
|
||||
Tested on DSpace 6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC5x/Curation+System).
|
||||
Tested on DSpace 7.6. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
|
||||
|
||||
## Build and Install
|
||||
|
||||
@ -16,7 +16,7 @@ To use these curation tasks in a DSpace project add the following dependency to
|
||||
<dependency>
|
||||
<groupId>io.github.ilri.cgspace</groupId>
|
||||
<artifactId>cgspace-java-helpers</artifactId>
|
||||
<version>6.1-SNAPSHOT</version>
|
||||
<version>7.6-SNAPSHOT</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@ -32,7 +32,7 @@ $ mvn package
|
||||
Copy the resulting jar to the DSpace `lib` directory:
|
||||
|
||||
```console
|
||||
$ cp target/cgspace-java-helpers-6.1-SNAPSHOT.jar ~/dspace/lib/
|
||||
$ cp target/cgspace-java-helpers-7.6-SNAPSHOT.jar ~/dspace/lib/
|
||||
```
|
||||
|
||||
## Configuration
|
||||
@ -44,6 +44,8 @@ Please refer to the appropriate README.md file:
|
||||
## TODO
|
||||
|
||||
- Add a curation task to normalize DOIs to "https://doi.org" format
|
||||
- Migrate from maven-deploy-plugin to nexus-staging-maven-plugin, see: https://central.sonatype.org/publish/publish-maven/#nexus-staging-maven-plugin-for-deployment-and-release
|
||||
- Stop using oss-parent, see: https://central.sonatype.org/publish/publish-maven/#create-a-ticket-with-sonatype
|
||||
|
||||
## Notes
|
||||
This project was initially created according to the [Maven Getting Started Guide](https://maven.apache.org/guides/getting-started/):
|
||||
@ -52,6 +54,14 @@ This project was initially created according to the [Maven Getting Started Guide
|
||||
$ mvn -B archetype:generate -DgroupId=io.github.ilri.cgspace -DartifactId=cgspace-java-helpers -DarchetypeArtifactId=maven-archetype-quickstart -DarchetypeVersion=1.4
|
||||
```
|
||||
|
||||
To deploy a new `-SNAPSHOT` release to Maven Central (make sure OSSRH credentials are in `~/.m2/settings.xml`):
|
||||
|
||||
```console
|
||||
$ mvn clean deploy
|
||||
```
|
||||
|
||||
See: <a href="https://central.sonatype.org/publish/publish-maven/#performing-a-snapshot-deployment">Performing a Snapshot Deployment</a>
|
||||
|
||||
## License
|
||||
This work is licensed under the [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html).
|
||||
|
||||
|
33
pom.xml
33
pom.xml
@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>io.github.ilri.cgspace</groupId>
|
||||
<artifactId>cgspace-java-helpers</artifactId>
|
||||
<version>6.1-SNAPSHOT</version>
|
||||
<version>7.6-SNAPSHOT</version>
|
||||
|
||||
<name>cgspace-java-helpers</name>
|
||||
<url>https://github.com/ilri/cgspace-java-helpers</url>
|
||||
@ -36,12 +36,12 @@
|
||||
<dependency>
|
||||
<groupId>com.google.code.gson</groupId>
|
||||
<artifactId>gson</artifactId>
|
||||
<version>2.9.1</version>
|
||||
<version>2.10.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.dspace</groupId>
|
||||
<artifactId>dspace-api</artifactId>
|
||||
<version>6.3</version>
|
||||
<version>7.6.1</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
@ -69,20 +69,20 @@
|
||||
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
|
||||
<plugin>
|
||||
<artifactId>maven-clean-plugin</artifactId>
|
||||
<version>3.2.0</version>
|
||||
<version>3.3.2</version>
|
||||
</plugin>
|
||||
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
|
||||
<plugin>
|
||||
<artifactId>maven-resources-plugin</artifactId>
|
||||
<version>3.3.0</version>
|
||||
<version>3.3.1</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.10.1</version>
|
||||
<version>3.12.1</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<version>3.0.0-M7</version>
|
||||
<version>3.2.3</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
@ -90,11 +90,11 @@
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-install-plugin</artifactId>
|
||||
<version>3.0.1</version>
|
||||
<version>3.1.1</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-deploy-plugin</artifactId>
|
||||
<version>3.3.0</version>
|
||||
<version>3.1.1</version>
|
||||
</plugin>
|
||||
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
|
||||
<plugin>
|
||||
@ -103,9 +103,22 @@
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-project-info-reports-plugin</artifactId>
|
||||
<version>3.4.1</version>
|
||||
<version>3.5.0</version>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
</build>
|
||||
|
||||
<repositories>
|
||||
<!-- Check Maven Central first (before other repos below) -->
|
||||
<repository>
|
||||
<id>maven-central</id>
|
||||
<url>https://repo.maven.apache.org/maven2</url>
|
||||
</repository>
|
||||
<!-- For Handle Server -->
|
||||
<repository>
|
||||
<id>handle.net</id>
|
||||
<url>https://handle.net/maven</url>
|
||||
</repository>
|
||||
</repositories>
|
||||
</project>
|
||||
|
@ -1,8 +1,8 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
*/
|
||||
* Copyright (C) 2020 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
*/
|
||||
|
||||
package io.github.ilri.cgspace.ctasks;
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
*/
|
||||
* Copyright (C) 2020 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
*/
|
||||
|
||||
package io.github.ilri.cgspace.ctasks;
|
||||
|
||||
@ -10,16 +10,17 @@ import javax.annotation.Nullable;
|
||||
|
||||
public class CountriesVocabulary {
|
||||
|
||||
class Country {
|
||||
private String name; //required
|
||||
private String common_name; //optional
|
||||
private String official_name; //optional
|
||||
private String cgspace_name; //optional
|
||||
private String numeric; //required Hmmmm need to cast this...
|
||||
private String alpha_2; //required
|
||||
private String alpha_3; //required
|
||||
static class Country {
|
||||
private final String name; // required
|
||||
private final String common_name; // optional
|
||||
private final String official_name; // optional
|
||||
private final String cgspace_name; // optional
|
||||
private final String numeric; // required Hmmmm need to cast this...
|
||||
private final String alpha_2; // required
|
||||
private final String alpha_3; // required
|
||||
|
||||
public Country(String name,
|
||||
public Country(
|
||||
String name,
|
||||
@Nullable String common_name,
|
||||
@Nullable String official_name,
|
||||
@Nullable String cgspace_name,
|
||||
@ -30,7 +31,9 @@ public class CountriesVocabulary {
|
||||
this.common_name = common_name;
|
||||
this.official_name = official_name;
|
||||
this.cgspace_name = cgspace_name;
|
||||
this.numeric = numeric; // fuuuuu this is a string and we can't cast to Integer because some values are zeropadded like "004"
|
||||
this.numeric =
|
||||
numeric; // fuuuuu this is a string and we can't cast to Integer because some
|
||||
// values are zeropadded like "004"
|
||||
this.alpha_2 = alpha_2;
|
||||
this.alpha_3 = alpha_3;
|
||||
}
|
||||
|
@ -1,13 +1,15 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
*/
|
||||
* Copyright (C) 2020 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
*/
|
||||
|
||||
package io.github.ilri.cgspace.ctasks;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.dspace.authorize.AuthorizeException;
|
||||
import org.dspace.content.DSpaceObject;
|
||||
import org.dspace.content.Item;
|
||||
@ -22,20 +24,21 @@ import java.io.InputStreamReader;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
public class CountryCodeTagger extends AbstractCurationTask
|
||||
{
|
||||
public class CountryCodeTagger extends AbstractCurationTask {
|
||||
public class CountryCodeTaggerConfig {
|
||||
private final String isocodesJsonPath = "/io/github/ilri/cgspace/ctasks/iso_3166-1.json";
|
||||
private final String cgspaceCountriesJsonPath = "/io/github/ilri/cgspace/ctasks/cgspace-countries.json";
|
||||
private final String cgspaceCountriesJsonPath =
|
||||
"/io/github/ilri/cgspace/ctasks/cgspace-countries.json";
|
||||
private final String iso3166Field = taskProperty("iso3166.field");
|
||||
private final String iso3166Alpha2Field = taskProperty("iso3166-alpha2.field");
|
||||
private final boolean forceupdate = taskBooleanProperty("forceupdate", false);
|
||||
|
||||
private Logger log = Logger.getLogger(CountryCodeTagger.class);
|
||||
private final Logger log = LogManager.getLogger();
|
||||
}
|
||||
|
||||
public class CountryCodeTaggerResult {
|
||||
public static class CountryCodeTaggerResult {
|
||||
private int status = Curator.CURATE_UNSET;
|
||||
private String result = null;
|
||||
|
||||
@ -57,17 +60,15 @@ public class CountryCodeTagger extends AbstractCurationTask
|
||||
}
|
||||
|
||||
@Override
|
||||
public int perform(DSpaceObject dso) throws IOException
|
||||
{
|
||||
public int perform(DSpaceObject dso) throws IOException {
|
||||
// gotta define this here so we can access it after the if context...
|
||||
CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
|
||||
|
||||
if (dso.getType() == Constants.ITEM)
|
||||
{
|
||||
if (dso.getType() == Constants.ITEM) {
|
||||
// Load configuration
|
||||
CountryCodeTaggerConfig config = new CountryCodeTaggerConfig();
|
||||
|
||||
Item item = (Item)dso;
|
||||
Item item = (Item) dso;
|
||||
|
||||
try {
|
||||
alpha2Result = performAlpha2(item, config);
|
||||
@ -82,49 +83,75 @@ public class CountryCodeTagger extends AbstractCurationTask
|
||||
return alpha2Result.getStatus();
|
||||
}
|
||||
|
||||
public CountryCodeTaggerResult performAlpha2(Item item, CountryCodeTaggerConfig config) throws IOException, SQLException {
|
||||
public CountryCodeTaggerResult performAlpha2(Item item, CountryCodeTaggerConfig config)
|
||||
throws IOException, SQLException {
|
||||
CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
|
||||
String itemHandle = item.getHandle();
|
||||
|
||||
List<MetadataValue> itemCountries = itemService.getMetadataByMetadataString(item, config.iso3166Field);
|
||||
List<MetadataValue> itemCountries =
|
||||
itemService.getMetadataByMetadataString(item, config.iso3166Field);
|
||||
|
||||
// skip items that don't have country metadata
|
||||
if (itemCountries.size() == 0) {
|
||||
if (itemCountries.isEmpty()) {
|
||||
alpha2Result.setResult(itemHandle + ": no countries, skipping.");
|
||||
alpha2Result.setStatus(Curator.CURATE_SKIP);
|
||||
} else {
|
||||
Gson gson = new Gson();
|
||||
|
||||
// TODO: convert to try: https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
|
||||
BufferedReader reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(config.isocodesJsonPath)));
|
||||
ISO3166CountriesVocabulary isocodesCountriesJson = gson.fromJson(reader, ISO3166CountriesVocabulary.class);
|
||||
// TODO: convert to try:
|
||||
// https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
|
||||
BufferedReader reader =
|
||||
new BufferedReader(
|
||||
new InputStreamReader(
|
||||
Objects.requireNonNull(this.getClass().getResourceAsStream(config.isocodesJsonPath))));
|
||||
ISO3166CountriesVocabulary isocodesCountriesJson =
|
||||
gson.fromJson(reader, ISO3166CountriesVocabulary.class);
|
||||
reader.close();
|
||||
|
||||
reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(config.cgspaceCountriesJsonPath)));
|
||||
CGSpaceCountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CGSpaceCountriesVocabulary.class);
|
||||
reader =
|
||||
new BufferedReader(
|
||||
new InputStreamReader(
|
||||
Objects.requireNonNull(this.getClass()
|
||||
.getResourceAsStream(config.cgspaceCountriesJsonPath))));
|
||||
CGSpaceCountriesVocabulary cgspaceCountriesJson =
|
||||
gson.fromJson(reader, CGSpaceCountriesVocabulary.class);
|
||||
reader.close();
|
||||
|
||||
// split the alpha2 country code field into schema, element, and qualifier so we can use it with item.addMetadata()
|
||||
// split the alpha2 country code field into schema, element, and qualifier so we can use
|
||||
// it with item.addMetadata()
|
||||
String[] iso3166Alpha2FieldParts = config.iso3166Alpha2Field.split("\\.");
|
||||
|
||||
if (config.forceupdate) {
|
||||
itemService.clearMetadata(Curator.curationContext(), item, iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], Item.ANY);
|
||||
itemService.clearMetadata(
|
||||
Curator.curationContext(),
|
||||
item,
|
||||
iso3166Alpha2FieldParts[0],
|
||||
iso3166Alpha2FieldParts[1],
|
||||
iso3166Alpha2FieldParts[2],
|
||||
Item.ANY);
|
||||
}
|
||||
|
||||
// check the item's country codes, if any
|
||||
List<MetadataValue> itemAlpha2CountryCodes = itemService.getMetadataByMetadataString(item, config.iso3166Alpha2Field);
|
||||
List<MetadataValue> itemAlpha2CountryCodes =
|
||||
itemService.getMetadataByMetadataString(item, config.iso3166Alpha2Field);
|
||||
|
||||
if (itemAlpha2CountryCodes.size() == 0) {
|
||||
if (itemAlpha2CountryCodes.isEmpty()) {
|
||||
List<String> newAlpha2Codes = new ArrayList<String>();
|
||||
for (MetadataValue itemCountry : itemCountries) {
|
||||
//check ISO 3166-1 countries
|
||||
// check ISO 3166-1 countries
|
||||
for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
|
||||
if (itemCountry.getValue().equalsIgnoreCase(country.getName()) || itemCountry.getValue().equalsIgnoreCase(country.get_official_name()) || itemCountry.getValue().equalsIgnoreCase(country.get_common_name())) {
|
||||
if (itemCountry.getValue().equalsIgnoreCase(country.getName())
|
||||
|| itemCountry
|
||||
.getValue()
|
||||
.equalsIgnoreCase(country.get_official_name())
|
||||
|| itemCountry
|
||||
.getValue()
|
||||
.equalsIgnoreCase(country.get_common_name())) {
|
||||
newAlpha2Codes.add(country.getAlpha_2());
|
||||
}
|
||||
}
|
||||
|
||||
//check CGSpace countries
|
||||
// check CGSpace countries
|
||||
for (CountriesVocabulary.Country country : cgspaceCountriesJson.countries) {
|
||||
if (itemCountry.getValue().equalsIgnoreCase(country.getCgspace_name())) {
|
||||
newAlpha2Codes.add(country.getAlpha_2());
|
||||
@ -132,9 +159,16 @@ public class CountryCodeTagger extends AbstractCurationTask
|
||||
}
|
||||
}
|
||||
|
||||
if (newAlpha2Codes.size() > 0) {
|
||||
if (!newAlpha2Codes.isEmpty()) {
|
||||
try {
|
||||
itemService.addMetadata(Curator.curationContext(), item, iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], "en_US", newAlpha2Codes);
|
||||
itemService.addMetadata(
|
||||
Curator.curationContext(),
|
||||
item,
|
||||
iso3166Alpha2FieldParts[0],
|
||||
iso3166Alpha2FieldParts[1],
|
||||
iso3166Alpha2FieldParts[2],
|
||||
"en_US",
|
||||
newAlpha2Codes);
|
||||
itemService.update(Curator.curationContext(), item);
|
||||
} catch (SQLException | AuthorizeException sqle) {
|
||||
config.log.debug(sqle.getMessage());
|
||||
@ -142,7 +176,11 @@ public class CountryCodeTagger extends AbstractCurationTask
|
||||
alpha2Result.setStatus(Curator.CURATE_ERROR);
|
||||
}
|
||||
|
||||
alpha2Result.setResult(itemHandle + ": added " + newAlpha2Codes.size() + " alpha2 country code(s)");
|
||||
alpha2Result.setResult(
|
||||
itemHandle
|
||||
+ ": added "
|
||||
+ newAlpha2Codes.size()
|
||||
+ " alpha2 country code(s)");
|
||||
} else {
|
||||
alpha2Result.setResult(itemHandle + ": no matching countries found");
|
||||
}
|
||||
|
@ -1,15 +1,18 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
*/
|
||||
* Copyright (C) 2020 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
*/
|
||||
|
||||
package io.github.ilri.cgspace.ctasks;
|
||||
|
||||
import com.google.gson.annotations.SerializedName;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class ISO3166CountriesVocabulary extends CountriesVocabulary {
|
||||
// support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since our class needs to match the JSON exactly
|
||||
@SerializedName("3166-1") List<Country> countries;
|
||||
// support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since
|
||||
// our class needs to match the JSON exactly
|
||||
@SerializedName("3166-1")
|
||||
List<Country> countries;
|
||||
}
|
@ -3,7 +3,7 @@ DSpace curation tasks used on the [CGSpace](https://cgspace.cgiar.org) instituti
|
||||
|
||||
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
|
||||
|
||||
Tested on DSpace 6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC5x/Curation+System).
|
||||
Tested on DSpace 7.6. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
|
||||
|
||||
## Build and Install
|
||||
|
||||
@ -14,7 +14,7 @@ To use these curation tasks in a DSpace project add the following dependency to
|
||||
<dependency>
|
||||
<groupId>io.github.ilri.cgspace</groupId>
|
||||
<artifactId>cgspace-java-helpers</artifactId>
|
||||
<version>6.1-SNAPSHOT</version>
|
||||
<version>7.6-SNAPSHOT</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@ -30,7 +30,7 @@ $ mvn package
|
||||
Copy the resulting jar to the DSpace `lib` directory:
|
||||
|
||||
```
|
||||
$ cp target/cgspace-java-helpers-6.1-SNAPSHOT.jar ~/dspace/lib/
|
||||
$ cp target/cgspace-java-helpers-7.6-SNAPSHOT.jar ~/dspace/lib/
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
@ -1,21 +1,27 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
*/
|
||||
* Copyright (C) 2020 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
*/
|
||||
|
||||
package io.github.ilri.cgspace.scripts;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.dspace.authorize.AuthorizeException;
|
||||
import org.dspace.content.*;
|
||||
import org.dspace.content.Bitstream;
|
||||
import org.dspace.content.Bundle;
|
||||
import org.dspace.content.Collection;
|
||||
import org.dspace.content.Community;
|
||||
import org.dspace.content.DSpaceObject;
|
||||
import org.dspace.content.Item;
|
||||
import org.dspace.content.MetadataValue;
|
||||
import org.dspace.content.factory.ContentServiceFactory;
|
||||
import org.dspace.content.service.BundleService;
|
||||
import org.dspace.content.service.ItemService;
|
||||
import org.dspace.core.Constants;
|
||||
import org.dspace.core.Context;
|
||||
import org.dspace.content.factory.ContentServiceFactory;
|
||||
import org.dspace.content.service.ItemService;
|
||||
import org.dspace.handle.factory.HandleServiceFactory;
|
||||
import org.dspace.handle.service.HandleService;
|
||||
import org.dspace.content.service.BundleService;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.sql.SQLException;
|
||||
@ -29,10 +35,12 @@ import java.util.List;
|
||||
* @since 5.1
|
||||
*/
|
||||
public class FixJpgJpgThumbnails {
|
||||
//note: static members belong to the class itself, not any one instance
|
||||
// note: static members belong to the class itself, not any one instance
|
||||
public static ItemService itemService = ContentServiceFactory.getInstance().getItemService();
|
||||
public static HandleService handleService = HandleServiceFactory.getInstance().getHandleService();
|
||||
public static BundleService bundleService = ContentServiceFactory.getInstance().getBundleService();
|
||||
public static HandleService handleService =
|
||||
HandleServiceFactory.getInstance().getHandleService();
|
||||
public static BundleService bundleService =
|
||||
ContentServiceFactory.getInstance().getBundleService();
|
||||
|
||||
public static void main(String[] args) {
|
||||
String parentHandle = null;
|
||||
@ -51,17 +59,24 @@ public class FixJpgJpgThumbnails {
|
||||
DSpaceObject parent = handleService.resolveToObject(context, parentHandle);
|
||||
if (parent != null) {
|
||||
switch (parent.getType()) {
|
||||
case Constants.COLLECTION:
|
||||
process(context, itemService.findByCollection(context, (Collection) parent));
|
||||
case Constants.SITE:
|
||||
process(context, itemService.findAll(context));
|
||||
context.commit();
|
||||
break;
|
||||
case Constants.COMMUNITY:
|
||||
List<Collection> collections = ((Community) parent).getCollections();
|
||||
for (Collection collection : collections) {
|
||||
process(context, itemService.findAllByCollection(context, collection));
|
||||
process(
|
||||
context,
|
||||
itemService.findAllByCollection(context, collection));
|
||||
}
|
||||
context.commit();
|
||||
break;
|
||||
case Constants.SITE:
|
||||
process(context, itemService.findAll(context));
|
||||
case Constants.COLLECTION:
|
||||
process(
|
||||
context,
|
||||
itemService.findByCollection(context, (Collection) parent));
|
||||
context.commit();
|
||||
break;
|
||||
case Constants.ITEM:
|
||||
processItem(context, (Item) parent);
|
||||
@ -79,7 +94,8 @@ public class FixJpgJpgThumbnails {
|
||||
}
|
||||
}
|
||||
|
||||
private static void process(Context context, Iterator<Item> items) throws SQLException, IOException, AuthorizeException {
|
||||
private static void process(Context context, Iterator<Item> items)
|
||||
throws SQLException, IOException, AuthorizeException {
|
||||
while (items.hasNext()) {
|
||||
Item item = items.next();
|
||||
processItem(context, item);
|
||||
@ -87,14 +103,18 @@ public class FixJpgJpgThumbnails {
|
||||
}
|
||||
}
|
||||
|
||||
private static void processItem(Context context, Item item) throws SQLException, AuthorizeException, IOException {
|
||||
// Some bitstreams like Infographics are large JPGs and put in the ORIGINAL bundle on purpose so we shouldn't
|
||||
private static void processItem(Context context, Item item)
|
||||
throws SQLException, AuthorizeException, IOException {
|
||||
// Some bitstreams like Infographics and Maps are large JPEGs and put in the ORIGINAL bundle
|
||||
// on purpose so we shouldn't
|
||||
// swap them.
|
||||
List<MetadataValue> itemTypes = itemService.getMetadataByMetadataString(item, "dcterms.type");
|
||||
boolean itemHasInfographic = false;
|
||||
for (MetadataValue itemType: itemTypes) {
|
||||
if (itemType.getValue().equals("Infographic")) {
|
||||
itemHasInfographic = true;
|
||||
List<MetadataValue> itemTypes =
|
||||
itemService.getMetadataByMetadataString(item, "dcterms.type");
|
||||
for (MetadataValue itemType : itemTypes) {
|
||||
if (itemType.getValue().equals("Infographic") || itemType.getValue().equals("Map")) {
|
||||
System.out.println(
|
||||
item.getHandle() + ": item has an Infographic or Map, skipping.");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@ -103,6 +123,12 @@ public class FixJpgJpgThumbnails {
|
||||
List<Bitstream> thumbnailBundleBitstreams = thumbnailBundle.getBitstreams();
|
||||
for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
|
||||
String thumbnailName = thumbnailBitstream.getName();
|
||||
String thumbnailDescription = thumbnailBitstream.getDescription();
|
||||
|
||||
// There is no point continuing if the thumbnail's description is empty or null
|
||||
if (StringUtils.isEmpty(thumbnailDescription)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (thumbnailName.toLowerCase().contains(".jpg.jpg")) {
|
||||
List<Bundle> originalBundles = item.getBundles("ORIGINAL");
|
||||
@ -112,29 +138,33 @@ public class FixJpgJpgThumbnails {
|
||||
for (Bitstream originalBitstream : originalBundleBitstreams) {
|
||||
String originalName = originalBitstream.getName();
|
||||
|
||||
long originalBitstreamBytes = originalBitstream.getSize();
|
||||
long originalBitstreamBytes = originalBitstream.getSizeBytes();
|
||||
|
||||
/*
|
||||
- check if the original file name is the same as the thumbnail name minus the extra ".jpg"
|
||||
- check if the thumbnail description indicates it was automatically generated
|
||||
- check if the item has dc.type Infographic (JPG could be the "real" item!)
|
||||
- check if the original bitstream is less than ~100KiB
|
||||
- Note: in my tests there were 4022 items with ".jpg.jpg" thumbnails totaling 394549249
|
||||
bytes for an average of about 98KiB so ~100KiB seems like a good cut off
|
||||
*/
|
||||
if (
|
||||
originalName.equalsIgnoreCase(StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg"))
|
||||
&& ("Generated Thumbnail".equals(thumbnailBitstream.getDescription()) || "IM Thumbnail".equals(thumbnailBitstream.getDescription()))
|
||||
&& !itemHasInfographic
|
||||
&& originalBitstreamBytes < 100000
|
||||
) {
|
||||
System.out.println(item.getHandle() + ": replacing " + thumbnailName + " with " + originalName);
|
||||
if (originalName.equalsIgnoreCase(
|
||||
StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg"))
|
||||
&& ("Generated Thumbnail".equals(thumbnailDescription)
|
||||
|| "IM Thumbnail".equals(thumbnailDescription))
|
||||
&& originalBitstreamBytes < 100000) {
|
||||
System.out.println(
|
||||
item.getHandle()
|
||||
+ ": replacing "
|
||||
+ thumbnailName
|
||||
+ " with "
|
||||
+ originalName);
|
||||
|
||||
//add the original bitstream to the THUMBNAIL bundle
|
||||
bundleService.addBitstream(context, thumbnailBundle, originalBitstream);
|
||||
//remove the original bitstream from the ORIGINAL bundle
|
||||
// add the original bitstream to the THUMBNAIL bundle
|
||||
bundleService.addBitstream(
|
||||
context, thumbnailBundle, originalBitstream);
|
||||
// remove the original bitstream from the ORIGINAL bundle
|
||||
originalBundle.removeBitstream(originalBitstream);
|
||||
//remove the JpgJpg bitstream from the THUMBNAIL bundle
|
||||
// remove the JpgJpg bitstream from the THUMBNAIL bundle
|
||||
thumbnailBundle.removeBitstream(thumbnailBitstream);
|
||||
}
|
||||
}
|
||||
|
@ -1,14 +1,19 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
*/
|
||||
* Copyright (C) 2022 Alan Orth
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
*/
|
||||
|
||||
package io.github.ilri.cgspace.scripts;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.dspace.authorize.AuthorizeException;
|
||||
import org.dspace.content.*;
|
||||
import org.dspace.content.Bitstream;
|
||||
import org.dspace.content.Bundle;
|
||||
import org.dspace.content.Collection;
|
||||
import org.dspace.content.Community;
|
||||
import org.dspace.content.DSpaceObject;
|
||||
import org.dspace.content.Item;
|
||||
import org.dspace.content.factory.ContentServiceFactory;
|
||||
import org.dspace.content.service.BundleService;
|
||||
import org.dspace.content.service.ItemService;
|
||||
@ -71,10 +76,9 @@ public class FixLowQualityThumbnails {
|
||||
DSpaceObject parent = handleService.resolveToObject(context, parentHandle);
|
||||
if (parent != null) {
|
||||
switch (parent.getType()) {
|
||||
case Constants.COLLECTION:
|
||||
process(
|
||||
context,
|
||||
itemService.findByCollection(context, (Collection) parent));
|
||||
case Constants.SITE:
|
||||
process(context, itemService.findAll(context));
|
||||
context.commit();
|
||||
break;
|
||||
case Constants.COMMUNITY:
|
||||
List<Collection> collections = ((Community) parent).getCollections();
|
||||
@ -83,9 +87,13 @@ public class FixLowQualityThumbnails {
|
||||
context,
|
||||
itemService.findAllByCollection(context, collection));
|
||||
}
|
||||
context.commit();
|
||||
break;
|
||||
case Constants.SITE:
|
||||
process(context, itemService.findAll(context));
|
||||
case Constants.COLLECTION:
|
||||
process(
|
||||
context,
|
||||
itemService.findByCollection(context, (Collection) parent));
|
||||
context.commit();
|
||||
break;
|
||||
case Constants.ITEM:
|
||||
processItem(context, (Item) parent);
|
||||
@ -114,6 +122,8 @@ public class FixLowQualityThumbnails {
|
||||
|
||||
private static void processItem(Context context, Item item)
|
||||
throws SQLException, AuthorizeException, IOException {
|
||||
System.out.println("FixLowQualityThumbnails: processing item: " + item.getHandle());
|
||||
|
||||
// Set some state for the item before we iterate over the THUMBNAIL bundle
|
||||
boolean itemHasImThumbnail = false;
|
||||
|
||||
@ -154,7 +164,7 @@ public class FixLowQualityThumbnails {
|
||||
// ption will *always* be "Generated Thumbnail".
|
||||
if ("Generated Thumbnail".equals(thumbnailDescription)) {
|
||||
System.out.print("\u001b[33m");
|
||||
System.out.println("Deleting (" + item.getHandle() + "):");
|
||||
System.out.println("> Action: remove old thumbnail from THUMBNAIL bundle");
|
||||
System.out.println("> Name: »" + thumbnailName + "«");
|
||||
System.out.println("> Description: »" + thumbnailDescription + "«");
|
||||
System.out.print("\u001b[0m");
|
||||
@ -168,7 +178,7 @@ public class FixLowQualityThumbnails {
|
||||
} else if (thumbnailDescription.toLowerCase().contains("thumbnail")
|
||||
&& !"IM Thumbnail".equals(thumbnailDescription)) {
|
||||
System.out.print("\u001b[33m");
|
||||
System.out.println("Deleting (" + item.getHandle() + "):");
|
||||
System.out.println("> Action: remove manually uploaded thumbnail from THUMBNAIL bundle");
|
||||
System.out.println("> Name: »" + thumbnailName + "«");
|
||||
System.out.println("> Description: »" + thumbnailDescription + "«");
|
||||
System.out.print("\u001b[0m");
|
||||
@ -180,7 +190,7 @@ public class FixLowQualityThumbnails {
|
||||
// a thumbnail for a journal or a limited access item.
|
||||
} else {
|
||||
System.out.print("\u001b[34m");
|
||||
System.out.println("Skipping (" + item.getHandle() + "):");
|
||||
System.out.println("> Action: skip other thumbnail in THUMBNAIL bundle");
|
||||
System.out.println("> Name: »" + thumbnailName + "«");
|
||||
System.out.println("> Description: »" + thumbnailDescription + "«");
|
||||
System.out.print("\u001b[0m");
|
||||
@ -245,7 +255,7 @@ public class FixLowQualityThumbnails {
|
||||
&& (originalName.toLowerCase().contains("thumbnail")
|
||||
|| originalDescription.toLowerCase().contains("thumbnail"))) {
|
||||
System.out.print("\u001b[33m");
|
||||
System.out.println("Removing (" + item.getHandle() + "):");
|
||||
System.out.println("> Action: remove thumbnail from ORIGINAL bundle");
|
||||
System.out.println("> Name: »" + originalName + "«");
|
||||
System.out.println("> Description: »" + originalDescription + "«");
|
||||
System.out.print("\u001b[0m");
|
||||
@ -255,7 +265,7 @@ public class FixLowQualityThumbnails {
|
||||
|
||||
} else {
|
||||
System.out.print("\u001b[34m");
|
||||
System.out.println("Skipping (" + item.getHandle() + "):");
|
||||
System.out.println("> Action: skip other bitstream in ORIGINAL bundle");
|
||||
System.out.println("> Name: »" + originalName + "«");
|
||||
System.out.println("> Description: »" + originalDescription + "«");
|
||||
System.out.print("\u001b[0m");
|
||||
|
@ -4,7 +4,7 @@ Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutiona
|
||||
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
|
||||
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
|
||||
|
||||
Tested on DSpace 6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC6x/Curation+System).
|
||||
Tested on DSpace 7.6. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
|
||||
|
||||
## Build and Install
|
||||
|
||||
@ -15,7 +15,7 @@ To use these curation tasks in a DSpace project add the following dependency to
|
||||
<dependency>
|
||||
<groupId>io.github.ilri.cgspace</groupId>
|
||||
<artifactId>cgspace-java-helpers</artifactId>
|
||||
<version>6.1-SNAPSHOT</version>
|
||||
<version>7.6-SNAPSHOT</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@ -31,7 +31,7 @@ $ mvn package
|
||||
Copy the resulting jar to the DSpace `lib` directory:
|
||||
|
||||
```console
|
||||
$ cp target/cgspace-java-helpers-6.1-SNAPSHOT.jar ~/dspace/lib/
|
||||
$ cp target/cgspace-java-helpers-7.6-SNAPSHOT.jar ~/dspace/lib/
|
||||
```
|
||||
|
||||
## Invocation
|
||||
|
@ -16,29 +16,15 @@
|
||||
"name": "Congo, The Democratic Republic of the",
|
||||
"numeric": "180"
|
||||
},
|
||||
{
|
||||
"alpha_2": "IR",
|
||||
"alpha_3": "IRN",
|
||||
"name": "Iran, Islamic Republic of",
|
||||
"cgspace_name": "Iran",
|
||||
"numeric": "364",
|
||||
"official_name": "Islamic Republic of Iran"
|
||||
},
|
||||
{
|
||||
"alpha_2": "KP",
|
||||
"alpha_3": "PRK",
|
||||
"common_name": "North Korea",
|
||||
"name": "Korea, Democratic People's Republic of",
|
||||
"cgspace_name": "Korea, DPR",
|
||||
"numeric": "408",
|
||||
"official_name": "Democratic People's Republic of Korea"
|
||||
},
|
||||
{
|
||||
"alpha_2": "LA",
|
||||
"alpha_3": "LAO",
|
||||
"name": "Lao People's Democratic Republic",
|
||||
"cgspace_name": "Laos",
|
||||
"numeric": "418"
|
||||
},
|
||||
{
|
||||
"alpha_2": "FM",
|
||||
"alpha_3": "FSM",
|
||||
@ -53,13 +39,6 @@
|
||||
"name": "Russian Federation",
|
||||
"cgspace_name": "Russia",
|
||||
"numeric": "643"
|
||||
},
|
||||
{
|
||||
"alpha_2": "SY",
|
||||
"alpha_3": "SYR",
|
||||
"name": "Syrian Arab Republic",
|
||||
"cgspace_name": "Syria",
|
||||
"numeric": "760"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user