mirror of
				https://github.com/ilri/cgspace-java-helpers.git
				synced 2025-11-04 06:39:09 +01:00 
			
		
		
		
	Compare commits
	
		
			25 Commits
		
	
	
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						
						
							
						
						d5cf51c464
	
				 | 
					
					
						|||
| 
						
						
							
						
						98c7cfb3a5
	
				 | 
					
					
						|||
| 
						
						
							
						
						58365cdfda
	
				 | 
					
					
						|||
| 
						
						
							
						
						7190b751e1
	
				 | 
					
					
						|||
| 
						
						
							
						
						34acc351a5
	
				 | 
					
					
						|||
| 
						
						
							
						
						ec293b3b28
	
				 | 
					
					
						|||
| 
						
						
							
						
						31cd979b61
	
				 | 
					
					
						|||
| 
						
						
							
						
						fce81c6003
	
				 | 
					
					
						|||
| 
						
						
							
						
						26d3cbd778
	
				 | 
					
					
						|||
| 
						
						
							
						
						fdc910f93b
	
				 | 
					
					
						|||
| 
						
						
							
						
						e0d514e797
	
				 | 
					
					
						|||
| 
						
						
							
						
						fd893d8c4e
	
				 | 
					
					
						|||
| 
						
						
							
						
						2263ac27e8
	
				 | 
					
					
						|||
| 
						
						
							
						
						cf7012d698
	
				 | 
					
					
						|||
| 
						
						
							
						
						7edc60e6ca
	
				 | 
					
					
						|||
| 
						
						
							
						
						fe2abc86c6
	
				 | 
					
					
						|||
| 
						
						
							
						
						e1d92ef2c7
	
				 | 
					
					
						|||
| 
						
						
							
						
						3e3c544cfa
	
				 | 
					
					
						|||
| 
						
						
							
						
						db9881faf6
	
				 | 
					
					
						|||
| 
						
						
							
						
						fa5fb60b5b
	
				 | 
					
					
						|||
| 
						
						
							
						
						44fb9a9f4d
	
				 | 
					
					
						|||
| 
						
						
							
						
						b790d5e4db
	
				 | 
					
					
						|||
| 
						
						
							
						
						08e7546a87
	
				 | 
					
					
						|||
| 
						
						
							
						
						ff076ecf50
	
				 | 
					
					
						|||
| 
						
						
							
						
						7a5dd1c094
	
				 | 
					
					
						
							
								
								
									
										26
									
								
								.github/workflows/maven.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										26
									
								
								.github/workflows/maven.yml
									
									
									
									
										vendored
									
									
								
							@@ -1,26 +0,0 @@
 | 
				
			|||||||
# This workflow will build a Java project with Maven
 | 
					 | 
				
			||||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
name: Build
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
on:
 | 
					 | 
				
			||||||
  push:
 | 
					 | 
				
			||||||
    branches: [ dspace7 ]
 | 
					 | 
				
			||||||
  pull_request:
 | 
					 | 
				
			||||||
    branches: [ dspace7 ]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
jobs:
 | 
					 | 
				
			||||||
  build:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    runs-on: ubuntu-22.04
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    steps:
 | 
					 | 
				
			||||||
    - uses: actions/checkout@v4
 | 
					 | 
				
			||||||
    - name: Set up JDK 17
 | 
					 | 
				
			||||||
      uses: actions/setup-java@v4
 | 
					 | 
				
			||||||
      with:
 | 
					 | 
				
			||||||
        java-version: 17
 | 
					 | 
				
			||||||
        distribution: 'temurin'
 | 
					 | 
				
			||||||
        cache: 'maven'
 | 
					 | 
				
			||||||
    - name: Build with Maven
 | 
					 | 
				
			||||||
      run: mvn -B package --file pom.xml
 | 
					 | 
				
			||||||
							
								
								
									
										6
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -70,10 +70,4 @@ fabric.properties
 | 
				
			|||||||
# Android studio 3.1+ serialized cache file
 | 
					# Android studio 3.1+ serialized cache file
 | 
				
			||||||
.idea/caches/build_file_checksums.ser
 | 
					.idea/caches/build_file_checksums.ser
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# VS Code settings
 | 
					 | 
				
			||||||
.vscode
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# asdf-vm tool-versions file
 | 
					 | 
				
			||||||
.tool-versions
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
target/
 | 
					target/
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										4
									
								
								.idea/misc.xml
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										4
									
								
								.idea/misc.xml
									
									
									
										generated
									
									
									
								
							@@ -1,13 +1,11 @@
 | 
				
			|||||||
<?xml version="1.0" encoding="UTF-8"?>
 | 
					<?xml version="1.0" encoding="UTF-8"?>
 | 
				
			||||||
<project version="4">
 | 
					<project version="4">
 | 
				
			||||||
  <component name="ExternalStorageConfigurationManager" enabled="true" />
 | 
					 | 
				
			||||||
  <component name="MavenProjectsManager">
 | 
					  <component name="MavenProjectsManager">
 | 
				
			||||||
    <option name="originalFiles">
 | 
					    <option name="originalFiles">
 | 
				
			||||||
      <list>
 | 
					      <list>
 | 
				
			||||||
        <option value="$PROJECT_DIR$/pom.xml" />
 | 
					        <option value="$PROJECT_DIR$/pom.xml" />
 | 
				
			||||||
      </list>
 | 
					      </list>
 | 
				
			||||||
    </option>
 | 
					    </option>
 | 
				
			||||||
    <option name="workspaceImportForciblyTurnedOn" value="true" />
 | 
					 | 
				
			||||||
  </component>
 | 
					  </component>
 | 
				
			||||||
  <component name="ProjectRootManager" version="2" languageLevel="JDK_11" project-jdk-name="17" project-jdk-type="JavaSDK" />
 | 
					  <component name="ProjectRootManager" version="2" languageLevel="JDK_11" project-jdk-name="11" project-jdk-type="JavaSDK" />
 | 
				
			||||||
</project>
 | 
					</project>
 | 
				
			||||||
							
								
								
									
										8
									
								
								.travis.yml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								.travis.yml
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,8 @@
 | 
				
			|||||||
 | 
					dist: bionic
 | 
				
			||||||
 | 
					language: java
 | 
				
			||||||
 | 
					jdk:
 | 
				
			||||||
 | 
					  - openjdk8
 | 
				
			||||||
 | 
					script:
 | 
				
			||||||
 | 
					  - mvn package -B
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# vim: ts=2 sw=2 et
 | 
				
			||||||
							
								
								
									
										44
									
								
								CHANGELOG.md
									
									
									
									
									
								
							
							
						
						
									
										44
									
								
								CHANGELOG.md
									
									
									
									
									
								
							@@ -4,40 +4,16 @@ All notable changes to this project will be documented in this file.
 | 
				
			|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 | 
					The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 | 
				
			||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 | 
					and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## [7.6.1.2] - 2024-04-25
 | 
					## [5.3] - 2020-08-07
 | 
				
			||||||
### Changed
 | 
					### Changed
 | 
				
			||||||
- Remove reporting from curation tasks since "results" are enough
 | 
					- Make sure `FixJpgJpgThumbnails` only replaces thumbnails where the original is less than ~100KiB
 | 
				
			||||||
 | 
					- Make sure `FixJpgJpgThumbnails` only replaces thumbnails if the item type is not `Infographic` (because the JPG in the ORIGINAL bundle is the "real" file and it's OK that the thumbnail is ".jpg.jpg")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## [7.6.1.1] - 2024-04-23
 | 
					## [5.2] - 2020-08-06
 | 
				
			||||||
 | 
					### Changed
 | 
				
			||||||
 | 
					- Make `FixJpgJpgThumbnails` helper check for files named "JPG" as well as "jpg" (case insensitive)
 | 
				
			||||||
 | 
					- Make `FixJpgJpgThumbnails` helper replace thumbnails with description `IM Thumbnail` as well as `Generated Thumbnail`
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## [5.1] - 2020-08-06
 | 
				
			||||||
### Added
 | 
					### Added
 | 
				
			||||||
- New `NormalizeDOIs` curation task
 | 
					- Add `FixJpgJpgThumbnails` helper to replace ".jpg.jpg" thumbnails with their originals
 | 
				
			||||||
 | 
					 | 
				
			||||||
### Updated
 | 
					 | 
				
			||||||
- Update dependencies in `pom.xml`
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## [7.6.1] - 2024-01-02
 | 
					 | 
				
			||||||
### Changed
 | 
					 | 
				
			||||||
- Pin gson dependency to 2.9.0 to avoid dependency convergence issues with DSpace
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## [7.6] - 2024-01-02
 | 
					 | 
				
			||||||
### Updated
 | 
					 | 
				
			||||||
- `iso_3166-1.json` from iso-codes 4.13.0-SNAPSHOT, which [adds common names for Iran, Laos, and Syria](https://salsa.debian.org/iso-codes-team/iso-codes/-/merge_requests/32)
 | 
					 | 
				
			||||||
- DSpace 7.6 compatibility
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## [6.2] - 2023-02-20
 | 
					 | 
				
			||||||
### Updated
 | 
					 | 
				
			||||||
- `iso_3166-1.json` from iso-codes 4.12.0, which updates the name for TR to "Türkiye"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## [6.1] - 2022-10-31
 | 
					 | 
				
			||||||
### Updated
 | 
					 | 
				
			||||||
- Update dependencies in `pom.xml`
 | 
					 | 
				
			||||||
- `iso_3166-1.json` from iso-codes 4.11.0
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
### Changed
 | 
					 | 
				
			||||||
- Java compiler and target from JDK 7 to JDK 8
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
### Added
 | 
					 | 
				
			||||||
- New `FixLowQualityThumbnails` script to detect and remove more low-quality thumbnails
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
### Fixed
 | 
					 | 
				
			||||||
- `FixJpgJpgThumbnails` and `FixLowQualityThumbnails` scripts not committing changes when operating on a site, community, or collection
 | 
					 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										35
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										35
									
								
								README.md
									
									
									
									
									
								
							@@ -1,12 +1,10 @@
 | 
				
			|||||||
# CGSpace Java Helpers [](https://github.com/ilri/cgspace-java-helpers/actions)
 | 
					# CGSpace Java Helpers [](https://travis-ci.org/ilri/dspace-curation-tasks)
 | 
				
			||||||
DSpace curation tasks and other Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
 | 
					DSpace curation tasks and other Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
 | 
					- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
 | 
				
			||||||
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
 | 
					- **FixJpgJpgThumbnails**: Fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
 | 
				
			||||||
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
 | 
					 | 
				
			||||||
- **NormalizeDOIs**: normalize DOIs by stripping whitespace, lowercasing, and converting to https://doi.org/ format
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Tested on DSpace 7.6. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
 | 
					Tested on DSpace 5.8. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC5x/Curation+System).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Build and Install
 | 
					## Build and Install
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -17,7 +15,7 @@ To use these curation tasks in a DSpace project add the following dependency to
 | 
				
			|||||||
<dependency>
 | 
					<dependency>
 | 
				
			||||||
  <groupId>io.github.ilri.cgspace</groupId>
 | 
					  <groupId>io.github.ilri.cgspace</groupId>
 | 
				
			||||||
  <artifactId>cgspace-java-helpers</artifactId>
 | 
					  <artifactId>cgspace-java-helpers</artifactId>
 | 
				
			||||||
  <version>7.6.1.2-SNAPSHOT</version>
 | 
					  <version>5.4-SNAPSHOT</version>
 | 
				
			||||||
</dependency>
 | 
					</dependency>
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -26,26 +24,21 @@ The jar will be copied to all DSpace applications.
 | 
				
			|||||||
### Manual Build and Install
 | 
					### Manual Build and Install
 | 
				
			||||||
To build the standalone jar:
 | 
					To build the standalone jar:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```console
 | 
					```
 | 
				
			||||||
$ mvn package
 | 
					$ mvn package
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Copy the resulting jar to the DSpace `lib` directory:
 | 
					Copy the resulting jar to the DSpace `lib` directory:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```console
 | 
					```
 | 
				
			||||||
$ cp target/cgspace-java-helpers-7.6.1.2-SNAPSHOT.jar ~/dspace/lib/
 | 
					$ cp target/cgspace-java-helpers-5.4-SNAPSHOT.jar ~/dspace/lib
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Configuration
 | 
					## Configuration
 | 
				
			||||||
Please refer to the appropriate README.md file:
 | 
					Please refer to the appropriate README.md file:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- Curation Tasks: [src/main/java/io/github/ilri/cgspace/ctasks/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace7/src/main/java/io/github/ilri/cgspace/ctasks/README.md)
 | 
					- Curation Tasks: [src/main/java/io/github/ilri/cgspace/ctasks/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace5/src/main/java/io/github/ilri/cgspace/ctasks/README.md)
 | 
				
			||||||
- Scripts: [src/main/java/io/github/ilri/cgspace/scripts/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace7/src/main/java/io/github/ilri/cgspace/scripts/README.md)
 | 
					- Scripts: [src/main/java/io/github/ilri/cgspace/scripts/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace5/src/main/java/io/github/ilri/cgspace/scripts/README.md)
 | 
				
			||||||
 | 
					 | 
				
			||||||
## TODO
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- Migrate from maven-deploy-plugin to nexus-staging-maven-plugin, see: https://central.sonatype.org/publish/publish-maven/#nexus-staging-maven-plugin-for-deployment-and-release
 | 
					 | 
				
			||||||
- Stop using oss-parent, see: https://central.sonatype.org/publish/publish-maven/#create-a-ticket-with-sonatype
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Notes
 | 
					## Notes
 | 
				
			||||||
This project was initially created according to the [Maven Getting Started Guide](https://maven.apache.org/guides/getting-started/):
 | 
					This project was initially created according to the [Maven Getting Started Guide](https://maven.apache.org/guides/getting-started/):
 | 
				
			||||||
@@ -54,13 +47,11 @@ This project was initially created according to the [Maven Getting Started Guide
 | 
				
			|||||||
$ mvn -B archetype:generate -DgroupId=io.github.ilri.cgspace -DartifactId=cgspace-java-helpers -DarchetypeArtifactId=maven-archetype-quickstart -DarchetypeVersion=1.4
 | 
					$ mvn -B archetype:generate -DgroupId=io.github.ilri.cgspace -DartifactId=cgspace-java-helpers -DarchetypeArtifactId=maven-archetype-quickstart -DarchetypeVersion=1.4
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
To deploy a new `-SNAPSHOT` release to Maven Central (make sure OSSRH credentials are in `~/.m2/settings.xml`):
 | 
					## TODO
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```console
 | 
					- Make sure this doesn't work on items in the workflow
 | 
				
			||||||
$ mvn clean deploy
 | 
					- Check for existence of metadata field before trying to add metadata
 | 
				
			||||||
```
 | 
					- Add tests
 | 
				
			||||||
 | 
					 | 
				
			||||||
See: <a href="https://central.sonatype.org/publish/publish-maven/#performing-a-snapshot-deployment">Performing a Snapshot Deployment</a>
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
## License
 | 
					## License
 | 
				
			||||||
This work is licensed under the [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html).
 | 
					This work is licensed under the [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html).
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										52
									
								
								pom.xml
									
									
									
									
									
								
							
							
						
						
									
										52
									
								
								pom.xml
									
									
									
									
									
								
							@@ -6,7 +6,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  <groupId>io.github.ilri.cgspace</groupId>
 | 
					  <groupId>io.github.ilri.cgspace</groupId>
 | 
				
			||||||
  <artifactId>cgspace-java-helpers</artifactId>
 | 
					  <artifactId>cgspace-java-helpers</artifactId>
 | 
				
			||||||
  <version>7.6.1.2-SNAPSHOT</version>
 | 
					  <version>5.4-SNAPSHOT</version>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  <name>cgspace-java-helpers</name>
 | 
					  <name>cgspace-java-helpers</name>
 | 
				
			||||||
  <url>https://github.com/ilri/cgspace-java-helpers</url>
 | 
					  <url>https://github.com/ilri/cgspace-java-helpers</url>
 | 
				
			||||||
@@ -14,7 +14,7 @@
 | 
				
			|||||||
  <licenses>
 | 
					  <licenses>
 | 
				
			||||||
    <license>
 | 
					    <license>
 | 
				
			||||||
        <name>GPL-3.0-only</name>
 | 
					        <name>GPL-3.0-only</name>
 | 
				
			||||||
        <url>https://spdx.org/licenses/GPL-3.0-only.html</url>
 | 
					        <url>https://spdx.org/licenses/GPL-3.0-or-later.html</url>
 | 
				
			||||||
    </license>
 | 
					    </license>
 | 
				
			||||||
  </licenses>
 | 
					  </licenses>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -28,27 +28,34 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  <properties>
 | 
					  <properties>
 | 
				
			||||||
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
 | 
					    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
 | 
				
			||||||
    <maven.compiler.release>11</maven.compiler.release>
 | 
					    <maven.compiler.source>1.7</maven.compiler.source>
 | 
				
			||||||
 | 
					    <maven.compiler.target>1.7</maven.compiler.target>
 | 
				
			||||||
  </properties>
 | 
					  </properties>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  <dependencies>
 | 
					  <dependencies>
 | 
				
			||||||
 | 
					    <dependency>
 | 
				
			||||||
 | 
					      <groupId>junit</groupId>
 | 
				
			||||||
 | 
					      <artifactId>junit</artifactId>
 | 
				
			||||||
 | 
					      <version>4.11</version>
 | 
				
			||||||
 | 
					      <scope>test</scope>
 | 
				
			||||||
 | 
					    </dependency>
 | 
				
			||||||
    <dependency>
 | 
					    <dependency>
 | 
				
			||||||
      <groupId>com.google.code.gson</groupId>
 | 
					      <groupId>com.google.code.gson</groupId>
 | 
				
			||||||
      <artifactId>gson</artifactId>
 | 
					      <artifactId>gson</artifactId>
 | 
				
			||||||
      <version>2.9.0</version>
 | 
					      <version>2.2.1</version>
 | 
				
			||||||
    </dependency>
 | 
					    </dependency>
 | 
				
			||||||
    <dependency>
 | 
					    <dependency>
 | 
				
			||||||
      <groupId>org.dspace</groupId>
 | 
					      <groupId>org.dspace</groupId>
 | 
				
			||||||
      <artifactId>dspace-api</artifactId>
 | 
					      <artifactId>dspace-api</artifactId>
 | 
				
			||||||
      <version>7.6.1</version>
 | 
					      <version>5.8</version>
 | 
				
			||||||
      <scope>provided</scope>
 | 
					      <scope>provided</scope>
 | 
				
			||||||
    </dependency>
 | 
					    </dependency>
 | 
				
			||||||
  </dependencies>
 | 
					  </dependencies>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  <scm>
 | 
					  <scm>
 | 
				
			||||||
      <connection>scm:git:git://github.com/ilri/cgspace-java-helpers.git</connection>
 | 
					      <connection>scm:git:git://github.com/ilri/cgspace-java-helpers.git</connection>
 | 
				
			||||||
      <developerConnection>scm:git:ssh://github.com:ilri/cgspace-java-helpers.git</developerConnection>
 | 
					      <developerConnection>scm:git:ssh://github.com:nanosai/cgspace-java-helpers.git</developerConnection>
 | 
				
			||||||
      <url>https://github.com/ilri/cgspace-java-helpers</url>
 | 
					      <url>http://github.com/ilri/cgspace-java-helpers</url>
 | 
				
			||||||
  </scm>
 | 
					  </scm>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  <distributionManagement>
 | 
					  <distributionManagement>
 | 
				
			||||||
@@ -68,56 +75,43 @@
 | 
				
			|||||||
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
 | 
					        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
 | 
				
			||||||
        <plugin>
 | 
					        <plugin>
 | 
				
			||||||
          <artifactId>maven-clean-plugin</artifactId>
 | 
					          <artifactId>maven-clean-plugin</artifactId>
 | 
				
			||||||
          <version>3.3.2</version>
 | 
					          <version>3.1.0</version>
 | 
				
			||||||
        </plugin>
 | 
					        </plugin>
 | 
				
			||||||
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
 | 
					        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
 | 
				
			||||||
        <plugin>
 | 
					        <plugin>
 | 
				
			||||||
          <artifactId>maven-resources-plugin</artifactId>
 | 
					          <artifactId>maven-resources-plugin</artifactId>
 | 
				
			||||||
          <version>3.3.1</version>
 | 
					          <version>3.0.2</version>
 | 
				
			||||||
        </plugin>
 | 
					        </plugin>
 | 
				
			||||||
        <plugin>
 | 
					        <plugin>
 | 
				
			||||||
          <artifactId>maven-compiler-plugin</artifactId>
 | 
					          <artifactId>maven-compiler-plugin</artifactId>
 | 
				
			||||||
          <version>3.13.0</version>
 | 
					          <version>3.8.0</version>
 | 
				
			||||||
        </plugin>
 | 
					        </plugin>
 | 
				
			||||||
        <plugin>
 | 
					        <plugin>
 | 
				
			||||||
          <artifactId>maven-surefire-plugin</artifactId>
 | 
					          <artifactId>maven-surefire-plugin</artifactId>
 | 
				
			||||||
          <version>3.2.5</version>
 | 
					          <version>2.22.1</version>
 | 
				
			||||||
        </plugin>
 | 
					        </plugin>
 | 
				
			||||||
        <plugin>
 | 
					        <plugin>
 | 
				
			||||||
          <artifactId>maven-jar-plugin</artifactId>
 | 
					          <artifactId>maven-jar-plugin</artifactId>
 | 
				
			||||||
          <version>3.4.1</version>
 | 
					          <version>3.0.2</version>
 | 
				
			||||||
        </plugin>
 | 
					        </plugin>
 | 
				
			||||||
        <plugin>
 | 
					        <plugin>
 | 
				
			||||||
          <artifactId>maven-install-plugin</artifactId>
 | 
					          <artifactId>maven-install-plugin</artifactId>
 | 
				
			||||||
          <version>3.1.1</version>
 | 
					          <version>2.5.2</version>
 | 
				
			||||||
        </plugin>
 | 
					        </plugin>
 | 
				
			||||||
        <plugin>
 | 
					        <plugin>
 | 
				
			||||||
          <artifactId>maven-deploy-plugin</artifactId>
 | 
					          <artifactId>maven-deploy-plugin</artifactId>
 | 
				
			||||||
          <version>3.1.1</version>
 | 
					          <version>2.8.2</version>
 | 
				
			||||||
        </plugin>
 | 
					        </plugin>
 | 
				
			||||||
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
 | 
					        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
 | 
				
			||||||
        <plugin>
 | 
					        <plugin>
 | 
				
			||||||
          <artifactId>maven-site-plugin</artifactId>
 | 
					          <artifactId>maven-site-plugin</artifactId>
 | 
				
			||||||
          <version>3.12.1</version>
 | 
					          <version>3.7.1</version>
 | 
				
			||||||
        </plugin>
 | 
					        </plugin>
 | 
				
			||||||
        <plugin>
 | 
					        <plugin>
 | 
				
			||||||
          <artifactId>maven-project-info-reports-plugin</artifactId>
 | 
					          <artifactId>maven-project-info-reports-plugin</artifactId>
 | 
				
			||||||
          <version>3.5.0</version>
 | 
					          <version>3.0.0</version>
 | 
				
			||||||
        </plugin>
 | 
					        </plugin>
 | 
				
			||||||
      </plugins>
 | 
					      </plugins>
 | 
				
			||||||
    </pluginManagement>
 | 
					    </pluginManagement>
 | 
				
			||||||
  </build>
 | 
					  </build>
 | 
				
			||||||
 | 
					 | 
				
			||||||
  <repositories>
 | 
					 | 
				
			||||||
    <!-- Check Maven Central first (before other repos below) -->
 | 
					 | 
				
			||||||
    <repository>
 | 
					 | 
				
			||||||
        <id>maven-central</id>
 | 
					 | 
				
			||||||
        <url>https://repo.maven.apache.org/maven2</url>
 | 
					 | 
				
			||||||
    </repository>
 | 
					 | 
				
			||||||
    <!-- For Handle Server -->
 | 
					 | 
				
			||||||
    <repository>
 | 
					 | 
				
			||||||
        <id>handle.net</id>
 | 
					 | 
				
			||||||
        <url>https://handle.net/maven</url>
 | 
					 | 
				
			||||||
    </repository>
 | 
					 | 
				
			||||||
  </repositories>
 | 
					 | 
				
			||||||
</project>
 | 
					</project>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,8 +1,20 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2020 Alan Orth
 | 
					DSpace Curation Tasks
 | 
				
			||||||
 *
 | 
					Copyright (C) 2020  Alan Orth
 | 
				
			||||||
 * SPDX-License-Identifier: GPL-3.0-only
 | 
					
 | 
				
			||||||
 */
 | 
					This program is free software: you can redistribute it and/or modify
 | 
				
			||||||
 | 
					it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					the Free Software Foundation, either version 3 of the License, or
 | 
				
			||||||
 | 
					(at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You should have received a copy of the GNU General Public License
 | 
				
			||||||
 | 
					along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
package io.github.ilri.cgspace.ctasks;
 | 
					package io.github.ilri.cgspace.ctasks;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,8 +1,20 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2020 Alan Orth
 | 
					    DSpace Curation Tasks
 | 
				
			||||||
 *
 | 
					    Copyright (C) 2020  Alan Orth
 | 
				
			||||||
 * SPDX-License-Identifier: GPL-3.0-only
 | 
					
 | 
				
			||||||
 */
 | 
					    This program is free software: you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation, either version 3 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License
 | 
				
			||||||
 | 
					    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
package io.github.ilri.cgspace.ctasks;
 | 
					package io.github.ilri.cgspace.ctasks;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -10,17 +22,16 @@ import javax.annotation.Nullable;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
public class CountriesVocabulary {
 | 
					public class CountriesVocabulary {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    static class Country {
 | 
					    class Country {
 | 
				
			||||||
        private final String name; // required
 | 
					        private String name;            //required
 | 
				
			||||||
        private final String common_name; // optional
 | 
					        private String common_name;     //optional
 | 
				
			||||||
        private final String official_name; // optional
 | 
					        private String official_name;   //optional
 | 
				
			||||||
        private final String cgspace_name; // optional
 | 
					        private String cgspace_name;    //optional
 | 
				
			||||||
        private final String numeric; // required Hmmmm need to cast this...
 | 
					        private String numeric;         //required Hmmmm need to cast this...
 | 
				
			||||||
        private final String alpha_2; // required
 | 
					        private String alpha_2;         //required
 | 
				
			||||||
        private final String alpha_3; // required
 | 
					        private String alpha_3;         //required
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        public Country(
 | 
					        public Country(String name,
 | 
				
			||||||
                String name,
 | 
					 | 
				
			||||||
                       @Nullable String common_name,
 | 
					                       @Nullable String common_name,
 | 
				
			||||||
                       @Nullable String official_name,
 | 
					                       @Nullable String official_name,
 | 
				
			||||||
                       @Nullable String cgspace_name,
 | 
					                       @Nullable String cgspace_name,
 | 
				
			||||||
@@ -31,9 +42,7 @@ public class CountriesVocabulary {
 | 
				
			|||||||
            this.common_name = common_name;
 | 
					            this.common_name = common_name;
 | 
				
			||||||
            this.official_name = official_name;
 | 
					            this.official_name = official_name;
 | 
				
			||||||
            this.cgspace_name = cgspace_name;
 | 
					            this.cgspace_name = cgspace_name;
 | 
				
			||||||
            this.numeric =
 | 
					            this.numeric = numeric; // fuuuuu this is a string and we can't cast to Integer because some values are zeropadded like "004"
 | 
				
			||||||
                    numeric; // fuuuuu this is a string and we can't cast to Integer because some
 | 
					 | 
				
			||||||
                             // values are zeropadded like "004"
 | 
					 | 
				
			||||||
            this.alpha_2 = alpha_2;
 | 
					            this.alpha_2 = alpha_2;
 | 
				
			||||||
            this.alpha_3 = alpha_3;
 | 
					            this.alpha_3 = alpha_3;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,19 +1,29 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2020 Alan Orth
 | 
					    DSpace Curation Tasks
 | 
				
			||||||
 *
 | 
					    Copyright (C) 2020  Alan Orth
 | 
				
			||||||
 * SPDX-License-Identifier: GPL-3.0-only
 | 
					
 | 
				
			||||||
 */
 | 
					    This program is free software: you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation, either version 3 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License
 | 
				
			||||||
 | 
					    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
package io.github.ilri.cgspace.ctasks;
 | 
					package io.github.ilri.cgspace.ctasks;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import com.google.gson.Gson;
 | 
					import com.google.gson.Gson;
 | 
				
			||||||
 | 
					import org.apache.log4j.Logger;
 | 
				
			||||||
import org.apache.logging.log4j.LogManager;
 | 
					 | 
				
			||||||
import org.apache.logging.log4j.Logger;
 | 
					 | 
				
			||||||
import org.dspace.authorize.AuthorizeException;
 | 
					import org.dspace.authorize.AuthorizeException;
 | 
				
			||||||
import org.dspace.content.DSpaceObject;
 | 
					import org.dspace.content.DSpaceObject;
 | 
				
			||||||
import org.dspace.content.Item;
 | 
					import org.dspace.content.Item;
 | 
				
			||||||
import org.dspace.content.MetadataValue;
 | 
					import org.dspace.content.Metadatum;
 | 
				
			||||||
import org.dspace.core.Constants;
 | 
					import org.dspace.core.Constants;
 | 
				
			||||||
import org.dspace.curate.AbstractCurationTask;
 | 
					import org.dspace.curate.AbstractCurationTask;
 | 
				
			||||||
import org.dspace.curate.Curator;
 | 
					import org.dspace.curate.Curator;
 | 
				
			||||||
@@ -24,28 +34,25 @@ import java.io.InputStreamReader;
 | 
				
			|||||||
import java.sql.SQLException;
 | 
					import java.sql.SQLException;
 | 
				
			||||||
import java.util.ArrayList;
 | 
					import java.util.ArrayList;
 | 
				
			||||||
import java.util.List;
 | 
					import java.util.List;
 | 
				
			||||||
import java.util.Objects;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/**
 | 
				
			||||||
 * Add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * @author Alan Orth for the International Livestock Research Institute
 | 
					 * @author Alan Orth for the International Livestock Research Institute
 | 
				
			||||||
 * @version 7.6.1.2
 | 
					 * @version 5.1
 | 
				
			||||||
 * @since 5.1
 | 
					 * @since 1.0
 | 
				
			||||||
 */
 | 
					*/
 | 
				
			||||||
public class CountryCodeTagger extends AbstractCurationTask {
 | 
					public class CountryCodeTagger extends AbstractCurationTask
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
    public class CountryCodeTaggerConfig {
 | 
					    public class CountryCodeTaggerConfig {
 | 
				
			||||||
        private final String isocodesJsonPath = "/io/github/ilri/cgspace/ctasks/iso_3166-1.json";
 | 
					        private final String isocodesJsonPath = "/io/github/ilri/cgspace/ctasks/iso_3166-1.json";
 | 
				
			||||||
        private final String cgspaceCountriesJsonPath =
 | 
					        private final String cgspaceCountriesJsonPath = "/io/github/ilri/cgspace/ctasks/cgspace-countries.json";
 | 
				
			||||||
                "/io/github/ilri/cgspace/ctasks/cgspace-countries.json";
 | 
					 | 
				
			||||||
        private final String iso3166Field = taskProperty("iso3166.field");
 | 
					        private final String iso3166Field = taskProperty("iso3166.field");
 | 
				
			||||||
        private final String iso3166Alpha2Field = taskProperty("iso3166-alpha2.field");
 | 
					        private final String iso3166Alpha2Field = taskProperty("iso3166-alpha2.field");
 | 
				
			||||||
        private final boolean forceupdate = taskBooleanProperty("forceupdate", false);
 | 
					        private final boolean forceupdate = taskBooleanProperty("forceupdate", false);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        private final Logger log = LogManager.getLogger();
 | 
					        private Logger log = Logger.getLogger(CountryCodeTagger.class);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    public static class CountryCodeTaggerResult {
 | 
					    public class CountryCodeTaggerResult {
 | 
				
			||||||
        private int status = Curator.CURATE_UNSET;
 | 
					        private int status = Curator.CURATE_UNSET;
 | 
				
			||||||
        private String result = null;
 | 
					        private String result = null;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -67,130 +74,96 @@ public class CountryCodeTagger extends AbstractCurationTask {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @Override
 | 
					    @Override
 | 
				
			||||||
    public int perform(DSpaceObject dso) throws IOException {
 | 
					    public int perform(DSpaceObject dso) throws IOException
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
        // gotta define this here so we can access it after the if context...
 | 
					        // gotta define this here so we can access it after the if context...
 | 
				
			||||||
        CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
 | 
					        CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (dso.getType() == Constants.ITEM) {
 | 
							if (dso.getType() == Constants.ITEM)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
            // Load configuration
 | 
					            // Load configuration
 | 
				
			||||||
            CountryCodeTaggerConfig config = new CountryCodeTaggerConfig();
 | 
					            CountryCodeTaggerConfig config = new CountryCodeTaggerConfig();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            Item item = (Item) dso;
 | 
					            Item item = (Item)dso;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            try {
 | 
					 | 
				
			||||||
            alpha2Result = performAlpha2(item, config);
 | 
					            alpha2Result = performAlpha2(item, config);
 | 
				
			||||||
            } catch (SQLException throwables) {
 | 
					 | 
				
			||||||
                throwables.printStackTrace();
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
            setResult(alpha2Result.getResult());
 | 
					            setResult(alpha2Result.getResult());
 | 
				
			||||||
 | 
					            report(alpha2Result.getResult());
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		return alpha2Result.getStatus();
 | 
							return alpha2Result.getStatus();
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    public CountryCodeTaggerResult performAlpha2(Item item, CountryCodeTaggerConfig config)
 | 
					    public CountryCodeTaggerResult performAlpha2(Item item, CountryCodeTaggerConfig config) throws IOException
 | 
				
			||||||
            throws IOException, SQLException {
 | 
					    {
 | 
				
			||||||
        CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
 | 
					        CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
 | 
				
			||||||
 | 
					        String itemHandle = item.getHandle();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        List<MetadataValue> itemCountries =
 | 
					        Metadatum[] itemCountries = item.getMetadataByMetadataString(config.iso3166Field);
 | 
				
			||||||
                itemService.getMetadataByMetadataString(item, config.iso3166Field);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // skip items that don't have country metadata
 | 
					        // skip items that don't have country metadata
 | 
				
			||||||
        if (itemCountries.isEmpty()) {
 | 
					        if (itemCountries.length == 0) {
 | 
				
			||||||
            alpha2Result.setResult("No countries, skipping.");
 | 
					            alpha2Result.setResult(itemHandle + ": no countries, skipping.");
 | 
				
			||||||
            alpha2Result.setStatus(Curator.CURATE_SKIP);
 | 
					            alpha2Result.setStatus(Curator.CURATE_SKIP);
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            Gson gson = new Gson();
 | 
					            Gson gson = new Gson();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            // TODO: convert to try:
 | 
					            // TODO: convert to try: https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
 | 
				
			||||||
            // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
 | 
					            BufferedReader reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(config.isocodesJsonPath)));
 | 
				
			||||||
            BufferedReader reader =
 | 
					            ISO3166CountriesVocabulary isocodesCountriesJson = gson.fromJson(reader, ISO3166CountriesVocabulary.class);
 | 
				
			||||||
                    new BufferedReader(
 | 
					 | 
				
			||||||
                            new InputStreamReader(
 | 
					 | 
				
			||||||
                                    Objects.requireNonNull(this.getClass().getResourceAsStream(config.isocodesJsonPath))));
 | 
					 | 
				
			||||||
            ISO3166CountriesVocabulary isocodesCountriesJson =
 | 
					 | 
				
			||||||
                    gson.fromJson(reader, ISO3166CountriesVocabulary.class);
 | 
					 | 
				
			||||||
            reader.close();
 | 
					            reader.close();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            reader =
 | 
					            reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(config.cgspaceCountriesJsonPath)));
 | 
				
			||||||
                    new BufferedReader(
 | 
					            CGSpaceCountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CGSpaceCountriesVocabulary.class);
 | 
				
			||||||
                            new InputStreamReader(
 | 
					 | 
				
			||||||
                                    Objects.requireNonNull(this.getClass()
 | 
					 | 
				
			||||||
                                            .getResourceAsStream(config.cgspaceCountriesJsonPath))));
 | 
					 | 
				
			||||||
            CGSpaceCountriesVocabulary cgspaceCountriesJson =
 | 
					 | 
				
			||||||
                    gson.fromJson(reader, CGSpaceCountriesVocabulary.class);
 | 
					 | 
				
			||||||
            reader.close();
 | 
					            reader.close();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            // split the alpha2 country code field into schema, element, and qualifier so we can use
 | 
					            // split the alpha2 country code field into schema, element, and qualifier so we can use it with item.addMetadata()
 | 
				
			||||||
            // it with item.addMetadata()
 | 
					 | 
				
			||||||
            String[] iso3166Alpha2FieldParts = config.iso3166Alpha2Field.split("\\.");
 | 
					            String[] iso3166Alpha2FieldParts = config.iso3166Alpha2Field.split("\\.");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if (config.forceupdate) {
 | 
					            if (config.forceupdate) {
 | 
				
			||||||
                itemService.clearMetadata(
 | 
					                item.clearMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], Item.ANY);
 | 
				
			||||||
                        Curator.curationContext(),
 | 
					 | 
				
			||||||
                        item,
 | 
					 | 
				
			||||||
                        iso3166Alpha2FieldParts[0],
 | 
					 | 
				
			||||||
                        iso3166Alpha2FieldParts[1],
 | 
					 | 
				
			||||||
                        iso3166Alpha2FieldParts[2],
 | 
					 | 
				
			||||||
                        Item.ANY);
 | 
					 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            // check the item's country codes, if any
 | 
					            // check the item's country codes, if any
 | 
				
			||||||
            List<MetadataValue> itemAlpha2CountryCodes =
 | 
					            Metadatum[] itemAlpha2CountryCodes = item.getMetadataByMetadataString(config.iso3166Alpha2Field);
 | 
				
			||||||
                    itemService.getMetadataByMetadataString(item, config.iso3166Alpha2Field);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if (itemAlpha2CountryCodes.isEmpty()) {
 | 
					            if (itemAlpha2CountryCodes.length == 0) {
 | 
				
			||||||
                List<String> newAlpha2Codes = new ArrayList<String>();
 | 
					                List<String> newAlpha2Codes = new ArrayList<String>();
 | 
				
			||||||
                for (MetadataValue itemCountry : itemCountries) {
 | 
					                for (Metadatum itemCountry : itemCountries) {
 | 
				
			||||||
                    // check ISO 3166-1 countries
 | 
					                    //check ISO 3166-1 countries
 | 
				
			||||||
                    for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
 | 
					                    for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
 | 
				
			||||||
                        if (itemCountry.getValue().equalsIgnoreCase(country.getName())
 | 
					                        if (itemCountry.value.equalsIgnoreCase(country.getName()) || itemCountry.value.equalsIgnoreCase(country.get_official_name()) || itemCountry.value.equalsIgnoreCase(country.get_common_name())) {
 | 
				
			||||||
                                || itemCountry
 | 
					 | 
				
			||||||
                                        .getValue()
 | 
					 | 
				
			||||||
                                        .equalsIgnoreCase(country.get_official_name())
 | 
					 | 
				
			||||||
                                || itemCountry
 | 
					 | 
				
			||||||
                                        .getValue()
 | 
					 | 
				
			||||||
                                        .equalsIgnoreCase(country.get_common_name())) {
 | 
					 | 
				
			||||||
                            newAlpha2Codes.add(country.getAlpha_2());
 | 
					                            newAlpha2Codes.add(country.getAlpha_2());
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    // check CGSpace countries
 | 
					                    //check CGSpace countries
 | 
				
			||||||
                    for (CountriesVocabulary.Country country : cgspaceCountriesJson.countries) {
 | 
					                    for (CountriesVocabulary.Country country : cgspaceCountriesJson.countries) {
 | 
				
			||||||
                        if (itemCountry.getValue().equalsIgnoreCase(country.getCgspace_name())) {
 | 
					                        if (itemCountry.value.equalsIgnoreCase(country.getCgspace_name())) {
 | 
				
			||||||
                            newAlpha2Codes.add(country.getAlpha_2());
 | 
					                            newAlpha2Codes.add(country.getAlpha_2());
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                if (!newAlpha2Codes.isEmpty()) {
 | 
					                if (newAlpha2Codes.size() > 0) {
 | 
				
			||||||
                    try {
 | 
					                    try {
 | 
				
			||||||
                        itemService.addMetadata(
 | 
					                        // add metadata values (casting the List<String> to an array)
 | 
				
			||||||
                                Curator.curationContext(),
 | 
					                        item.addMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], "en_US", newAlpha2Codes.toArray(new String[0]));
 | 
				
			||||||
                                item,
 | 
					                        item.update();
 | 
				
			||||||
                                iso3166Alpha2FieldParts[0],
 | 
					 | 
				
			||||||
                                iso3166Alpha2FieldParts[1],
 | 
					 | 
				
			||||||
                                iso3166Alpha2FieldParts[2],
 | 
					 | 
				
			||||||
                                "en_US",
 | 
					 | 
				
			||||||
                                newAlpha2Codes);
 | 
					 | 
				
			||||||
                        itemService.update(Curator.curationContext(), item);
 | 
					 | 
				
			||||||
                    } catch (SQLException | AuthorizeException sqle) {
 | 
					                    } catch (SQLException | AuthorizeException sqle) {
 | 
				
			||||||
                        config.log.debug(sqle.getMessage());
 | 
					                        config.log.debug(sqle.getMessage());
 | 
				
			||||||
                        alpha2Result.setResult("Error");
 | 
					                        alpha2Result.setResult(itemHandle + ": error");
 | 
				
			||||||
                        alpha2Result.setStatus(Curator.CURATE_ERROR);
 | 
					                        alpha2Result.setStatus(Curator.CURATE_ERROR);
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    alpha2Result.setResult(
 | 
					                    alpha2Result.setResult(itemHandle + ": added " + newAlpha2Codes.size() + " alpha2 country code(s)");
 | 
				
			||||||
                            "Added "
 | 
					 | 
				
			||||||
                                    + newAlpha2Codes.size()
 | 
					 | 
				
			||||||
                                    + " alpha2 country code(s)");
 | 
					 | 
				
			||||||
                } else {
 | 
					                } else {
 | 
				
			||||||
                    alpha2Result.setResult("No matching countries found");
 | 
					                    alpha2Result.setResult(itemHandle + ": no matching countries found");
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                alpha2Result.setStatus(Curator.CURATE_SUCCESS);
 | 
					                alpha2Result.setStatus(Curator.CURATE_SUCCESS);
 | 
				
			||||||
            } else {
 | 
					            } else {
 | 
				
			||||||
                alpha2Result.setResult("Item already has country codes, skipping unless forced");
 | 
					                alpha2Result.setResult(itemHandle + ": item has country codes, skipping");
 | 
				
			||||||
                alpha2Result.setStatus(Curator.CURATE_SKIP);
 | 
					                alpha2Result.setStatus(Curator.CURATE_SKIP);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,18 +1,27 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * Copyright (C) 2020 Alan Orth
 | 
					DSpace Curation Tasks
 | 
				
			||||||
 *
 | 
					Copyright (C) 2020  Alan Orth
 | 
				
			||||||
 * SPDX-License-Identifier: GPL-3.0-only
 | 
					
 | 
				
			||||||
 */
 | 
					This program is free software: you can redistribute it and/or modify
 | 
				
			||||||
 | 
					it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					the Free Software Foundation, either version 3 of the License, or
 | 
				
			||||||
 | 
					(at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You should have received a copy of the GNU General Public License
 | 
				
			||||||
 | 
					along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
package io.github.ilri.cgspace.ctasks;
 | 
					package io.github.ilri.cgspace.ctasks;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import com.google.gson.annotations.SerializedName;
 | 
					import com.google.gson.annotations.SerializedName;
 | 
				
			||||||
 | 
					 | 
				
			||||||
import java.util.List;
 | 
					import java.util.List;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
public class ISO3166CountriesVocabulary extends CountriesVocabulary {
 | 
					public class ISO3166CountriesVocabulary extends CountriesVocabulary {
 | 
				
			||||||
    // support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since
 | 
					    // support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since our class needs to match the JSON exactly
 | 
				
			||||||
    // our class needs to match the JSON exactly
 | 
					    @SerializedName("3166-1") List<Country> countries;
 | 
				
			||||||
    @SerializedName("3166-1")
 | 
					 | 
				
			||||||
    List<Country> countries;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -1,96 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2024 Alan Orth
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * SPDX-License-Identifier: GPL-3.0-only
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
package io.github.ilri.cgspace.ctasks;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import org.dspace.content.DSpaceObject;
 | 
					 | 
				
			||||||
import org.dspace.content.Item;
 | 
					 | 
				
			||||||
import org.dspace.content.MetadataValue;
 | 
					 | 
				
			||||||
import org.dspace.core.Constants;
 | 
					 | 
				
			||||||
import org.dspace.curate.AbstractCurationTask;
 | 
					 | 
				
			||||||
import org.dspace.curate.Curator;
 | 
					 | 
				
			||||||
import org.dspace.curate.Suspendable;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import java.io.IOException;
 | 
					 | 
				
			||||||
import java.util.List;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/**
 | 
					 | 
				
			||||||
 * Attempt to normalize DOIs by stripping whitespace, lower casing, and
 | 
					 | 
				
			||||||
 * converting to <code>https://doi.org</code> format. The reason is that DOIs are case
 | 
					 | 
				
			||||||
 * insensitive and must be unique, which we can only guarantee if they are
 | 
					 | 
				
			||||||
 * normalized to the same format.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * See: <a href="https://www.crossref.org/documentation/member-setup/constructing-your-dois/">https://www.crossref.org/documentation/member-setup/constructing-your-dois/</a>
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * TODO: set curation to failed if invalid DOI submitted (and configure to reject in workflow)
 | 
					 | 
				
			||||||
 * TODO: allow operation on communities and collections (currently only works on items)
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * @author Alan Orth for the International Livestock Research Institute
 | 
					 | 
				
			||||||
 * @version 7.6.1.2
 | 
					 | 
				
			||||||
 * @since 7.6.1.1
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
@Suspendable
 | 
					 | 
				
			||||||
public class NormalizeDOIs extends AbstractCurationTask {
 | 
					 | 
				
			||||||
    @Override
 | 
					 | 
				
			||||||
    public int perform(DSpaceObject dso) throws IOException {
 | 
					 | 
				
			||||||
        if (dso.getType() == Constants.ITEM) {
 | 
					 | 
				
			||||||
            Item item = (Item) dso;
 | 
					 | 
				
			||||||
            String result;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            // Keep track of whether we change metadata, and how many
 | 
					 | 
				
			||||||
            boolean metadataChanged = false;
 | 
					 | 
				
			||||||
            int count = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            // Hard coding the metadata field for now since I can't figure out how to read the taskProperty
 | 
					 | 
				
			||||||
            List<MetadataValue> itemDOIs = itemService.getMetadataByMetadataString(item, "cg.identifier.doi");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            // skip items that don't have DOIs
 | 
					 | 
				
			||||||
            if (itemDOIs.isEmpty()) {
 | 
					 | 
				
			||||||
                setResult("No DOIs, skipping");
 | 
					 | 
				
			||||||
                return Curator.CURATE_SKIP;
 | 
					 | 
				
			||||||
            } else {
 | 
					 | 
				
			||||||
                for (MetadataValue itemDOI : itemDOIs) {
 | 
					 | 
				
			||||||
                    String newDOI = getNormalizedDOI(itemDOI);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    // Check if the normalized DOI is different than the original
 | 
					 | 
				
			||||||
                    if (!newDOI.equals(itemDOI.getValue())) {
 | 
					 | 
				
			||||||
                        itemDOI.setValue(newDOI);
 | 
					 | 
				
			||||||
                        metadataChanged = true;
 | 
					 | 
				
			||||||
                        count++;
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            if (metadataChanged) {
 | 
					 | 
				
			||||||
                result = "Normalized " + count + " DOI(s)";
 | 
					 | 
				
			||||||
            } else {
 | 
					 | 
				
			||||||
                result = "All DOIs already normalized";
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            setResult(result);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            return Curator.CURATE_SUCCESS;
 | 
					 | 
				
			||||||
        } else {
 | 
					 | 
				
			||||||
            setResult("Object skipped");
 | 
					 | 
				
			||||||
            return Curator.CURATE_SKIP;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    private static String getNormalizedDOI(MetadataValue itemDOI) {
 | 
					 | 
				
			||||||
        // 1. Convert to lowercase
 | 
					 | 
				
			||||||
        String newDOI = itemDOI.getValue().toLowerCase();
 | 
					 | 
				
			||||||
        // 2. Strip leading and trailing whitespace
 | 
					 | 
				
			||||||
        newDOI = newDOI.strip();
 | 
					 | 
				
			||||||
        // 3. Convert to HTTPS
 | 
					 | 
				
			||||||
        newDOI = newDOI.replace("http://", "https://");
 | 
					 | 
				
			||||||
        // 4. Prefer doi.org to dx.doi.org
 | 
					 | 
				
			||||||
        newDOI = newDOI.replace("dx.doi.org", "doi.org");
 | 
					 | 
				
			||||||
        // 5. Replace values like doi: 10.11648/j.jps.20140201.14
 | 
					 | 
				
			||||||
        newDOI = newDOI.replaceAll("^doi: 10\\.", "https://doi.org/10.");
 | 
					 | 
				
			||||||
        // 6. Replace values like 10.3390/foods12010115
 | 
					 | 
				
			||||||
        newDOI = newDOI.replaceAll("^10\\.", "https://doi.org/10.");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        return newDOI;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -2,9 +2,8 @@
 | 
				
			|||||||
DSpace curation tasks used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
 | 
					DSpace curation tasks used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
 | 
					- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
 | 
				
			||||||
- **NormalizeDOIs**: normalize DOIs by stripping whitespace, lowercasing, and converting to https://doi.org/ format
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Tested on DSpace 7.6. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC5x/Curation+System).
 | 
					Tested on DSpace 5.8. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC5x/Curation+System).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Build and Install
 | 
					## Build and Install
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -15,7 +14,7 @@ To use these curation tasks in a DSpace project add the following dependency to
 | 
				
			|||||||
<dependency>
 | 
					<dependency>
 | 
				
			||||||
  <groupId>io.github.ilri.cgspace</groupId>
 | 
					  <groupId>io.github.ilri.cgspace</groupId>
 | 
				
			||||||
  <artifactId>cgspace-java-helpers</artifactId>
 | 
					  <artifactId>cgspace-java-helpers</artifactId>
 | 
				
			||||||
  <version>7.6.1.2-SNAPSHOT</version>
 | 
					  <version>5.3</version>
 | 
				
			||||||
</dependency>
 | 
					</dependency>
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -31,41 +30,42 @@ $ mvn package
 | 
				
			|||||||
Copy the resulting jar to the DSpace `lib` directory:
 | 
					Copy the resulting jar to the DSpace `lib` directory:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
$ cp target/cgspace-java-helpers-7.6.1.2-SNAPSHOT.jar ~/dspace/lib/
 | 
					$ cp target/cgspace-java-helpers-5.3.jar ~/dspace/lib
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Configuration
 | 
					## Configuration
 | 
				
			||||||
Add the curation task(s) to DSpace's `config/modules/curate.cfg`:
 | 
					Add the curation task to DSpace's `config/modules/curate.cfg`:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger
 | 
					plugin.named.org.dspace.curate.CurationTask = \
 | 
				
			||||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger.force
 | 
					...
 | 
				
			||||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.NormalizeDOIs = normalizedois
 | 
					    io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger \
 | 
				
			||||||
 | 
					    io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger.force
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
And then add the following variables to your `local.cfg` or some other [configuration file that is included](https://wiki.lyrasis.org/display/DSDOC6x/Configuration+Reference#ConfigurationReference-IncludingotherPropertyFiles):
 | 
					And then add a configuration file for the task in `config/modules/countrycodetagger.cfg`:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
# name of the field containing ISO 3166-1 country names
 | 
					# name of the field containing ISO 3166-1 country names
 | 
				
			||||||
countrycodetagger.iso3166.field = cg.coverage.country
 | 
					iso3166.field = cg.coverage.country
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# name of the field containing ISO 3166-1 Alpha2 country codes
 | 
					# name of the field containing ISO 3166-1 Alpha2 country codes
 | 
				
			||||||
countrycodetagger.iso3166-alpha2.field = cg.coverage.iso3166-alpha2
 | 
					iso3166-alpha2.field = cg.coverage.iso3166-alpha2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# only add country codes if an item doesn't have any (default false)
 | 
					# only add country codes if an item doesn't have any (default false)
 | 
				
			||||||
#countrycodetagger.forceupdate = false
 | 
					#forceupdate = false
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
*Note*: DSpace's curation system supports "profiles" where you can use the same task with different options; for example, above I have a normal country code tagger task and a "force" variant. The "force" variant is the same task, but it looks for configuration variables using the `countrycodetagger.force` prefix instead. To use the "force" variant you simply need to add these new variables, with the `forceupdate` parameter overridden, to the same configuration file where you put the other variables. The "force" profile clears all existing country codes and updates everything.
 | 
					*Note*: DSpace's curation system supports "profiles" where you can use the same task with different options, for example above I have a normal country code tagger and a "force" variant. To use the "force" variant you create a new configuration file with the overridden options in `config/modules/countrycodetagger.force.cfg`. The "force" profile clears all existing country codes and updates everything.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Invocation
 | 
					## Invocation
 | 
				
			||||||
Once the jar is installed and you have added appropriate configuration in `~/dspace/config/modules`:
 | 
					Once the jar is installed and you have added appropriate configuration in `~/dspace/config/modules`:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
$ ~/dspace/bin/dspace curate -t countrycodetagger -i 10568/3 -r - -s object
 | 
					$ ~/dspace/bin/dspace curate -t countrycodetagger -i 10568/3 -r - -l 500 -s object
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
*Note*: it is very important to set the database transaction scope to something sensible (`object`) if you're curating a community or collection with more than a few hundred items.
 | 
					*Note*: it is very important to set the cache limit (`-l`) and the database transaction scope to something sensible (`object`) if you're curating a community or collection with more than a few hundred items.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## TODO
 | 
					## TODO
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,46 +1,22 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2020 Alan Orth
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * SPDX-License-Identifier: GPL-3.0-only
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
package io.github.ilri.cgspace.scripts;
 | 
					package io.github.ilri.cgspace.scripts;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import org.apache.commons.lang.StringUtils;
 | 
					import org.apache.commons.lang.StringUtils;
 | 
				
			||||||
import org.dspace.authorize.AuthorizeException;
 | 
					import org.dspace.authorize.AuthorizeException;
 | 
				
			||||||
import org.dspace.content.Bitstream;
 | 
					import org.dspace.content.*;
 | 
				
			||||||
import org.dspace.content.Bundle;
 | 
					 | 
				
			||||||
import org.dspace.content.Collection;
 | 
					 | 
				
			||||||
import org.dspace.content.Community;
 | 
					 | 
				
			||||||
import org.dspace.content.DSpaceObject;
 | 
					 | 
				
			||||||
import org.dspace.content.Item;
 | 
					 | 
				
			||||||
import org.dspace.content.MetadataValue;
 | 
					 | 
				
			||||||
import org.dspace.content.factory.ContentServiceFactory;
 | 
					 | 
				
			||||||
import org.dspace.content.service.BundleService;
 | 
					 | 
				
			||||||
import org.dspace.content.service.ItemService;
 | 
					 | 
				
			||||||
import org.dspace.core.Constants;
 | 
					import org.dspace.core.Constants;
 | 
				
			||||||
import org.dspace.core.Context;
 | 
					import org.dspace.core.Context;
 | 
				
			||||||
import org.dspace.handle.factory.HandleServiceFactory;
 | 
					import org.dspace.handle.HandleManager;
 | 
				
			||||||
import org.dspace.handle.service.HandleService;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
import java.io.IOException;
 | 
					import java.io.IOException;
 | 
				
			||||||
import java.sql.SQLException;
 | 
					import java.sql.SQLException;
 | 
				
			||||||
import java.util.Iterator;
 | 
					 | 
				
			||||||
import java.util.List;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * @author Andrea Schweer schweer@waikato.ac.nz for the LCoNZ Institutional Research Repositories
 | 
					 * @author Andrea Schweer schweer@waikato.ac.nz for the LCoNZ Institutional Research Repositories
 | 
				
			||||||
 * @author Alan Orth for the International Livestock Research Institute
 | 
					 * @author Alan Orth for the International Livestock Research Institute
 | 
				
			||||||
 * @version 6.1
 | 
					 * @version 5.4
 | 
				
			||||||
 * @since 5.1
 | 
					 * @since 5.1
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
public class FixJpgJpgThumbnails {
 | 
					public class FixJpgJpgThumbnails {
 | 
				
			||||||
    // note: static members belong to the class itself, not any one instance
 | 
					 | 
				
			||||||
    public static ItemService itemService = ContentServiceFactory.getInstance().getItemService();
 | 
					 | 
				
			||||||
    public static HandleService handleService =
 | 
					 | 
				
			||||||
            HandleServiceFactory.getInstance().getHandleService();
 | 
					 | 
				
			||||||
    public static BundleService bundleService =
 | 
					 | 
				
			||||||
            ContentServiceFactory.getInstance().getBundleService();
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	public static void main(String[] args) {
 | 
						public static void main(String[] args) {
 | 
				
			||||||
		String parentHandle = null;
 | 
							String parentHandle = null;
 | 
				
			||||||
@@ -54,32 +30,25 @@ public class FixJpgJpgThumbnails {
 | 
				
			|||||||
			context.turnOffAuthorisationSystem();
 | 
								context.turnOffAuthorisationSystem();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if (StringUtils.isBlank(parentHandle)) {
 | 
								if (StringUtils.isBlank(parentHandle)) {
 | 
				
			||||||
                process(context, itemService.findAll(context));
 | 
									process(context, Item.findAll(context));
 | 
				
			||||||
			} else {
 | 
								} else {
 | 
				
			||||||
                DSpaceObject parent = handleService.resolveToObject(context, parentHandle);
 | 
									DSpaceObject parent = HandleManager.resolveToObject(context, parentHandle);
 | 
				
			||||||
				if (parent != null) {
 | 
									if (parent != null) {
 | 
				
			||||||
					switch (parent.getType()) {
 | 
										switch (parent.getType()) {
 | 
				
			||||||
                        case Constants.SITE:
 | 
											case Constants.COLLECTION:
 | 
				
			||||||
                            process(context, itemService.findAll(context));
 | 
												process(context, ((Collection) parent).getAllItems()); // getAllItems because we want to work on non-archived ones as well
 | 
				
			||||||
                            context.commit();
 | 
					 | 
				
			||||||
							break;
 | 
												break;
 | 
				
			||||||
						case Constants.COMMUNITY:
 | 
											case Constants.COMMUNITY:
 | 
				
			||||||
                            List<Collection> collections = ((Community) parent).getCollections();
 | 
												Collection[] collections = ((Community) parent).getCollections();
 | 
				
			||||||
							for (Collection collection : collections) {
 | 
												for (Collection collection : collections) {
 | 
				
			||||||
                                process(
 | 
													process(context, collection.getAllItems()); // getAllItems because we want to work on non-archived ones as well
 | 
				
			||||||
                                        context,
 | 
					 | 
				
			||||||
                                        itemService.findAllByCollection(context, collection));
 | 
					 | 
				
			||||||
							}
 | 
												}
 | 
				
			||||||
                            context.commit();
 | 
					 | 
				
			||||||
							break;
 | 
												break;
 | 
				
			||||||
                        case Constants.COLLECTION:
 | 
											case Constants.SITE:
 | 
				
			||||||
                            process(
 | 
												process(context, Item.findAll(context));
 | 
				
			||||||
                                    context,
 | 
					 | 
				
			||||||
                                    itemService.findByCollection(context, (Collection) parent));
 | 
					 | 
				
			||||||
                            context.commit();
 | 
					 | 
				
			||||||
							break;
 | 
												break;
 | 
				
			||||||
						case Constants.ITEM:
 | 
											case Constants.ITEM:
 | 
				
			||||||
                            processItem(context, (Item) parent);
 | 
												processItem((Item) parent);
 | 
				
			||||||
							context.commit();
 | 
												context.commit();
 | 
				
			||||||
							break;
 | 
												break;
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
@@ -94,77 +63,64 @@ public class FixJpgJpgThumbnails {
 | 
				
			|||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    private static void process(Context context, Iterator<Item> items)
 | 
						private static void process(Context context, ItemIterator items) throws SQLException, IOException, AuthorizeException {
 | 
				
			||||||
            throws SQLException, IOException, AuthorizeException {
 | 
					 | 
				
			||||||
		while (items.hasNext()) {
 | 
							while (items.hasNext()) {
 | 
				
			||||||
			Item item = items.next();
 | 
								Item item = items.next();
 | 
				
			||||||
            processItem(context, item);
 | 
								processItem(item);
 | 
				
			||||||
            itemService.update(context, item);
 | 
								context.commit();
 | 
				
			||||||
 | 
								item.decache();
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    private static void processItem(Context context, Item item)
 | 
						private static void processItem(Item item) throws SQLException, AuthorizeException, IOException {
 | 
				
			||||||
            throws SQLException, AuthorizeException, IOException {
 | 
							// Some bitstreams like Infographics are large JPGs and put in the ORIGINAL bundle on purpose so we shouldn't
 | 
				
			||||||
        // Some bitstreams like Infographics and Maps are large JPEGs and put in the ORIGINAL bundle
 | 
					 | 
				
			||||||
        // on purpose so we shouldn't
 | 
					 | 
				
			||||||
		// swap them.
 | 
							// swap them.
 | 
				
			||||||
        List<MetadataValue> itemTypes =
 | 
							Metadatum[] itemTypes = item.getMetadataByMetadataString("dc.type");
 | 
				
			||||||
                itemService.getMetadataByMetadataString(item, "dcterms.type");
 | 
							boolean itemHasInfographic = false;
 | 
				
			||||||
        for (MetadataValue itemType : itemTypes) {
 | 
							for (Metadatum itemType: itemTypes) {
 | 
				
			||||||
            if (itemType.getValue().equals("Infographic") || itemType.getValue().equals("Map")) {
 | 
								if (itemType.value.equals("Infographic")) {
 | 
				
			||||||
                System.out.println(
 | 
									itemHasInfographic = true;
 | 
				
			||||||
                        item.getHandle() + ": item has an Infographic or Map, skipping.");
 | 
									break;
 | 
				
			||||||
                return;
 | 
					 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        List<Bundle> thumbnailBundles = item.getBundles("THUMBNAIL");
 | 
							Bundle[] thumbnailBundles = item.getBundles("THUMBNAIL");
 | 
				
			||||||
		for (Bundle thumbnailBundle : thumbnailBundles) {
 | 
							for (Bundle thumbnailBundle : thumbnailBundles) {
 | 
				
			||||||
            List<Bitstream> thumbnailBundleBitstreams = thumbnailBundle.getBitstreams();
 | 
								Bitstream[] thumbnailBundleBitstreams = thumbnailBundle.getBitstreams();
 | 
				
			||||||
			for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
 | 
								for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
 | 
				
			||||||
				String thumbnailName = thumbnailBitstream.getName();
 | 
									String thumbnailName = thumbnailBitstream.getName();
 | 
				
			||||||
                String thumbnailDescription = thumbnailBitstream.getDescription();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                // There is no point continuing if the thumbnail's description is empty or null
 | 
					 | 
				
			||||||
                if (StringUtils.isEmpty(thumbnailDescription)) {
 | 
					 | 
				
			||||||
                    continue;
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
				if (thumbnailName.toLowerCase().contains(".jpg.jpg")) {
 | 
									if (thumbnailName.toLowerCase().contains(".jpg.jpg")) {
 | 
				
			||||||
                    List<Bundle> originalBundles = item.getBundles("ORIGINAL");
 | 
										Bundle[] originalBundles = item.getBundles("ORIGINAL");
 | 
				
			||||||
					for (Bundle originalBundle : originalBundles) {
 | 
										for (Bundle originalBundle : originalBundles) {
 | 
				
			||||||
                        List<Bitstream> originalBundleBitstreams = originalBundle.getBitstreams();
 | 
											Bitstream[] originalBundleBitstreams = originalBundle.getBitstreams();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
						for (Bitstream originalBitstream : originalBundleBitstreams) {
 | 
											for (Bitstream originalBitstream : originalBundleBitstreams) {
 | 
				
			||||||
							String originalName = originalBitstream.getName();
 | 
												String originalName = originalBitstream.getName();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                            long originalBitstreamBytes = originalBitstream.getSizeBytes();
 | 
												long originalBitstreamBytes = originalBitstream.getSize();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							/*
 | 
												/*
 | 
				
			||||||
							- check if the original file name is the same as the thumbnail name minus the extra ".jpg"
 | 
												- check if the original file name is the same as the thumbnail name minus the extra ".jpg"
 | 
				
			||||||
							- check if the thumbnail description indicates it was automatically generated
 | 
												- check if the thumbnail description indicates it was automatically generated
 | 
				
			||||||
 | 
												- check if the item has dc.type Infographic (JPG could be the "real" item!)
 | 
				
			||||||
							- check if the original bitstream is less than ~100KiB
 | 
												- check if the original bitstream is less than ~100KiB
 | 
				
			||||||
							    - Note: in my tests there were 4022 items with ".jpg.jpg" thumbnails totaling 394549249
 | 
												    - Note: in my tests there were 4022 items with ".jpg.jpg" thumbnails totaling 394549249
 | 
				
			||||||
							      bytes for an average of about 98KiB so ~100KiB seems like a good cut off
 | 
												      bytes for an average of about 98KiB so ~100KiB seems like a good cut off
 | 
				
			||||||
							*/
 | 
												*/
 | 
				
			||||||
                            if (originalName.equalsIgnoreCase(
 | 
												if (
 | 
				
			||||||
                                            StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg"))
 | 
														originalName.equalsIgnoreCase(StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg"))
 | 
				
			||||||
                                    && ("Generated Thumbnail".equals(thumbnailDescription)
 | 
														&& ("Generated Thumbnail".equals(thumbnailBitstream.getDescription()) || "IM Thumbnail".equals(thumbnailBitstream.getDescription()))
 | 
				
			||||||
                                            || "IM Thumbnail".equals(thumbnailDescription))
 | 
														&& !itemHasInfographic
 | 
				
			||||||
                                    && originalBitstreamBytes < 100000) {
 | 
														&& originalBitstreamBytes < 100000
 | 
				
			||||||
                                System.out.println(
 | 
												) {
 | 
				
			||||||
                                        item.getHandle()
 | 
													System.out.println(item.getHandle() + ": replacing " + thumbnailName + " with " + originalName);
 | 
				
			||||||
                                                + ": replacing "
 | 
					 | 
				
			||||||
                                                + thumbnailName
 | 
					 | 
				
			||||||
                                                + " with "
 | 
					 | 
				
			||||||
                                                + originalName);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                                // add the original bitstream to the THUMBNAIL bundle
 | 
													//add the original bitstream to the THUMBNAIL bundle
 | 
				
			||||||
                                bundleService.addBitstream(
 | 
													thumbnailBundle.addBitstream(originalBitstream);
 | 
				
			||||||
                                        context, thumbnailBundle, originalBitstream);
 | 
													//remove the original bitstream from the ORIGINAL bundle
 | 
				
			||||||
                                // remove the original bitstream from the ORIGINAL bundle
 | 
					 | 
				
			||||||
								originalBundle.removeBitstream(originalBitstream);
 | 
													originalBundle.removeBitstream(originalBitstream);
 | 
				
			||||||
                                // remove the JpgJpg bitstream from the THUMBNAIL bundle
 | 
													//remove the JpgJpg bitstream from the THUMBNAIL bundle
 | 
				
			||||||
								thumbnailBundle.removeBitstream(thumbnailBitstream);
 | 
													thumbnailBundle.removeBitstream(thumbnailBitstream);
 | 
				
			||||||
							}
 | 
												}
 | 
				
			||||||
						}
 | 
											}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,280 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
 * Copyright (C) 2022 Alan Orth
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * SPDX-License-Identifier: GPL-3.0-only
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
package io.github.ilri.cgspace.scripts;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import org.apache.commons.lang.StringUtils;
 | 
					 | 
				
			||||||
import org.dspace.authorize.AuthorizeException;
 | 
					 | 
				
			||||||
import org.dspace.content.Bitstream;
 | 
					 | 
				
			||||||
import org.dspace.content.Bundle;
 | 
					 | 
				
			||||||
import org.dspace.content.Collection;
 | 
					 | 
				
			||||||
import org.dspace.content.Community;
 | 
					 | 
				
			||||||
import org.dspace.content.DSpaceObject;
 | 
					 | 
				
			||||||
import org.dspace.content.Item;
 | 
					 | 
				
			||||||
import org.dspace.content.factory.ContentServiceFactory;
 | 
					 | 
				
			||||||
import org.dspace.content.service.BundleService;
 | 
					 | 
				
			||||||
import org.dspace.content.service.ItemService;
 | 
					 | 
				
			||||||
import org.dspace.core.Constants;
 | 
					 | 
				
			||||||
import org.dspace.core.Context;
 | 
					 | 
				
			||||||
import org.dspace.handle.factory.HandleServiceFactory;
 | 
					 | 
				
			||||||
import org.dspace.handle.service.HandleService;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import java.io.IOException;
 | 
					 | 
				
			||||||
import java.sql.SQLException;
 | 
					 | 
				
			||||||
import java.util.Iterator;
 | 
					 | 
				
			||||||
import java.util.List;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/**
 | 
					 | 
				
			||||||
 * Fix low-quality thumbnails in a DSpace repository.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * <p>Search the DSpace repository for items containing bitstreams matching the following criteria:
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * <ul>
 | 
					 | 
				
			||||||
 *   <li>If an item has an <code>IM Thumbnail</code> and a <code>Generated Thumbnail</code> in the
 | 
					 | 
				
			||||||
 *       <code>THUMBNAIL</code> bundle, remove the <code>Generated Thumbnail</code>.
 | 
					 | 
				
			||||||
 *   <li>If an item has a PDF bitstream and a JPEG bitstream with description "thumbnail" in the
 | 
					 | 
				
			||||||
 *       <code>ORIGINAL</code> bundle, remove the "thumbnail" bitstream in the ORIGINAL bundle.
 | 
					 | 
				
			||||||
 * </ul>
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * <p>The general idea is that we should always prefer thumbnails generated from PDFs by ImageMagick
 | 
					 | 
				
			||||||
 * to manually uploaded JPEGs because ImageMagick Thumbnails can be regenerated with higher quality,
 | 
					 | 
				
			||||||
 * resolution, etc. Furthermore, if there are JPEG bitstreams in the ORIGINAL bundle DSpace will
 | 
					 | 
				
			||||||
 * automatically create ".jpg.jpg" thumbnails from them in the THUMBNAIL bundle so we should remove
 | 
					 | 
				
			||||||
 * those as well!
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * @author Andrea Schweer schweer@waikato.ac.nz for the LCoNZ Institutional Research Repositories
 | 
					 | 
				
			||||||
 * @author Alan Orth for the International Livestock Research Institute
 | 
					 | 
				
			||||||
 * @version 6.1
 | 
					 | 
				
			||||||
 * @since 6.1
 | 
					 | 
				
			||||||
 * @see FixJpgJpgThumbnails
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
public class FixLowQualityThumbnails {
 | 
					 | 
				
			||||||
    // note: static members belong to the class itself, not any one instance
 | 
					 | 
				
			||||||
    public static ItemService itemService = ContentServiceFactory.getInstance().getItemService();
 | 
					 | 
				
			||||||
    public static HandleService handleService =
 | 
					 | 
				
			||||||
            HandleServiceFactory.getInstance().getHandleService();
 | 
					 | 
				
			||||||
    public static BundleService bundleService =
 | 
					 | 
				
			||||||
            ContentServiceFactory.getInstance().getBundleService();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    public static void main(String[] args) {
 | 
					 | 
				
			||||||
        String parentHandle = null;
 | 
					 | 
				
			||||||
        if (args.length >= 1) {
 | 
					 | 
				
			||||||
            parentHandle = args[0];
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        Context context = null;
 | 
					 | 
				
			||||||
        try {
 | 
					 | 
				
			||||||
            context = new Context();
 | 
					 | 
				
			||||||
            context.turnOffAuthorisationSystem();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            if (StringUtils.isBlank(parentHandle)) {
 | 
					 | 
				
			||||||
                process(context, itemService.findAll(context));
 | 
					 | 
				
			||||||
            } else {
 | 
					 | 
				
			||||||
                DSpaceObject parent = handleService.resolveToObject(context, parentHandle);
 | 
					 | 
				
			||||||
                if (parent != null) {
 | 
					 | 
				
			||||||
                    switch (parent.getType()) {
 | 
					 | 
				
			||||||
                        case Constants.SITE:
 | 
					 | 
				
			||||||
                            process(context, itemService.findAll(context));
 | 
					 | 
				
			||||||
                            context.commit();
 | 
					 | 
				
			||||||
                            break;
 | 
					 | 
				
			||||||
                        case Constants.COMMUNITY:
 | 
					 | 
				
			||||||
                            List<Collection> collections = ((Community) parent).getCollections();
 | 
					 | 
				
			||||||
                            for (Collection collection : collections) {
 | 
					 | 
				
			||||||
                                process(
 | 
					 | 
				
			||||||
                                        context,
 | 
					 | 
				
			||||||
                                        itemService.findAllByCollection(context, collection));
 | 
					 | 
				
			||||||
                            }
 | 
					 | 
				
			||||||
                            context.commit();
 | 
					 | 
				
			||||||
                            break;
 | 
					 | 
				
			||||||
                        case Constants.COLLECTION:
 | 
					 | 
				
			||||||
                            process(
 | 
					 | 
				
			||||||
                                    context,
 | 
					 | 
				
			||||||
                                    itemService.findByCollection(context, (Collection) parent));
 | 
					 | 
				
			||||||
                            context.commit();
 | 
					 | 
				
			||||||
                            break;
 | 
					 | 
				
			||||||
                        case Constants.ITEM:
 | 
					 | 
				
			||||||
                            processItem(context, (Item) parent);
 | 
					 | 
				
			||||||
                            context.commit();
 | 
					 | 
				
			||||||
                            break;
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        } catch (SQLException | AuthorizeException | IOException e) {
 | 
					 | 
				
			||||||
            e.printStackTrace(System.err);
 | 
					 | 
				
			||||||
        } finally {
 | 
					 | 
				
			||||||
            if (context != null && context.isValid()) {
 | 
					 | 
				
			||||||
                context.abort();
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    private static void process(Context context, Iterator<Item> items)
 | 
					 | 
				
			||||||
            throws SQLException, IOException, AuthorizeException {
 | 
					 | 
				
			||||||
        while (items.hasNext()) {
 | 
					 | 
				
			||||||
            Item item = items.next();
 | 
					 | 
				
			||||||
            processItem(context, item);
 | 
					 | 
				
			||||||
            itemService.update(context, item);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    private static void processItem(Context context, Item item)
 | 
					 | 
				
			||||||
            throws SQLException, AuthorizeException, IOException {
 | 
					 | 
				
			||||||
        System.out.println("FixLowQualityThumbnails: processing item: " + item.getHandle());
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        // Set some state for the item before we iterate over the THUMBNAIL bundle
 | 
					 | 
				
			||||||
        boolean itemHasImThumbnail = false;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        // Iterate over the THUMBNAIL bundle to first identify if this item has an "IM Thumbnail"
 | 
					 | 
				
			||||||
        List<Bundle> thumbnailBundles = item.getBundles("THUMBNAIL");
 | 
					 | 
				
			||||||
        for (Bundle thumbnailBundle : thumbnailBundles) {
 | 
					 | 
				
			||||||
            List<Bitstream> thumbnailBundleBitstreams = thumbnailBundle.getBitstreams();
 | 
					 | 
				
			||||||
            for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
 | 
					 | 
				
			||||||
                String thumbnailDescription = thumbnailBitstream.getDescription();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                if (StringUtils.isEmpty(thumbnailDescription)) {
 | 
					 | 
				
			||||||
                    continue;
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                // Check if this item has a bitstream in the THUMBNAIL bundle with description "IM
 | 
					 | 
				
			||||||
                // Thumbnail", but only if we haven't already seen one in another iteration for this
 | 
					 | 
				
			||||||
                // bundle.
 | 
					 | 
				
			||||||
                if (!itemHasImThumbnail && "IM Thumbnail".equals(thumbnailDescription)) {
 | 
					 | 
				
			||||||
                    itemHasImThumbnail = true;
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            // If this item has an IM Thumbnail we can be reasonably sure that there is a PDF
 | 
					 | 
				
			||||||
            // in the ORIGINAL bundle and we don't need any other thumbnails.
 | 
					 | 
				
			||||||
            if (itemHasImThumbnail) {
 | 
					 | 
				
			||||||
                // Iterate over the bitstreams in the THUMBNAIL bundle again.
 | 
					 | 
				
			||||||
                for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
 | 
					 | 
				
			||||||
                    String thumbnailName = thumbnailBitstream.getName();
 | 
					 | 
				
			||||||
                    String thumbnailDescription = thumbnailBitstream.getDescription();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    if (StringUtils.isEmpty(thumbnailDescription)) {
 | 
					 | 
				
			||||||
                        continue;
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    // If this item has a "Generated Thumbnail" we can remove it, because those
 | 
					 | 
				
			||||||
                    // typically come from other JPEGs in the ORIGINAL bundle and we would prefer
 | 
					 | 
				
			||||||
                    // the IM Thumbnail generated from a PDF anyway. The DSpace-generated descri-
 | 
					 | 
				
			||||||
                    // ption will *always* be "Generated Thumbnail".
 | 
					 | 
				
			||||||
                    if ("Generated Thumbnail".equals(thumbnailDescription)) {
 | 
					 | 
				
			||||||
                        System.out.print("\u001b[33m");
 | 
					 | 
				
			||||||
                        System.out.println("> Action: remove old thumbnail from THUMBNAIL bundle");
 | 
					 | 
				
			||||||
                        System.out.println("> Name: »" + thumbnailName + "«");
 | 
					 | 
				
			||||||
                        System.out.println("> Description: »" + thumbnailDescription + "«");
 | 
					 | 
				
			||||||
                        System.out.print("\u001b[0m");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                        // Remove the "Generated Thumbnail" bitstream from the THUMBNAIL bundle
 | 
					 | 
				
			||||||
                        thumbnailBundle.removeBitstream(thumbnailBitstream);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                        // If this item has a bitstream with the word "thumbnail" in it then we can
 | 
					 | 
				
			||||||
                        // remove it because we already know this item has an IM Thumbnail and we
 | 
					 | 
				
			||||||
                        // prefer that one.
 | 
					 | 
				
			||||||
                    } else if (thumbnailDescription.toLowerCase().contains("thumbnail")
 | 
					 | 
				
			||||||
                            && !"IM Thumbnail".equals(thumbnailDescription)) {
 | 
					 | 
				
			||||||
                        System.out.print("\u001b[33m");
 | 
					 | 
				
			||||||
                        System.out.println("> Action: remove manually uploaded thumbnail from THUMBNAIL bundle");
 | 
					 | 
				
			||||||
                        System.out.println("> Name: »" + thumbnailName + "«");
 | 
					 | 
				
			||||||
                        System.out.println("> Description: »" + thumbnailDescription + "«");
 | 
					 | 
				
			||||||
                        System.out.print("\u001b[0m");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                        // Remove the "thumbnail" bitstream from the THUMBNAIL bundle
 | 
					 | 
				
			||||||
                        thumbnailBundle.removeBitstream(thumbnailBitstream);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                        // Otherwise skip it because it might be something uploaded manually, like
 | 
					 | 
				
			||||||
                        // a thumbnail for a journal or a limited access item.
 | 
					 | 
				
			||||||
                    } else {
 | 
					 | 
				
			||||||
                        System.out.print("\u001b[34m");
 | 
					 | 
				
			||||||
                        System.out.println("> Action: skip other thumbnail in THUMBNAIL bundle");
 | 
					 | 
				
			||||||
                        System.out.println("> Name: »" + thumbnailName + "«");
 | 
					 | 
				
			||||||
                        System.out.println("> Description: »" + thumbnailDescription + "«");
 | 
					 | 
				
			||||||
                        System.out.print("\u001b[0m");
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    // Print a blank line
 | 
					 | 
				
			||||||
                    System.out.println();
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        // Set some state before we iterate over the ORIGINAL bundle
 | 
					 | 
				
			||||||
        boolean itemHasOriginalPdfBitstream = false;
 | 
					 | 
				
			||||||
        boolean itemHasOriginalJpegBitstream = false;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        // Iterate over the ORIGINAL bundle to delete manually-uploaded JPEG
 | 
					 | 
				
			||||||
        // bitstreams labeled "Thumbnail" whenever we have a PDF because they
 | 
					 | 
				
			||||||
        // don't belong in the ORIGINAL bundle and DSpace will automatically
 | 
					 | 
				
			||||||
        // create a better thumbnail from the PDF anyway.
 | 
					 | 
				
			||||||
        List<Bundle> originalBundles = item.getBundles("ORIGINAL");
 | 
					 | 
				
			||||||
        for (Bundle originalBundle : originalBundles) {
 | 
					 | 
				
			||||||
            List<Bitstream> originalBundleBitstreams = originalBundle.getBitstreams();
 | 
					 | 
				
			||||||
            for (Bitstream originalBitstream : originalBundleBitstreams) {
 | 
					 | 
				
			||||||
                String originalFormat = originalBitstream.getFormat(context).getMIMEType();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                // Check if this item has a PDF bitstream in the ORIGINAL bundle,
 | 
					 | 
				
			||||||
                // but only if we haven't already seen one in another iteration
 | 
					 | 
				
			||||||
                // for this bundle. DSpace will return "format application/pdf"
 | 
					 | 
				
			||||||
                // for the MIME type.
 | 
					 | 
				
			||||||
                if (!itemHasOriginalPdfBitstream && originalFormat.contains("application/pdf")) {
 | 
					 | 
				
			||||||
                    itemHasOriginalPdfBitstream = true;
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                // Check if this item has a JPEG bitstream in the ORIGINAL bundle,
 | 
					 | 
				
			||||||
                // but only if we haven't already seen one in another iteration
 | 
					 | 
				
			||||||
                // for this bundle. DSpace will return "format image/jpeg" for
 | 
					 | 
				
			||||||
                // the MIME type.
 | 
					 | 
				
			||||||
                if (!itemHasOriginalJpegBitstream && originalFormat.contains("image/jpeg")) {
 | 
					 | 
				
			||||||
                    itemHasOriginalJpegBitstream = true;
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            // Check if we found a PDF *and* a JPEG in this item's ORIGINAL
 | 
					 | 
				
			||||||
            // bundle.
 | 
					 | 
				
			||||||
            if (itemHasOriginalPdfBitstream && itemHasOriginalJpegBitstream) {
 | 
					 | 
				
			||||||
                // Yes! Now iterate over the bitstreams in the ORIGINAL bundle
 | 
					 | 
				
			||||||
                // again to see if the JPEG is a manually uploaded "Thumbnail"
 | 
					 | 
				
			||||||
                for (Bitstream originalBitstream : originalBundleBitstreams) {
 | 
					 | 
				
			||||||
                    String originalName = originalBitstream.getName();
 | 
					 | 
				
			||||||
                    String originalDescription = originalBitstream.getDescription();
 | 
					 | 
				
			||||||
                    String originalFormat = originalBitstream.getFormat(context).getMIMEType();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    if (StringUtils.isEmpty(originalDescription)) {
 | 
					 | 
				
			||||||
                        continue;
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    /*
 | 
					 | 
				
			||||||
                    - check if the bitstream is a JPEG based on its MIME Type
 | 
					 | 
				
			||||||
                    - check if the bitstream's name or description is "Thumbnail"
 | 
					 | 
				
			||||||
                    */
 | 
					 | 
				
			||||||
                    if (originalFormat.toLowerCase().contains("image/jpeg")
 | 
					 | 
				
			||||||
                            && (originalName.toLowerCase().contains("thumbnail")
 | 
					 | 
				
			||||||
                                    || originalDescription.toLowerCase().contains("thumbnail"))) {
 | 
					 | 
				
			||||||
                        System.out.print("\u001b[33m");
 | 
					 | 
				
			||||||
                        System.out.println("> Action: remove thumbnail from ORIGINAL bundle");
 | 
					 | 
				
			||||||
                        System.out.println("> Name: »" + originalName + "«");
 | 
					 | 
				
			||||||
                        System.out.println("> Description: »" + originalDescription + "«");
 | 
					 | 
				
			||||||
                        System.out.print("\u001b[0m");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                        // Remove the original bitstream from the ORIGINAL bundle
 | 
					 | 
				
			||||||
                        originalBundle.removeBitstream(originalBitstream);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    } else {
 | 
					 | 
				
			||||||
                        System.out.print("\u001b[34m");
 | 
					 | 
				
			||||||
                        System.out.println("> Action: skip other bitstream in ORIGINAL bundle");
 | 
					 | 
				
			||||||
                        System.out.println("> Name: »" + originalName + "«");
 | 
					 | 
				
			||||||
                        System.out.println("> Description: »" + originalDescription + "«");
 | 
					 | 
				
			||||||
                        System.out.print("\u001b[0m");
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    // Print a blank line
 | 
					 | 
				
			||||||
                    System.out.println();
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,21 +1,20 @@
 | 
				
			|||||||
# Scripts
 | 
					# Scripts
 | 
				
			||||||
Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
 | 
					Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
 | 
					- **FixJpgJpgThumbnails**: Fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
 | 
				
			||||||
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Tested on DSpace 7.6. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
 | 
					Tested on DSpace 5.8. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC5x/Curation+System).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Build and Install
 | 
					## Build and Install
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### Integrate into DSpace Build
 | 
					### Integrate into DSpace Build
 | 
				
			||||||
To use these curation tasks in a DSpace project add the following dependency to `dspace/modules/additions/pom.xml`:
 | 
					To use these curation tasks in a DSpace project add the following dependency to `dspace/modules/additions/pom.xml`:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```xml
 | 
					```
 | 
				
			||||||
<dependency>
 | 
					<dependency>
 | 
				
			||||||
  <groupId>io.github.ilri.cgspace</groupId>
 | 
					  <groupId>io.github.ilri.cgspace</groupId>
 | 
				
			||||||
  <artifactId>cgspace-java-helpers</artifactId>
 | 
					  <artifactId>cgspace-java-helpers</artifactId>
 | 
				
			||||||
  <version>7.6.1.2-SNAPSHOT</version>
 | 
					  <version>5.3</version>
 | 
				
			||||||
</dependency>
 | 
					</dependency>
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -24,19 +23,19 @@ The jar will be copied to all DSpace applications.
 | 
				
			|||||||
### Manual Build and Install
 | 
					### Manual Build and Install
 | 
				
			||||||
To build the standalone jar:
 | 
					To build the standalone jar:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```console
 | 
					```
 | 
				
			||||||
$ mvn package
 | 
					$ mvn package
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Copy the resulting jar to the DSpace `lib` directory:
 | 
					Copy the resulting jar to the DSpace `lib` directory:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```console
 | 
					```
 | 
				
			||||||
$ cp target/cgspace-java-helpers-7.6.1.2-SNAPSHOT.jar ~/dspace/lib/
 | 
					$ cp target/cgspace-java-helpers-5.3.jar ~/dspace/lib
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Invocation
 | 
					### Invocation
 | 
				
			||||||
The scripts take only one argument, which is a community, collection, or item:
 | 
					The script only takes one argument, which is a community, collection, or item:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```console
 | 
					```
 | 
				
			||||||
$ dspace dsrun io.github.ilri.cgspace.scripts.FixJpgJpgThumbnails 10568/83389
 | 
					$ dspace dsrun io.github.ilri.cgspace.scripts.FixJpgJpgThumbnails 10568/83389
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -16,15 +16,29 @@
 | 
				
			|||||||
      "name": "Congo, The Democratic Republic of the",
 | 
					      "name": "Congo, The Democratic Republic of the",
 | 
				
			||||||
      "numeric": "180"
 | 
					      "numeric": "180"
 | 
				
			||||||
    },
 | 
					    },
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      "alpha_2": "IR",
 | 
				
			||||||
 | 
					      "alpha_3": "IRN",
 | 
				
			||||||
 | 
					      "name": "Iran, Islamic Republic of",
 | 
				
			||||||
 | 
					      "cgspace_name": "Iran",
 | 
				
			||||||
 | 
					      "numeric": "364",
 | 
				
			||||||
 | 
					      "official_name": "Islamic Republic of Iran"
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      "alpha_2": "KP",
 | 
					      "alpha_2": "KP",
 | 
				
			||||||
      "alpha_3": "PRK",
 | 
					      "alpha_3": "PRK",
 | 
				
			||||||
      "common_name": "North Korea",
 | 
					 | 
				
			||||||
      "name": "Korea, Democratic People's Republic of",
 | 
					      "name": "Korea, Democratic People's Republic of",
 | 
				
			||||||
      "cgspace_name": "Korea, DPR",
 | 
					      "cgspace_name": "Korea, DPR",
 | 
				
			||||||
      "numeric": "408",
 | 
					      "numeric": "408",
 | 
				
			||||||
      "official_name": "Democratic People's Republic of Korea"
 | 
					      "official_name": "Democratic People's Republic of Korea"
 | 
				
			||||||
    },
 | 
					    },
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      "alpha_2": "LA",
 | 
				
			||||||
 | 
					      "alpha_3": "LAO",
 | 
				
			||||||
 | 
					      "name": "Lao People's Democratic Republic",
 | 
				
			||||||
 | 
					      "cgspace_name": "Laos",
 | 
				
			||||||
 | 
					      "numeric": "418"
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      "alpha_2": "FM",
 | 
					      "alpha_2": "FM",
 | 
				
			||||||
      "alpha_3": "FSM",
 | 
					      "alpha_3": "FSM",
 | 
				
			||||||
@@ -39,6 +53,13 @@
 | 
				
			|||||||
      "name": "Russian Federation",
 | 
					      "name": "Russian Federation",
 | 
				
			||||||
      "cgspace_name": "Russia",
 | 
					      "cgspace_name": "Russia",
 | 
				
			||||||
      "numeric": "643"
 | 
					      "numeric": "643"
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      "alpha_2": "SY",
 | 
				
			||||||
 | 
					      "alpha_3": "SYR",
 | 
				
			||||||
 | 
					      "name": "Syrian Arab Republic",
 | 
				
			||||||
 | 
					      "cgspace_name": "Syria",
 | 
				
			||||||
 | 
					      "numeric": "760"
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  ]
 | 
					  ]
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user