mirror of
				https://github.com/ilri/cgspace-java-helpers.git
				synced 2025-11-03 14:19:12 +01:00 
			
		
		
		
	Compare commits
	
		
			113 Commits
		
	
	
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						
						
							
						
						13c6612c7f
	
				 | 
					
					
						|||
| 
						
						
							
						
						813517c789
	
				 | 
					
					
						|||
| 
						
						
							
						
						5f9490e4e5
	
				 | 
					
					
						|||
| 
						
						
							
						
						9a46416331
	
				 | 
					
					
						|||
| 
						
						
							
						
						2be5c62d92
	
				 | 
					
					
						|||
| 
						
						
							
						
						2bd7d5e679
	
				 | 
					
					
						|||
| 
						
						
							
						
						70cf68b8bc
	
				 | 
					
					
						|||
| 
						
						
							
						
						4f81e1e17e
	
				 | 
					
					
						|||
| 
						
						
							
						
						5113a91257
	
				 | 
					
					
						|||
| 
						
						
							
						
						3c36452891
	
				 | 
					
					
						|||
| 
						
						
							
						
						3a860dabe4
	
				 | 
					
					
						|||
| 
						
						
							
						
						5f44c9ea8a
	
				 | 
					
					
						|||
| 
						
						
							
						
						32a14c0ea5
	
				 | 
					
					
						|||
| 
						
						
							
						
						13d3dfb885
	
				 | 
					
					
						|||
| 
						
						
							
						
						1e7df1ce46
	
				 | 
					
					
						|||
| 
						
						
							
						
						443e5576ab
	
				 | 
					
					
						|||
| 
						
						
							
						
						8531992412
	
				 | 
					
					
						|||
| 
						
						
							
						
						27016f5f77
	
				 | 
					
					
						|||
| 
						
						
							
						
						3a583c4f86
	
				 | 
					
					
						|||
| 
						
						
							
						
						28668f76c9
	
				 | 
					
					
						|||
| 
						
						
							
						
						e0153fd38a
	
				 | 
					
					
						|||
| 
						
						
							
						
						12a606ac61
	
				 | 
					
					
						|||
| 
						
						
							
						
						692a62b454
	
				 | 
					
					
						|||
| 
						
						
							
						
						d4ca92066a
	
				 | 
					
					
						|||
| 
						
						
							
						
						5ad8c556e9
	
				 | 
					
					
						|||
| 
						
						
							
						
						77425c13bf
	
				 | 
					
					
						|||
| 
						
						
							
						
						5e0a456fb5
	
				 | 
					
					
						|||
| 
						
						
							
						
						9050caf37f
	
				 | 
					
					
						|||
| 
						
						
							
						
						639148dc19
	
				 | 
					
					
						|||
| 
						
						
							
						
						369f81d181
	
				 | 
					
					
						|||
| 
						
						
							
						
						7a91305742
	
				 | 
					
					
						|||
| 
						
						
							
						
						b15dd50c16
	
				 | 
					
					
						|||
| 
						
						
							
						
						0c35e81362
	
				 | 
					
					
						|||
| 
						
						
							
						
						2fb8d274c9
	
				 | 
					
					
						|||
| 
						
						
							
						
						169b063e9a
	
				 | 
					
					
						|||
| 
						
						
							
						
						0cb533b2c4
	
				 | 
					
					
						|||
| 
						
						
							
						
						ee6518035e
	
				 | 
					
					
						|||
| 
						
						
							
						
						14051984f3
	
				 | 
					
					
						|||
| 
						
						
							
						
						9faf657c59
	
				 | 
					
					
						|||
| 
						
						
							
						
						7fb78c2722
	
				 | 
					
					
						|||
| 
						
						
							
						
						6ef9f521bf
	
				 | 
					
					
						|||
| 
						
						
							
						
						1a345de36a
	
				 | 
					
					
						|||
| 
						
						
							
						
						eb66ccbd0d
	
				 | 
					
					
						|||
| 
						
						
							
						
						62138540ae
	
				 | 
					
					
						|||
| 
						
						
							
						
						c0d0e40321
	
				 | 
					
					
						|||
| 
						
						
							
						
						f2a637f0a8
	
				 | 
					
					
						|||
| 
						
						
							
						
						6e38a2f7e1
	
				 | 
					
					
						|||
| 
						
						
							
						
						f9d7e5f6a2
	
				 | 
					
					
						|||
| 
						
						
							
						
						9e965afdb7
	
				 | 
					
					
						|||
| 
						
						
							
						
						408a0e1c19
	
				 | 
					
					
						|||
| 
						
						
							
						
						ea9f669e9c
	
				 | 
					
					
						|||
| 
						
						
							
						
						546101bc92
	
				 | 
					
					
						|||
| 
						
						
							
						
						0a7cf7bf59
	
				 | 
					
					
						|||
| 
						
						
							
						
						8c0a8fbcd1
	
				 | 
					
					
						|||
| 
						
						
							
						
						c05a2e4f96
	
				 | 
					
					
						|||
| 
						
						
							
						
						cf2af393c0
	
				 | 
					
					
						|||
| 
						
						
							
						
						1f6ba4af67
	
				 | 
					
					
						|||
| 
						
						
							
						
						5ceaebaeae
	
				 | 
					
					
						|||
| 
						
						
							
						
						f3dcc6e261
	
				 | 
					
					
						|||
| 
						
						
							
						
						3eddbc3e22
	
				 | 
					
					
						|||
| 
						
						
							
						
						dbf59f784c
	
				 | 
					
					
						|||
| 
						
						
							
						
						0ffa4c8d37
	
				 | 
					
					
						|||
| 
						
						
							
						
						970d0c074e
	
				 | 
					
					
						|||
| 
						
						
							
						
						6b2b899957
	
				 | 
					
					
						|||
| 
						
						
							
						
						dfaa234a90
	
				 | 
					
					
						|||
| 
						
						
							
						
						f46e81b8cd
	
				 | 
					
					
						|||
| 
						
						
							
						
						dbd8721579
	
				 | 
					
					
						|||
| 
						
						
							
						
						a234b39064
	
				 | 
					
					
						|||
| 
						
						
							
						
						80a336f94d
	
				 | 
					
					
						|||
| 
						
						
							
						
						5ebf4930cf
	
				 | 
					
					
						|||
| 
						
						
							
						
						8e01595cc1
	
				 | 
					
					
						|||
| 
						
						
							
						
						8b3aac610d
	
				 | 
					
					
						|||
| 
						
						
							
						
						c2d7535d01
	
				 | 
					
					
						|||
| 
						
						
							
						
						b396fba043
	
				 | 
					
					
						|||
| 
						
						
							
						
						38a9cc5188
	
				 | 
					
					
						|||
| 
						
						
							
						
						16db38967b
	
				 | 
					
					
						|||
| 
						
						
							
						
						2604dc3cce
	
				 | 
					
					
						|||
| 
						
						
							
						
						f0754ab419
	
				 | 
					
					
						|||
| 
						
						
							
						
						6772145bec
	
				 | 
					
					
						|||
| 
						
						
							
						
						b31557aa05
	
				 | 
					
					
						|||
| 
						
						
							
						
						095f843067
	
				 | 
					
					
						|||
| 
						
						
							
						
						f7fda9922f
	
				 | 
					
					
						|||
| 
						
						
							
						
						83a416afaf
	
				 | 
					
					
						|||
| 
						
						
							
						
						922e3892a7
	
				 | 
					
					
						|||
| 
						
						
							
						
						6b648c2c85
	
				 | 
					
					
						|||
| 
						
						
							
						
						781ddcd931
	
				 | 
					
					
						|||
| 
						
						
							
						
						49cb8e3468
	
				 | 
					
					
						|||
| 
						
						
							
						
						3aa1503163
	
				 | 
					
					
						|||
| 
						
						
							
						
						26597e2f8f
	
				 | 
					
					
						|||
| 
						
						
							
						
						1497ebb476
	
				 | 
					
					
						|||
| 
						
						
							
						
						b2027e3e44
	
				 | 
					
					
						|||
| 
						
						
							
						
						26eaa2d94f
	
				 | 
					
					
						|||
| e3b95f6a30 | |||
| 
						
						
							
						
						5e545e37e2
	
				 | 
					
					
						|||
| 
						
						
							
						
						3f711db1b2
	
				 | 
					
					
						|||
| 
						
						
							
						
						112cb8a133
	
				 | 
					
					
						|||
| 
						
						
							
						
						4d59c1a00d
	
				 | 
					
					
						|||
| 
						
						
							
						
						2e779efb14
	
				 | 
					
					
						|||
| 
						
						
							
						
						735e759033
	
				 | 
					
					
						|||
| 
						
						
							
						
						271a9ce970
	
				 | 
					
					
						|||
| 
						
						
							
						
						6b72ddefc1
	
				 | 
					
					
						|||
| 
						
						
							
						
						cea97aebe5
	
				 | 
					
					
						|||
| 
						
						
							
						
						4bc7971ecb
	
				 | 
					
					
						|||
| 
						
						
							
						
						197aad0124
	
				 | 
					
					
						|||
| 
						
						
							
						
						da1ecad238
	
				 | 
					
					
						|||
| 
						
						
							
						
						307480f249
	
				 | 
					
					
						|||
| 
						
						
							
						
						4698b6eb38
	
				 | 
					
					
						|||
| 
						
						
							
						
						f1629f65fe
	
				 | 
					
					
						|||
| 
						
						
							
						
						29f6aff35e
	
				 | 
					
					
						|||
| 
						
						
							
						
						9bf487a336
	
				 | 
					
					
						|||
| 
						
						
							
						
						f50357b7cc
	
				 | 
					
					
						|||
| 
						
						
							
						
						f3ab89f7a1
	
				 | 
					
					
						|||
| 
						
						
							
						
						5a467f92e0
	
				 | 
					
					
						
							
								
								
									
										26
									
								
								.github/workflows/maven.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								.github/workflows/maven.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,26 @@
 | 
			
		||||
# This workflow will build a Java project with Maven
 | 
			
		||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven
 | 
			
		||||
 | 
			
		||||
name: Build
 | 
			
		||||
 | 
			
		||||
on:
 | 
			
		||||
  push:
 | 
			
		||||
    branches: [ dspace7 ]
 | 
			
		||||
  pull_request:
 | 
			
		||||
    branches: [ dspace7 ]
 | 
			
		||||
 | 
			
		||||
jobs:
 | 
			
		||||
  build:
 | 
			
		||||
 | 
			
		||||
    runs-on: ubuntu-22.04
 | 
			
		||||
 | 
			
		||||
    steps:
 | 
			
		||||
    - uses: actions/checkout@v4
 | 
			
		||||
    - name: Set up JDK 17
 | 
			
		||||
      uses: actions/setup-java@v4
 | 
			
		||||
      with:
 | 
			
		||||
        java-version: 17
 | 
			
		||||
        distribution: 'temurin'
 | 
			
		||||
        cache: 'maven'
 | 
			
		||||
    - name: Build with Maven
 | 
			
		||||
      run: mvn -B package --file pom.xml
 | 
			
		||||
							
								
								
									
										6
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -70,4 +70,10 @@ fabric.properties
 | 
			
		||||
# Android studio 3.1+ serialized cache file
 | 
			
		||||
.idea/caches/build_file_checksums.ser
 | 
			
		||||
 | 
			
		||||
# VS Code settings
 | 
			
		||||
.vscode
 | 
			
		||||
 | 
			
		||||
# asdf-vm tool-versions file
 | 
			
		||||
.tool-versions
 | 
			
		||||
 | 
			
		||||
target/
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										4
									
								
								.idea/misc.xml
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										4
									
								
								.idea/misc.xml
									
									
									
										generated
									
									
									
								
							@@ -1,11 +1,13 @@
 | 
			
		||||
<?xml version="1.0" encoding="UTF-8"?>
 | 
			
		||||
<project version="4">
 | 
			
		||||
  <component name="ExternalStorageConfigurationManager" enabled="true" />
 | 
			
		||||
  <component name="MavenProjectsManager">
 | 
			
		||||
    <option name="originalFiles">
 | 
			
		||||
      <list>
 | 
			
		||||
        <option value="$PROJECT_DIR$/pom.xml" />
 | 
			
		||||
      </list>
 | 
			
		||||
    </option>
 | 
			
		||||
    <option name="workspaceImportForciblyTurnedOn" value="true" />
 | 
			
		||||
  </component>
 | 
			
		||||
  <component name="ProjectRootManager" version="2" languageLevel="JDK_11" project-jdk-name="11" project-jdk-type="JavaSDK" />
 | 
			
		||||
  <component name="ProjectRootManager" version="2" languageLevel="JDK_11" project-jdk-name="17" project-jdk-type="JavaSDK" />
 | 
			
		||||
</project>
 | 
			
		||||
@@ -1,8 +0,0 @@
 | 
			
		||||
dist: bionic
 | 
			
		||||
language: java
 | 
			
		||||
jdk:
 | 
			
		||||
  - openjdk8
 | 
			
		||||
script:
 | 
			
		||||
  - mvn package -B
 | 
			
		||||
 | 
			
		||||
# vim: ts=2 sw=2 et
 | 
			
		||||
							
								
								
									
										52
									
								
								CHANGELOG.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								CHANGELOG.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,52 @@
 | 
			
		||||
# Changelog
 | 
			
		||||
All notable changes to this project will be documented in this file.
 | 
			
		||||
 | 
			
		||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 | 
			
		||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 | 
			
		||||
 | 
			
		||||
## Unreleased
 | 
			
		||||
### Updated
 | 
			
		||||
- Update dspace-api dependency to 7.6.3
 | 
			
		||||
- Update gson dependency to 2.11.0 to match dspace-api
 | 
			
		||||
 | 
			
		||||
## [7.6.1.3] - 2024-06-26
 | 
			
		||||
### Updated
 | 
			
		||||
- Add more formats to `NormalizeDOIs` curation task
 | 
			
		||||
 | 
			
		||||
## [7.6.1.2] - 2024-04-25
 | 
			
		||||
### Changed
 | 
			
		||||
- Remove reporting from curation tasks since "results" are enough
 | 
			
		||||
 | 
			
		||||
## [7.6.1.1] - 2024-04-23
 | 
			
		||||
### Added
 | 
			
		||||
- New `NormalizeDOIs` curation task
 | 
			
		||||
 | 
			
		||||
### Updated
 | 
			
		||||
- Update dependencies in `pom.xml`
 | 
			
		||||
 | 
			
		||||
## [7.6.1] - 2024-01-02
 | 
			
		||||
### Changed
 | 
			
		||||
- Pin gson dependency to 2.9.0 to avoid dependency convergence issues with DSpace
 | 
			
		||||
 | 
			
		||||
## [7.6] - 2024-01-02
 | 
			
		||||
### Updated
 | 
			
		||||
- `iso_3166-1.json` from iso-codes 4.13.0-SNAPSHOT, which [adds common names for Iran, Laos, and Syria](https://salsa.debian.org/iso-codes-team/iso-codes/-/merge_requests/32)
 | 
			
		||||
- DSpace 7.6 compatibility
 | 
			
		||||
 | 
			
		||||
## [6.2] - 2023-02-20
 | 
			
		||||
### Updated
 | 
			
		||||
- `iso_3166-1.json` from iso-codes 4.12.0, which updates the name for TR to "Türkiye"
 | 
			
		||||
 | 
			
		||||
## [6.1] - 2022-10-31
 | 
			
		||||
### Updated
 | 
			
		||||
- Update dependencies in `pom.xml`
 | 
			
		||||
- `iso_3166-1.json` from iso-codes 4.11.0
 | 
			
		||||
 | 
			
		||||
### Changed
 | 
			
		||||
- Java compiler and target from JDK 7 to JDK 8
 | 
			
		||||
 | 
			
		||||
### Added
 | 
			
		||||
- New `FixLowQualityThumbnails` script to detect and remove more low-quality thumbnails
 | 
			
		||||
 | 
			
		||||
### Fixed
 | 
			
		||||
- `FixJpgJpgThumbnails` and `FixLowQualityThumbnails` scripts not committing changes when operating on a site, community, or collection
 | 
			
		||||
							
								
								
									
										60
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										60
									
								
								README.md
									
									
									
									
									
								
							@@ -1,10 +1,12 @@
 | 
			
		||||
# CGSpace Java Helpers [](https://travis-ci.org/ilri/dspace-curation-tasks)
 | 
			
		||||
# CGSpace Java Helpers [](https://github.com/ilri/cgspace-java-helpers/actions)
 | 
			
		||||
DSpace curation tasks and other Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
 | 
			
		||||
 | 
			
		||||
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
 | 
			
		||||
- **FixJpgJpgThumbnails**: Fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
 | 
			
		||||
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
 | 
			
		||||
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
 | 
			
		||||
- **NormalizeDOIs**: normalize DOIs by stripping whitespace, lowercasing, and converting to https://doi.org/ format
 | 
			
		||||
 | 
			
		||||
Tested on DSpace 5.8. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC5x/Curation+System).
 | 
			
		||||
Tested on DSpace 7.6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
 | 
			
		||||
 | 
			
		||||
## Build and Install
 | 
			
		||||
 | 
			
		||||
@@ -15,7 +17,7 @@ To use these curation tasks in a DSpace project add the following dependency to
 | 
			
		||||
<dependency>
 | 
			
		||||
  <groupId>io.github.ilri.cgspace</groupId>
 | 
			
		||||
  <artifactId>cgspace-java-helpers</artifactId>
 | 
			
		||||
  <version>5.1</version>
 | 
			
		||||
  <version>7.6.1.4-SNAPSHOT</version>
 | 
			
		||||
</dependency>
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
@@ -24,49 +26,21 @@ The jar will be copied to all DSpace applications.
 | 
			
		||||
### Manual Build and Install
 | 
			
		||||
To build the standalone jar:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
```console
 | 
			
		||||
$ mvn package
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Copy the resulting jar to the DSpace `lib` directory:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ cp target/cgspace-java-helpers-5.1.jar ~/dspace/lib
 | 
			
		||||
```console
 | 
			
		||||
$ cp target/cgspace-java-helpers-7.6.1.4-SNAPSHOT.jar ~/dspace/lib/
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Configuration
 | 
			
		||||
Add the curation task to DSpace's `config/modules/curate.cfg`:
 | 
			
		||||
Please refer to the appropriate README.md file:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
plugin.named.org.dspace.curate.CurationTask = \
 | 
			
		||||
...
 | 
			
		||||
    io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger \
 | 
			
		||||
    io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger.force
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
And then add a configuration file for the task in `config/modules/countrycodetagger.cfg`:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
# name of the field containing ISO 3166-1 country names
 | 
			
		||||
iso3166.field = cg.coverage.country
 | 
			
		||||
 | 
			
		||||
# name of the field containing ISO 3166-1 Alpha2 country codes
 | 
			
		||||
iso3166-alpha2.field = cg.coverage.iso3166-alpha2
 | 
			
		||||
 | 
			
		||||
# only add country codes if an item doesn't have any (default false)
 | 
			
		||||
#forceupdate = false
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
*Note*: DSpace's curation system supports "profiles" where you can use the same task with different options, for example above I have a normal country code tagger and a "force" variant. To use the "force" variant you create a new configuration file with the overridden options in `config/modules/countrycodetagger.force.cfg`. The "force" profile clears all existing country codes and updates everything.
 | 
			
		||||
 | 
			
		||||
## Invocation
 | 
			
		||||
Once the jar is installed and you have added appropriate configuration in `~/dspace/config/modules`:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ ~/dspace/bin/dspace curate -t countrycodetagger -i 10568/3 -r - -l 500 -s object
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
*Note*: it is very important to set the cache limit (`-l`) and the database transaction scope to something sensible (`object`) if you're curating a community or collection with more than a few hundred items.
 | 
			
		||||
- Curation Tasks: [src/main/java/io/github/ilri/cgspace/ctasks/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace7/src/main/java/io/github/ilri/cgspace/ctasks/README.md)
 | 
			
		||||
- Scripts: [src/main/java/io/github/ilri/cgspace/scripts/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace7/src/main/java/io/github/ilri/cgspace/scripts/README.md)
 | 
			
		||||
 | 
			
		||||
## Notes
 | 
			
		||||
This project was initially created according to the [Maven Getting Started Guide](https://maven.apache.org/guides/getting-started/):
 | 
			
		||||
@@ -75,11 +49,13 @@ This project was initially created according to the [Maven Getting Started Guide
 | 
			
		||||
$ mvn -B archetype:generate -DgroupId=io.github.ilri.cgspace -DartifactId=cgspace-java-helpers -DarchetypeArtifactId=maven-archetype-quickstart -DarchetypeVersion=1.4
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## TODO
 | 
			
		||||
To deploy a new `-SNAPSHOT` release to Maven Central (make sure OSSRH credentials are in `~/.m2/settings.xml`):
 | 
			
		||||
 | 
			
		||||
- Make sure this doesn't work on items in the workflow
 | 
			
		||||
- Check for existence of metadata field before trying to add metadata
 | 
			
		||||
- Add tests
 | 
			
		||||
```console
 | 
			
		||||
$ mvn clean deploy
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
See: [Performing a Snapshot Deployment](https://central.sonatype.org/publish/publish-maven/#performing-a-snapshot-deployment)
 | 
			
		||||
 | 
			
		||||
## License
 | 
			
		||||
This work is licensed under the [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html).
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										89
									
								
								pom.xml
									
									
									
									
									
								
							
							
						
						
									
										89
									
								
								pom.xml
									
									
									
									
									
								
							@@ -6,56 +6,57 @@
 | 
			
		||||
 | 
			
		||||
  <groupId>io.github.ilri.cgspace</groupId>
 | 
			
		||||
  <artifactId>cgspace-java-helpers</artifactId>
 | 
			
		||||
  <version>5.1</version>
 | 
			
		||||
  <version>7.6.1.4-SNAPSHOT</version>
 | 
			
		||||
 | 
			
		||||
  <name>cgspace-java-helpers</name>
 | 
			
		||||
  <url>https://github.com/ilri/cgspace-java-helpers</url>
 | 
			
		||||
  <description>Curation tasks and helper scripts for the CGSpace institutional repository</description>
 | 
			
		||||
 | 
			
		||||
  <developers>
 | 
			
		||||
    <developer>
 | 
			
		||||
      <name>Alan Orth</name>
 | 
			
		||||
      <email>maven@mjanja.mozmail.com</email>
 | 
			
		||||
      <organizationUrl>https://mjanja.ch</organizationUrl>
 | 
			
		||||
    </developer>
 | 
			
		||||
  </developers>
 | 
			
		||||
 | 
			
		||||
  <licenses>
 | 
			
		||||
    <license>
 | 
			
		||||
        <name>GPL-3.0-only</name>
 | 
			
		||||
        <url>https://spdx.org/licenses/GPL-3.0-or-later.html</url>
 | 
			
		||||
        <url>https://spdx.org/licenses/GPL-3.0-only.html</url>
 | 
			
		||||
    </license>
 | 
			
		||||
  </licenses>
 | 
			
		||||
 | 
			
		||||
  <!-- brings the sonatype snapshot repository and signing requirement on board -->
 | 
			
		||||
  <parent>
 | 
			
		||||
    <groupId>org.sonatype.oss</groupId>
 | 
			
		||||
    <artifactId>oss-parent</artifactId>
 | 
			
		||||
    <version>9</version>
 | 
			
		||||
    <relativePath />
 | 
			
		||||
  </parent>
 | 
			
		||||
 | 
			
		||||
  <properties>
 | 
			
		||||
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
 | 
			
		||||
    <maven.compiler.source>1.7</maven.compiler.source>
 | 
			
		||||
    <maven.compiler.target>1.7</maven.compiler.target>
 | 
			
		||||
    <maven.compiler.release>11</maven.compiler.release>
 | 
			
		||||
  </properties>
 | 
			
		||||
 | 
			
		||||
  <dependencies>
 | 
			
		||||
    <dependency>
 | 
			
		||||
      <groupId>junit</groupId>
 | 
			
		||||
      <artifactId>junit</artifactId>
 | 
			
		||||
      <version>4.11</version>
 | 
			
		||||
      <scope>test</scope>
 | 
			
		||||
    </dependency>
 | 
			
		||||
    <dependency>
 | 
			
		||||
      <groupId>com.google.code.gson</groupId>
 | 
			
		||||
      <artifactId>gson</artifactId>
 | 
			
		||||
      <version>2.2.1</version>
 | 
			
		||||
      <version>2.11.0</version>
 | 
			
		||||
      <!-- Ignore gson's dependency on error_prone_annotations because it causes dependency convergence with something pulled in by dspace-api -->
 | 
			
		||||
      <exclusions>
 | 
			
		||||
        <exclusion>
 | 
			
		||||
          <groupId>com.google.errorprone</groupId>
 | 
			
		||||
            <artifactId>error_prone_annotations</artifactId>
 | 
			
		||||
          </exclusion>
 | 
			
		||||
      </exclusions>
 | 
			
		||||
    </dependency>
 | 
			
		||||
    <dependency>
 | 
			
		||||
      <groupId>org.dspace</groupId>
 | 
			
		||||
      <artifactId>dspace-api</artifactId>
 | 
			
		||||
      <version>5.8</version>
 | 
			
		||||
      <version>7.6.3</version>
 | 
			
		||||
      <scope>provided</scope>
 | 
			
		||||
    </dependency>
 | 
			
		||||
  </dependencies>
 | 
			
		||||
 | 
			
		||||
  <scm>
 | 
			
		||||
      <connection>scm:git:git://github.com/ilri/cgspace-java-helpers.git</connection>
 | 
			
		||||
      <developerConnection>scm:git:ssh://github.com:nanosai/cgspace-java-helpers.git</developerConnection>
 | 
			
		||||
      <url>http://github.com/ilri/cgspace-java-helpers</url>
 | 
			
		||||
      <developerConnection>scm:git:ssh://github.com:ilri/cgspace-java-helpers.git</developerConnection>
 | 
			
		||||
      <url>https://github.com/ilri/cgspace-java-helpers</url>
 | 
			
		||||
  </scm>
 | 
			
		||||
 | 
			
		||||
  <distributionManagement>
 | 
			
		||||
@@ -75,43 +76,63 @@
 | 
			
		||||
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-clean-plugin</artifactId>
 | 
			
		||||
          <version>3.1.0</version>
 | 
			
		||||
          <version>3.3.2</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-resources-plugin</artifactId>
 | 
			
		||||
          <version>3.0.2</version>
 | 
			
		||||
          <version>3.3.1</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-compiler-plugin</artifactId>
 | 
			
		||||
          <version>3.8.0</version>
 | 
			
		||||
          <version>3.13.0</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-surefire-plugin</artifactId>
 | 
			
		||||
          <version>2.22.1</version>
 | 
			
		||||
          <version>3.2.5</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-jar-plugin</artifactId>
 | 
			
		||||
          <version>3.0.2</version>
 | 
			
		||||
          <version>3.4.1</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-install-plugin</artifactId>
 | 
			
		||||
          <version>2.5.2</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-deploy-plugin</artifactId>
 | 
			
		||||
          <version>2.8.2</version>
 | 
			
		||||
          <version>3.1.1</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-site-plugin</artifactId>
 | 
			
		||||
          <version>3.7.1</version>
 | 
			
		||||
          <version>3.12.1</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-project-info-reports-plugin</artifactId>
 | 
			
		||||
          <version>3.0.0</version>
 | 
			
		||||
          <version>3.5.0</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <groupId>org.sonatype.plugins</groupId>
 | 
			
		||||
          <artifactId>nexus-staging-maven-plugin</artifactId>
 | 
			
		||||
          <version>1.7.0</version>
 | 
			
		||||
          <extensions>true</extensions>
 | 
			
		||||
          <configuration>
 | 
			
		||||
            <serverId>ossrh</serverId>
 | 
			
		||||
            <nexusUrl>https://oss.sonatype.org/</nexusUrl>
 | 
			
		||||
            <autoReleaseAfterClose>true</autoReleaseAfterClose>
 | 
			
		||||
          </configuration>
 | 
			
		||||
        </plugin>
 | 
			
		||||
      </plugins>
 | 
			
		||||
    </pluginManagement>
 | 
			
		||||
  </build>
 | 
			
		||||
 | 
			
		||||
  <repositories>
 | 
			
		||||
    <!-- Check Maven Central first (before other repos below) -->
 | 
			
		||||
    <repository>
 | 
			
		||||
        <id>maven-central</id>
 | 
			
		||||
        <url>https://repo.maven.apache.org/maven2</url>
 | 
			
		||||
    </repository>
 | 
			
		||||
    <!-- For Handle Server -->
 | 
			
		||||
    <repository>
 | 
			
		||||
        <id>handle.net</id>
 | 
			
		||||
        <url>https://handle.net/maven</url>
 | 
			
		||||
    </repository>
 | 
			
		||||
  </repositories>
 | 
			
		||||
</project>
 | 
			
		||||
 
 | 
			
		||||
@@ -1,20 +1,8 @@
 | 
			
		||||
/*
 | 
			
		||||
DSpace Curation Tasks
 | 
			
		||||
Copyright (C) 2020  Alan Orth
 | 
			
		||||
 | 
			
		||||
This program is free software: you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License
 | 
			
		||||
along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
 * Copyright (C) 2020 Alan Orth
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: GPL-3.0-only
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.ctasks;
 | 
			
		||||
 | 
			
		||||
@@ -22,4 +10,4 @@ import java.util.List;
 | 
			
		||||
 | 
			
		||||
public class CGSpaceCountriesVocabulary extends CountriesVocabulary {
 | 
			
		||||
    List<Country> countries;
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -1,20 +1,8 @@
 | 
			
		||||
/*
 | 
			
		||||
    DSpace Curation Tasks
 | 
			
		||||
    Copyright (C) 2020  Alan Orth
 | 
			
		||||
 | 
			
		||||
    This program is free software: you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License
 | 
			
		||||
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
 * Copyright (C) 2020 Alan Orth
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: GPL-3.0-only
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.ctasks;
 | 
			
		||||
 | 
			
		||||
@@ -22,27 +10,30 @@ import javax.annotation.Nullable;
 | 
			
		||||
 | 
			
		||||
public class CountriesVocabulary {
 | 
			
		||||
 | 
			
		||||
    class Country {
 | 
			
		||||
        private String name;            //required
 | 
			
		||||
        private String common_name;     //optional
 | 
			
		||||
        private String official_name;   //optional
 | 
			
		||||
        private String cgspace_name;    //optional
 | 
			
		||||
        private String numeric;         //required Hmmmm need to cast this...
 | 
			
		||||
        private String alpha_2;         //required
 | 
			
		||||
        private String alpha_3;         //required
 | 
			
		||||
    static class Country {
 | 
			
		||||
        private final String name; // required
 | 
			
		||||
        private final String common_name; // optional
 | 
			
		||||
        private final String official_name; // optional
 | 
			
		||||
        private final String cgspace_name; // optional
 | 
			
		||||
        private final String numeric; // required Hmmmm need to cast this...
 | 
			
		||||
        private final String alpha_2; // required
 | 
			
		||||
        private final String alpha_3; // required
 | 
			
		||||
 | 
			
		||||
        public Country(String name,
 | 
			
		||||
                       @Nullable String common_name,
 | 
			
		||||
                       @Nullable String official_name,
 | 
			
		||||
                       @Nullable String cgspace_name,
 | 
			
		||||
                       String numeric,
 | 
			
		||||
                       String alpha_2,
 | 
			
		||||
                       String alpha_3) {
 | 
			
		||||
        public Country(
 | 
			
		||||
                String name,
 | 
			
		||||
                @Nullable String common_name,
 | 
			
		||||
                @Nullable String official_name,
 | 
			
		||||
                @Nullable String cgspace_name,
 | 
			
		||||
                String numeric,
 | 
			
		||||
                String alpha_2,
 | 
			
		||||
                String alpha_3) {
 | 
			
		||||
            this.name = name;
 | 
			
		||||
            this.common_name = common_name;
 | 
			
		||||
            this.official_name = official_name;
 | 
			
		||||
            this.cgspace_name = cgspace_name;
 | 
			
		||||
            this.numeric = numeric; // fuuuuu this is a string and we can't cast to Integer because some values are zeropadded like "004"
 | 
			
		||||
            this.numeric =
 | 
			
		||||
                    numeric; // fuuuuu this is a string and we can't cast to Integer because some
 | 
			
		||||
                             // values are zeropadded like "004"
 | 
			
		||||
            this.alpha_2 = alpha_2;
 | 
			
		||||
            this.alpha_3 = alpha_3;
 | 
			
		||||
        }
 | 
			
		||||
@@ -75,4 +66,4 @@ public class CountriesVocabulary {
 | 
			
		||||
            return cgspace_name;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -1,29 +1,19 @@
 | 
			
		||||
/*
 | 
			
		||||
    DSpace Curation Tasks
 | 
			
		||||
    Copyright (C) 2020  Alan Orth
 | 
			
		||||
 | 
			
		||||
    This program is free software: you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License
 | 
			
		||||
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
 * Copyright (C) 2020 Alan Orth
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: GPL-3.0-only
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.ctasks;
 | 
			
		||||
 | 
			
		||||
import com.google.gson.Gson;
 | 
			
		||||
import org.apache.log4j.Logger;
 | 
			
		||||
 | 
			
		||||
import org.apache.logging.log4j.LogManager;
 | 
			
		||||
import org.apache.logging.log4j.Logger;
 | 
			
		||||
import org.dspace.authorize.AuthorizeException;
 | 
			
		||||
import org.dspace.content.DSpaceObject;
 | 
			
		||||
import org.dspace.content.Item;
 | 
			
		||||
import org.dspace.content.Metadatum;
 | 
			
		||||
import org.dspace.content.MetadataValue;
 | 
			
		||||
import org.dspace.core.Constants;
 | 
			
		||||
import org.dspace.curate.AbstractCurationTask;
 | 
			
		||||
import org.dspace.curate.Curator;
 | 
			
		||||
@@ -34,20 +24,28 @@ import java.io.InputStreamReader;
 | 
			
		||||
import java.sql.SQLException;
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
import java.util.Objects;
 | 
			
		||||
 | 
			
		||||
public class CountryCodeTagger extends AbstractCurationTask
 | 
			
		||||
{
 | 
			
		||||
/*
 | 
			
		||||
 * Add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata.
 | 
			
		||||
 *
 | 
			
		||||
 * @author Alan Orth for the International Livestock Research Institute
 | 
			
		||||
 * @version 7.6.1.2
 | 
			
		||||
 * @since 5.1
 | 
			
		||||
 */
 | 
			
		||||
public class CountryCodeTagger extends AbstractCurationTask {
 | 
			
		||||
    public class CountryCodeTaggerConfig {
 | 
			
		||||
        private final String isocodesJsonPath = "/io/github/ilri/cgspace/ctasks/iso_3166-1.json";
 | 
			
		||||
        private final String cgspaceCountriesJsonPath = "/io/github/ilri/cgspace/ctasks/cgspace-countries.json";
 | 
			
		||||
        private final String cgspaceCountriesJsonPath =
 | 
			
		||||
                "/io/github/ilri/cgspace/ctasks/cgspace-countries.json";
 | 
			
		||||
        private final String iso3166Field = taskProperty("iso3166.field");
 | 
			
		||||
        private final String iso3166Alpha2Field = taskProperty("iso3166-alpha2.field");
 | 
			
		||||
        private final boolean forceupdate = taskBooleanProperty("forceupdate", false);
 | 
			
		||||
 | 
			
		||||
        private Logger log = Logger.getLogger(CountryCodeTagger.class);
 | 
			
		||||
        private final Logger log = LogManager.getLogger();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public class CountryCodeTaggerResult {
 | 
			
		||||
    public static class CountryCodeTaggerResult {
 | 
			
		||||
        private int status = Curator.CURATE_UNSET;
 | 
			
		||||
        private String result = null;
 | 
			
		||||
 | 
			
		||||
@@ -69,100 +67,134 @@ public class CountryCodeTagger extends AbstractCurationTask
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    @Override
 | 
			
		||||
    public int perform(DSpaceObject dso) throws IOException
 | 
			
		||||
    {
 | 
			
		||||
    public int perform(DSpaceObject dso) throws IOException {
 | 
			
		||||
        // gotta define this here so we can access it after the if context...
 | 
			
		||||
        CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
 | 
			
		||||
 | 
			
		||||
		if (dso.getType() == Constants.ITEM)
 | 
			
		||||
        {
 | 
			
		||||
        if (dso.getType() == Constants.ITEM) {
 | 
			
		||||
            // Load configuration
 | 
			
		||||
            CountryCodeTaggerConfig config = new CountryCodeTaggerConfig();
 | 
			
		||||
 | 
			
		||||
            Item item = (Item)dso;
 | 
			
		||||
            Item item = (Item) dso;
 | 
			
		||||
 | 
			
		||||
            alpha2Result = performAlpha2(item, config);
 | 
			
		||||
            try {
 | 
			
		||||
                alpha2Result = performAlpha2(item, config);
 | 
			
		||||
            } catch (SQLException throwables) {
 | 
			
		||||
                throwables.printStackTrace();
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            setResult(alpha2Result.getResult());
 | 
			
		||||
            report(alpha2Result.getResult());
 | 
			
		||||
		}
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
		return alpha2Result.getStatus();
 | 
			
		||||
        return alpha2Result.getStatus();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public CountryCodeTaggerResult performAlpha2(Item item, CountryCodeTaggerConfig config) throws IOException
 | 
			
		||||
    {
 | 
			
		||||
    public CountryCodeTaggerResult performAlpha2(Item item, CountryCodeTaggerConfig config)
 | 
			
		||||
            throws IOException, SQLException {
 | 
			
		||||
        CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
 | 
			
		||||
        String itemHandle = item.getHandle();
 | 
			
		||||
 | 
			
		||||
        Metadatum[] itemCountries = item.getMetadataByMetadataString(config.iso3166Field);
 | 
			
		||||
        List<MetadataValue> itemCountries =
 | 
			
		||||
                itemService.getMetadataByMetadataString(item, config.iso3166Field);
 | 
			
		||||
 | 
			
		||||
        // skip items that don't have country metadata
 | 
			
		||||
        if (itemCountries.length == 0) {
 | 
			
		||||
            alpha2Result.setResult(itemHandle + ": no countries, skipping.");
 | 
			
		||||
        if (itemCountries.isEmpty()) {
 | 
			
		||||
            alpha2Result.setResult("No countries, skipping.");
 | 
			
		||||
            alpha2Result.setStatus(Curator.CURATE_SKIP);
 | 
			
		||||
        } else {
 | 
			
		||||
            Gson gson = new Gson();
 | 
			
		||||
 | 
			
		||||
            // TODO: convert to try: https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
 | 
			
		||||
            BufferedReader reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(config.isocodesJsonPath)));
 | 
			
		||||
            ISO3166CountriesVocabulary isocodesCountriesJson = gson.fromJson(reader, ISO3166CountriesVocabulary.class);
 | 
			
		||||
            // TODO: convert to try:
 | 
			
		||||
            // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
 | 
			
		||||
            BufferedReader reader =
 | 
			
		||||
                    new BufferedReader(
 | 
			
		||||
                            new InputStreamReader(
 | 
			
		||||
                                    Objects.requireNonNull(this.getClass().getResourceAsStream(config.isocodesJsonPath))));
 | 
			
		||||
            ISO3166CountriesVocabulary isocodesCountriesJson =
 | 
			
		||||
                    gson.fromJson(reader, ISO3166CountriesVocabulary.class);
 | 
			
		||||
            reader.close();
 | 
			
		||||
 | 
			
		||||
            reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(config.cgspaceCountriesJsonPath)));
 | 
			
		||||
            CGSpaceCountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CGSpaceCountriesVocabulary.class);
 | 
			
		||||
            reader =
 | 
			
		||||
                    new BufferedReader(
 | 
			
		||||
                            new InputStreamReader(
 | 
			
		||||
                                    Objects.requireNonNull(this.getClass()
 | 
			
		||||
                                            .getResourceAsStream(config.cgspaceCountriesJsonPath))));
 | 
			
		||||
            CGSpaceCountriesVocabulary cgspaceCountriesJson =
 | 
			
		||||
                    gson.fromJson(reader, CGSpaceCountriesVocabulary.class);
 | 
			
		||||
            reader.close();
 | 
			
		||||
 | 
			
		||||
            // split the alpha2 country code field into schema, element, and qualifier so we can use it with item.addMetadata()
 | 
			
		||||
            // split the alpha2 country code field into schema, element, and qualifier so we can use
 | 
			
		||||
            // it with item.addMetadata()
 | 
			
		||||
            String[] iso3166Alpha2FieldParts = config.iso3166Alpha2Field.split("\\.");
 | 
			
		||||
 | 
			
		||||
            if (config.forceupdate) {
 | 
			
		||||
                item.clearMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], Item.ANY);
 | 
			
		||||
                itemService.clearMetadata(
 | 
			
		||||
                        Curator.curationContext(),
 | 
			
		||||
                        item,
 | 
			
		||||
                        iso3166Alpha2FieldParts[0],
 | 
			
		||||
                        iso3166Alpha2FieldParts[1],
 | 
			
		||||
                        iso3166Alpha2FieldParts[2],
 | 
			
		||||
                        Item.ANY);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // check the item's country codes, if any
 | 
			
		||||
            Metadatum[] itemAlpha2CountryCodes = item.getMetadataByMetadataString(config.iso3166Alpha2Field);
 | 
			
		||||
            List<MetadataValue> itemAlpha2CountryCodes =
 | 
			
		||||
                    itemService.getMetadataByMetadataString(item, config.iso3166Alpha2Field);
 | 
			
		||||
 | 
			
		||||
            if (itemAlpha2CountryCodes.length == 0) {
 | 
			
		||||
            if (itemAlpha2CountryCodes.isEmpty()) {
 | 
			
		||||
                List<String> newAlpha2Codes = new ArrayList<String>();
 | 
			
		||||
                for (Metadatum itemCountry : itemCountries) {
 | 
			
		||||
                    //check ISO 3166-1 countries
 | 
			
		||||
                for (MetadataValue itemCountry : itemCountries) {
 | 
			
		||||
                    // check ISO 3166-1 countries
 | 
			
		||||
                    for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
 | 
			
		||||
                        if (itemCountry.value.equalsIgnoreCase(country.getName()) || itemCountry.value.equalsIgnoreCase(country.get_official_name()) || itemCountry.value.equalsIgnoreCase(country.get_common_name())) {
 | 
			
		||||
                        if (itemCountry.getValue().equalsIgnoreCase(country.getName())
 | 
			
		||||
                                || itemCountry
 | 
			
		||||
                                        .getValue()
 | 
			
		||||
                                        .equalsIgnoreCase(country.get_official_name())
 | 
			
		||||
                                || itemCountry
 | 
			
		||||
                                        .getValue()
 | 
			
		||||
                                        .equalsIgnoreCase(country.get_common_name())) {
 | 
			
		||||
                            newAlpha2Codes.add(country.getAlpha_2());
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    //check CGSpace countries
 | 
			
		||||
                    // check CGSpace countries
 | 
			
		||||
                    for (CountriesVocabulary.Country country : cgspaceCountriesJson.countries) {
 | 
			
		||||
                        if (itemCountry.value.equalsIgnoreCase(country.getCgspace_name())) {
 | 
			
		||||
                        if (itemCountry.getValue().equalsIgnoreCase(country.getCgspace_name())) {
 | 
			
		||||
                            newAlpha2Codes.add(country.getAlpha_2());
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                if (newAlpha2Codes.size() > 0) {
 | 
			
		||||
                if (!newAlpha2Codes.isEmpty()) {
 | 
			
		||||
                    try {
 | 
			
		||||
                        // add metadata values (casting the List<String> to an array)
 | 
			
		||||
                        item.addMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], "en_US", newAlpha2Codes.toArray(new String[0]));
 | 
			
		||||
                        item.update();
 | 
			
		||||
                        itemService.addMetadata(
 | 
			
		||||
                                Curator.curationContext(),
 | 
			
		||||
                                item,
 | 
			
		||||
                                iso3166Alpha2FieldParts[0],
 | 
			
		||||
                                iso3166Alpha2FieldParts[1],
 | 
			
		||||
                                iso3166Alpha2FieldParts[2],
 | 
			
		||||
                                "en_US",
 | 
			
		||||
                                newAlpha2Codes);
 | 
			
		||||
                        itemService.update(Curator.curationContext(), item);
 | 
			
		||||
                    } catch (SQLException | AuthorizeException sqle) {
 | 
			
		||||
                        config.log.debug(sqle.getMessage());
 | 
			
		||||
                        alpha2Result.setResult(itemHandle + ": error");
 | 
			
		||||
                        alpha2Result.setResult("Error");
 | 
			
		||||
                        alpha2Result.setStatus(Curator.CURATE_ERROR);
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    alpha2Result.setResult(itemHandle + ": added " + newAlpha2Codes.size() + " alpha2 country code(s)");
 | 
			
		||||
                    alpha2Result.setResult(
 | 
			
		||||
                            "Added "
 | 
			
		||||
                                    + newAlpha2Codes.size()
 | 
			
		||||
                                    + " alpha2 country code(s)");
 | 
			
		||||
                } else {
 | 
			
		||||
                    alpha2Result.setResult(itemHandle + ": no matching countries found");
 | 
			
		||||
                    alpha2Result.setResult("No matching countries found");
 | 
			
		||||
                }
 | 
			
		||||
                alpha2Result.setStatus(Curator.CURATE_SUCCESS);
 | 
			
		||||
            } else {
 | 
			
		||||
                alpha2Result.setResult(itemHandle + ": item has country codes, skipping");
 | 
			
		||||
                alpha2Result.setResult("Item already has country codes, skipping unless forced");
 | 
			
		||||
                alpha2Result.setStatus(Curator.CURATE_SKIP);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return alpha2Result;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -1,27 +1,18 @@
 | 
			
		||||
/*
 | 
			
		||||
DSpace Curation Tasks
 | 
			
		||||
Copyright (C) 2020  Alan Orth
 | 
			
		||||
 | 
			
		||||
This program is free software: you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License
 | 
			
		||||
along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
 * Copyright (C) 2020 Alan Orth
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: GPL-3.0-only
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.ctasks;
 | 
			
		||||
 | 
			
		||||
import com.google.gson.annotations.SerializedName;
 | 
			
		||||
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
public class ISO3166CountriesVocabulary extends CountriesVocabulary {
 | 
			
		||||
    // support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since our class needs to match the JSON exactly
 | 
			
		||||
    @SerializedName("3166-1") List<Country> countries;
 | 
			
		||||
}
 | 
			
		||||
    // support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since
 | 
			
		||||
    // our class needs to match the JSON exactly
 | 
			
		||||
    @SerializedName("3166-1")
 | 
			
		||||
    List<Country> countries;
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										100
									
								
								src/main/java/io/github/ilri/cgspace/ctasks/NormalizeDOIs.java
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										100
									
								
								src/main/java/io/github/ilri/cgspace/ctasks/NormalizeDOIs.java
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,100 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright (C) 2024 Alan Orth
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: GPL-3.0-only
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.ctasks;
 | 
			
		||||
 | 
			
		||||
import org.dspace.content.DSpaceObject;
 | 
			
		||||
import org.dspace.content.Item;
 | 
			
		||||
import org.dspace.content.MetadataValue;
 | 
			
		||||
import org.dspace.core.Constants;
 | 
			
		||||
import org.dspace.curate.AbstractCurationTask;
 | 
			
		||||
import org.dspace.curate.Curator;
 | 
			
		||||
import org.dspace.curate.Suspendable;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Attempt to normalize DOIs by stripping whitespace, lower casing, and
 | 
			
		||||
 * converting to <code>https://doi.org</code> format. The reason is that DOIs are case
 | 
			
		||||
 * insensitive and must be unique, which we can only guarantee if they are
 | 
			
		||||
 * normalized to the same format.
 | 
			
		||||
 *
 | 
			
		||||
 * See: <a href="https://www.crossref.org/documentation/member-setup/constructing-your-dois/">https://www.crossref.org/documentation/member-setup/constructing-your-dois/</a>
 | 
			
		||||
 *
 | 
			
		||||
 * TODO: set curation to failed if invalid DOI submitted (and configure to reject in workflow)
 | 
			
		||||
 * TODO: allow operation on communities and collections (currently only works on items)
 | 
			
		||||
 *
 | 
			
		||||
 * @author Alan Orth for the International Livestock Research Institute
 | 
			
		||||
 * @version 7.6.1.3
 | 
			
		||||
 * @since 7.6.1.1
 | 
			
		||||
 */
 | 
			
		||||
@Suspendable
 | 
			
		||||
public class NormalizeDOIs extends AbstractCurationTask {
 | 
			
		||||
    @Override
 | 
			
		||||
    public int perform(DSpaceObject dso) throws IOException {
 | 
			
		||||
        if (dso.getType() == Constants.ITEM) {
 | 
			
		||||
            Item item = (Item) dso;
 | 
			
		||||
            String result;
 | 
			
		||||
 | 
			
		||||
            // Keep track of whether we change metadata, and how many
 | 
			
		||||
            boolean metadataChanged = false;
 | 
			
		||||
            int count = 0;
 | 
			
		||||
 | 
			
		||||
            // Hard coding the metadata field for now since I can't figure out how to read the taskProperty
 | 
			
		||||
            List<MetadataValue> itemDOIs = itemService.getMetadataByMetadataString(item, "cg.identifier.doi");
 | 
			
		||||
 | 
			
		||||
            // skip items that don't have DOIs
 | 
			
		||||
            if (itemDOIs.isEmpty()) {
 | 
			
		||||
                setResult("No DOIs, skipping");
 | 
			
		||||
                return Curator.CURATE_SKIP;
 | 
			
		||||
            } else {
 | 
			
		||||
                for (MetadataValue itemDOI : itemDOIs) {
 | 
			
		||||
                    String newDOI = getNormalizedDOI(itemDOI);
 | 
			
		||||
 | 
			
		||||
                    // Check if the normalized DOI is different than the original
 | 
			
		||||
                    if (!newDOI.equals(itemDOI.getValue())) {
 | 
			
		||||
                        itemDOI.setValue(newDOI);
 | 
			
		||||
                        metadataChanged = true;
 | 
			
		||||
                        count++;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            if (metadataChanged) {
 | 
			
		||||
                result = "Normalized " + count + " DOI(s)";
 | 
			
		||||
            } else {
 | 
			
		||||
                result = "All DOIs already normalized";
 | 
			
		||||
            }
 | 
			
		||||
            setResult(result);
 | 
			
		||||
 | 
			
		||||
            return Curator.CURATE_SUCCESS;
 | 
			
		||||
        } else {
 | 
			
		||||
            setResult("Object skipped");
 | 
			
		||||
            return Curator.CURATE_SKIP;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private static String getNormalizedDOI(MetadataValue itemDOI) {
 | 
			
		||||
        // Convert to lowercase
 | 
			
		||||
        String newDOI = itemDOI.getValue().toLowerCase();
 | 
			
		||||
        // Strip leading and trailing whitespace
 | 
			
		||||
        newDOI = newDOI.strip();
 | 
			
		||||
        // Convert to HTTPS
 | 
			
		||||
        newDOI = newDOI.replace("http://", "https://");
 | 
			
		||||
        // Prefer doi.org to dx.doi.org
 | 
			
		||||
        newDOI = newDOI.replace("dx.doi.org", "doi.org");
 | 
			
		||||
        // Prefer doi.org to www.doi.org
 | 
			
		||||
        newDOI = newDOI.replace("www.doi.org", "doi.org");
 | 
			
		||||
        // Fix URL encoded slashes (%2f)
 | 
			
		||||
        newDOI = newDOI.replace("%2f", "/");
 | 
			
		||||
        // Replace values like doi: 10.11648/j.jps.20140201.14
 | 
			
		||||
        newDOI = newDOI.replaceAll("^doi: 10\\.", "https://doi.org/10.");
 | 
			
		||||
        // Replace values like 10.3390/foods12010115
 | 
			
		||||
        newDOI = newDOI.replaceAll("^10\\.", "https://doi.org/10.");
 | 
			
		||||
 | 
			
		||||
        return newDOI;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										74
									
								
								src/main/java/io/github/ilri/cgspace/ctasks/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								src/main/java/io/github/ilri/cgspace/ctasks/README.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,74 @@
 | 
			
		||||
# Curation Tasks
 | 
			
		||||
DSpace curation tasks used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
 | 
			
		||||
 | 
			
		||||
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
 | 
			
		||||
- **NormalizeDOIs**: normalize DOIs by stripping whitespace, lowercasing, and converting to https://doi.org/ format
 | 
			
		||||
 | 
			
		||||
Tested on DSpace 7.6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
 | 
			
		||||
 | 
			
		||||
## Build and Install
 | 
			
		||||
 | 
			
		||||
### Integrate into DSpace Build
 | 
			
		||||
To use these curation tasks in a DSpace project add the following dependency to `dspace/modules/additions/pom.xml`:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
<dependency>
 | 
			
		||||
  <groupId>io.github.ilri.cgspace</groupId>
 | 
			
		||||
  <artifactId>cgspace-java-helpers</artifactId>
 | 
			
		||||
  <version>7.6.1.4-SNAPSHOT</version>
 | 
			
		||||
</dependency>
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The jar will be copied to all DSpace applications.
 | 
			
		||||
 | 
			
		||||
### Manual Build and Install
 | 
			
		||||
To build the standalone jar:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ mvn package
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Copy the resulting jar to the DSpace `lib` directory:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ cp target/cgspace-java-helpers-7.6.1.4-SNAPSHOT.jar ~/dspace/lib/
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Configuration
 | 
			
		||||
Add the curation task(s) to DSpace's `config/modules/curate.cfg`:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger
 | 
			
		||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger.force
 | 
			
		||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.NormalizeDOIs = normalizedois
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
And then add the following variables to your `local.cfg` or some other [configuration file that is included](https://wiki.lyrasis.org/display/DSDOC7x/Configuration+Reference#ConfigurationReference-IncludingotherPropertyFiles):
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
# name of the field containing ISO 3166-1 country names
 | 
			
		||||
countrycodetagger.iso3166.field = cg.coverage.country
 | 
			
		||||
 | 
			
		||||
# name of the field containing ISO 3166-1 Alpha2 country codes
 | 
			
		||||
countrycodetagger.iso3166-alpha2.field = cg.coverage.iso3166-alpha2
 | 
			
		||||
 | 
			
		||||
# force update: clear existing country codes and re-tag every item (default false: only tag items without any codes)
 | 
			
		||||
#countrycodetagger.forceupdate = false
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
*Note*: DSpace's curation system supports "profiles", where you can use the same task with different options. For example, above I have a normal country code tagger task and a "force" variant. The "force" variant is the same task, but it looks for configuration variables using the `countrycodetagger.force` prefix instead. To use the "force" variant you simply need to add these new variables, with the `forceupdate` parameter overridden, to the same configuration file where you put the other variables. The "force" profile clears all existing country codes and updates everything.
 | 
			
		||||
 | 
			
		||||
## Invocation
 | 
			
		||||
Once the jar is installed and you have added the appropriate configuration in `~/dspace/config/modules`, run the task from the command line:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ ~/dspace/bin/dspace curate -e eperson@repo.org -t countrycodetagger -i 10568/3 -r - -s object
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
*Note*: it is very important to set the database transaction scope to something sensible (`object`) if you're curating a community or collection with more than a few hundred items.
 | 
			
		||||
 | 
			
		||||
## TODO
 | 
			
		||||
 | 
			
		||||
- Make sure this doesn't operate on items that are still in the workflow
 | 
			
		||||
- Check for existence of metadata field before trying to add metadata
 | 
			
		||||
- Add tests
 | 
			
		||||
@@ -1,107 +1,176 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright (C) 2020 Alan Orth
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: GPL-3.0-only
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.scripts;
 | 
			
		||||
 | 
			
		||||
import org.apache.commons.lang.StringUtils;
 | 
			
		||||
import org.dspace.authorize.AuthorizeException;
 | 
			
		||||
import org.dspace.content.*;
 | 
			
		||||
import org.dspace.content.Bitstream;
 | 
			
		||||
import org.dspace.content.Bundle;
 | 
			
		||||
import org.dspace.content.Collection;
 | 
			
		||||
import org.dspace.content.Community;
 | 
			
		||||
import org.dspace.content.DSpaceObject;
 | 
			
		||||
import org.dspace.content.Item;
 | 
			
		||||
import org.dspace.content.MetadataValue;
 | 
			
		||||
import org.dspace.content.factory.ContentServiceFactory;
 | 
			
		||||
import org.dspace.content.service.BundleService;
 | 
			
		||||
import org.dspace.content.service.ItemService;
 | 
			
		||||
import org.dspace.core.Constants;
 | 
			
		||||
import org.dspace.core.Context;
 | 
			
		||||
import org.dspace.handle.HandleManager;
 | 
			
		||||
import org.dspace.handle.factory.HandleServiceFactory;
 | 
			
		||||
import org.dspace.handle.service.HandleService;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.sql.SQLException;
 | 
			
		||||
import java.util.Iterator;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * @author Andrea Schweer schweer@waikato.ac.nz for the LCoNZ Institutional Research Repositories
 | 
			
		||||
 * @author Alan Orth for the International Livestock Research Institute
 | 
			
		||||
 * @version 5.1-SNAPSHOT
 | 
			
		||||
 * @since 5.1-SNAPSHOT
 | 
			
		||||
 * @version 6.1
 | 
			
		||||
 * @since 5.1
 | 
			
		||||
 */
 | 
			
		||||
public class FixJpgJpgThumbnails {
 | 
			
		||||
    // note: static members belong to the class itself, not any one instance
 | 
			
		||||
    public static ItemService itemService = ContentServiceFactory.getInstance().getItemService();
 | 
			
		||||
    public static HandleService handleService =
 | 
			
		||||
            HandleServiceFactory.getInstance().getHandleService();
 | 
			
		||||
    public static BundleService bundleService =
 | 
			
		||||
            ContentServiceFactory.getInstance().getBundleService();
 | 
			
		||||
 | 
			
		||||
	public static void main(String[] args) {
 | 
			
		||||
		String parentHandle = null;
 | 
			
		||||
		if (args.length >= 1) {
 | 
			
		||||
			parentHandle = args[0];
 | 
			
		||||
		}
 | 
			
		||||
    public static void main(String[] args) {
 | 
			
		||||
        String parentHandle = null;
 | 
			
		||||
        if (args.length >= 1) {
 | 
			
		||||
            parentHandle = args[0];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
		Context context = null;
 | 
			
		||||
		try {
 | 
			
		||||
			context = new Context();
 | 
			
		||||
			context.turnOffAuthorisationSystem();
 | 
			
		||||
        Context context = null;
 | 
			
		||||
        try {
 | 
			
		||||
            context = new Context();
 | 
			
		||||
            context.turnOffAuthorisationSystem();
 | 
			
		||||
 | 
			
		||||
			if (StringUtils.isBlank(parentHandle)) {
 | 
			
		||||
				process(context, Item.findAll(context));
 | 
			
		||||
			} else {
 | 
			
		||||
				DSpaceObject parent = HandleManager.resolveToObject(context, parentHandle);
 | 
			
		||||
				if (parent != null) {
 | 
			
		||||
					switch (parent.getType()) {
 | 
			
		||||
						case Constants.COLLECTION:
 | 
			
		||||
							process(context, ((Collection) parent).getAllItems()); // getAllItems because we want to work on non-archived ones as well
 | 
			
		||||
							break;
 | 
			
		||||
						case Constants.COMMUNITY:
 | 
			
		||||
							Collection[] collections = ((Community) parent).getCollections();
 | 
			
		||||
							for (Collection collection : collections) {
 | 
			
		||||
								process(context, collection.getAllItems()); // getAllItems because we want to work on non-archived ones as well
 | 
			
		||||
							}
 | 
			
		||||
							break;
 | 
			
		||||
						case Constants.SITE:
 | 
			
		||||
							process(context, Item.findAll(context));
 | 
			
		||||
							break;
 | 
			
		||||
						case Constants.ITEM:
 | 
			
		||||
							processItem((Item) parent);
 | 
			
		||||
							context.commit();
 | 
			
		||||
							break;
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		} catch (SQLException | AuthorizeException | IOException e) {
 | 
			
		||||
			e.printStackTrace(System.err);
 | 
			
		||||
		} finally {
 | 
			
		||||
			if (context != null && context.isValid()) {
 | 
			
		||||
				context.abort();
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
            if (StringUtils.isBlank(parentHandle)) {
 | 
			
		||||
                process(context, itemService.findAll(context));
 | 
			
		||||
            } else {
 | 
			
		||||
                DSpaceObject parent = handleService.resolveToObject(context, parentHandle);
 | 
			
		||||
                if (parent != null) {
 | 
			
		||||
                    switch (parent.getType()) {
 | 
			
		||||
                        case Constants.SITE:
 | 
			
		||||
                            process(context, itemService.findAll(context));
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
                        case Constants.COMMUNITY:
 | 
			
		||||
                            List<Collection> collections = ((Community) parent).getCollections();
 | 
			
		||||
                            for (Collection collection : collections) {
 | 
			
		||||
                                process(
 | 
			
		||||
                                        context,
 | 
			
		||||
                                        itemService.findAllByCollection(context, collection));
 | 
			
		||||
                            }
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
                        case Constants.COLLECTION:
 | 
			
		||||
                            process(
 | 
			
		||||
                                    context,
 | 
			
		||||
                                    itemService.findByCollection(context, (Collection) parent));
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
                        case Constants.ITEM:
 | 
			
		||||
                            processItem(context, (Item) parent);
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        } catch (SQLException | AuthorizeException | IOException e) {
 | 
			
		||||
            e.printStackTrace(System.err);
 | 
			
		||||
        } finally {
 | 
			
		||||
            if (context != null && context.isValid()) {
 | 
			
		||||
                context.abort();
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
	private static void process(Context context, ItemIterator items) throws SQLException, IOException, AuthorizeException {
 | 
			
		||||
		while (items.hasNext()) {
 | 
			
		||||
			Item item = items.next();
 | 
			
		||||
			processItem(item);
 | 
			
		||||
			context.commit();
 | 
			
		||||
			item.decache();
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
    private static void process(Context context, Iterator<Item> items)
 | 
			
		||||
            throws SQLException, IOException, AuthorizeException {
 | 
			
		||||
        while (items.hasNext()) {
 | 
			
		||||
            Item item = items.next();
 | 
			
		||||
            processItem(context, item);
 | 
			
		||||
            itemService.update(context, item);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
	private static void processItem(Item item) throws SQLException, AuthorizeException, IOException {
 | 
			
		||||
		Bundle[] thumbnailBundles = item.getBundles("THUMBNAIL");
 | 
			
		||||
		for (Bundle thumbnailBundle : thumbnailBundles) {
 | 
			
		||||
			Bitstream[] thumbnailBundleBitstreams = thumbnailBundle.getBitstreams();
 | 
			
		||||
			for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
 | 
			
		||||
				String thumbnailName = thumbnailBitstream.getName();
 | 
			
		||||
    private static void processItem(Context context, Item item)
 | 
			
		||||
            throws SQLException, AuthorizeException, IOException {
 | 
			
		||||
        // Some bitstreams like Infographics and Maps are large JPEGs and put in the ORIGINAL bundle
 | 
			
		||||
        // on purpose so we shouldn't
 | 
			
		||||
        // swap them.
 | 
			
		||||
        List<MetadataValue> itemTypes =
 | 
			
		||||
                itemService.getMetadataByMetadataString(item, "dcterms.type");
 | 
			
		||||
        for (MetadataValue itemType : itemTypes) {
 | 
			
		||||
            if (itemType.getValue().equals("Infographic") || itemType.getValue().equals("Map")) {
 | 
			
		||||
                System.out.println(
 | 
			
		||||
                        item.getHandle() + ": item has an Infographic or Map, skipping.");
 | 
			
		||||
                return;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
				if (thumbnailName.contains(".jpg.jpg")) {
 | 
			
		||||
					Bundle[] originalBundles = item.getBundles("ORIGINAL");
 | 
			
		||||
					for (Bundle originalBundle : originalBundles) {
 | 
			
		||||
						Bitstream[] originalBundleBitstreams = originalBundle.getBitstreams();
 | 
			
		||||
        List<Bundle> thumbnailBundles = item.getBundles("THUMBNAIL");
 | 
			
		||||
        for (Bundle thumbnailBundle : thumbnailBundles) {
 | 
			
		||||
            List<Bitstream> thumbnailBundleBitstreams = thumbnailBundle.getBitstreams();
 | 
			
		||||
            for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
 | 
			
		||||
                String thumbnailName = thumbnailBitstream.getName();
 | 
			
		||||
                String thumbnailDescription = thumbnailBitstream.getDescription();
 | 
			
		||||
 | 
			
		||||
						for(Bitstream originalBitstream : originalBundleBitstreams) {
 | 
			
		||||
							String originalName = originalBitstream.getName();
 | 
			
		||||
                // There is no point continuing if the thumbnail's description is empty or null
 | 
			
		||||
                if (StringUtils.isEmpty(thumbnailDescription)) {
 | 
			
		||||
                    continue;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
							//check if the original file name is the same as the thumbnail name minus the extra ".jpg"
 | 
			
		||||
							if (originalName.equals(StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg")) && "Generated Thumbnail".equals(thumbnailBitstream.getDescription())) {
 | 
			
		||||
								System.out.println(item.getHandle() + ": replacing " + thumbnailName + " with " + originalName);
 | 
			
		||||
                if (thumbnailName.toLowerCase().contains(".jpg.jpg")) {
 | 
			
		||||
                    List<Bundle> originalBundles = item.getBundles("ORIGINAL");
 | 
			
		||||
                    for (Bundle originalBundle : originalBundles) {
 | 
			
		||||
                        List<Bitstream> originalBundleBitstreams = originalBundle.getBitstreams();
 | 
			
		||||
 | 
			
		||||
								//add the original bitstream to the THUMBNAIL bundle
 | 
			
		||||
								thumbnailBundle.addBitstream(originalBitstream);
 | 
			
		||||
								//remove the original bitstream from the ORIGINAL bundle
 | 
			
		||||
								originalBundle.removeBitstream(originalBitstream);
 | 
			
		||||
								//remove the JpgJpg bitstream from the THUMBNAIL bundle
 | 
			
		||||
								thumbnailBundle.removeBitstream(thumbnailBitstream);
 | 
			
		||||
							}
 | 
			
		||||
						}
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
                        for (Bitstream originalBitstream : originalBundleBitstreams) {
 | 
			
		||||
                            String originalName = originalBitstream.getName();
 | 
			
		||||
 | 
			
		||||
                            long originalBitstreamBytes = originalBitstream.getSizeBytes();
 | 
			
		||||
 | 
			
		||||
                            /*
 | 
			
		||||
                            - check if the original file name is the same as the thumbnail name minus the extra ".jpg"
 | 
			
		||||
                            - check if the thumbnail description indicates it was automatically generated
 | 
			
		||||
                            - check if the original bitstream is less than ~100KiB
 | 
			
		||||
                                - Note: in my tests there were 4022 items with ".jpg.jpg" thumbnails totaling 394549249
 | 
			
		||||
                                  bytes for an average of about 98KiB so ~100KiB seems like a good cut off
 | 
			
		||||
                            */
 | 
			
		||||
                            if (originalName.equalsIgnoreCase(
 | 
			
		||||
                                            StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg"))
 | 
			
		||||
                                    && ("Generated Thumbnail".equals(thumbnailDescription)
 | 
			
		||||
                                            || "IM Thumbnail".equals(thumbnailDescription))
 | 
			
		||||
                                    && originalBitstreamBytes < 100000) {
 | 
			
		||||
                                System.out.println(
 | 
			
		||||
                                        item.getHandle()
 | 
			
		||||
                                                + ": replacing "
 | 
			
		||||
                                                + thumbnailName
 | 
			
		||||
                                                + " with "
 | 
			
		||||
                                                + originalName);
 | 
			
		||||
 | 
			
		||||
                                // add the original bitstream to the THUMBNAIL bundle
 | 
			
		||||
                                bundleService.addBitstream(
 | 
			
		||||
                                        context, thumbnailBundle, originalBitstream);
 | 
			
		||||
                                // remove the original bitstream from the ORIGINAL bundle
 | 
			
		||||
                                originalBundle.removeBitstream(originalBitstream);
 | 
			
		||||
                                // remove the JpgJpg bitstream from the THUMBNAIL bundle
 | 
			
		||||
                                thumbnailBundle.removeBitstream(thumbnailBitstream);
 | 
			
		||||
                            }
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -0,0 +1,280 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright (C) 2022 Alan Orth
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: GPL-3.0-only
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.scripts;
 | 
			
		||||
 | 
			
		||||
import org.apache.commons.lang.StringUtils;
 | 
			
		||||
import org.dspace.authorize.AuthorizeException;
 | 
			
		||||
import org.dspace.content.Bitstream;
 | 
			
		||||
import org.dspace.content.Bundle;
 | 
			
		||||
import org.dspace.content.Collection;
 | 
			
		||||
import org.dspace.content.Community;
 | 
			
		||||
import org.dspace.content.DSpaceObject;
 | 
			
		||||
import org.dspace.content.Item;
 | 
			
		||||
import org.dspace.content.factory.ContentServiceFactory;
 | 
			
		||||
import org.dspace.content.service.BundleService;
 | 
			
		||||
import org.dspace.content.service.ItemService;
 | 
			
		||||
import org.dspace.core.Constants;
 | 
			
		||||
import org.dspace.core.Context;
 | 
			
		||||
import org.dspace.handle.factory.HandleServiceFactory;
 | 
			
		||||
import org.dspace.handle.service.HandleService;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.sql.SQLException;
 | 
			
		||||
import java.util.Iterator;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Fix low-quality thumbnails in a DSpace repository.
 | 
			
		||||
 *
 | 
			
		||||
 * <p>Search the DSpace repository for items containing bitstreams matching the following criteria:
 | 
			
		||||
 *
 | 
			
		||||
 * <ul>
 | 
			
		||||
 *   <li>If an item has an <code>IM Thumbnail</code> and a <code>Generated Thumbnail</code> in the
 | 
			
		||||
 *       <code>THUMBNAIL</code> bundle, remove the <code>Generated Thumbnail</code>.
 | 
			
		||||
 *   <li>If an item has a PDF bitstream and a JPEG bitstream with description "thumbnail" in the
 | 
			
		||||
 *       <code>ORIGINAL</code> bundle, remove the "thumbnail" bitstream in the ORIGINAL bundle.
 | 
			
		||||
 * </ul>
 | 
			
		||||
 *
 | 
			
		||||
 * <p>The general idea is that we should always prefer thumbnails generated from PDFs by ImageMagick
 | 
			
		||||
 * to manually uploaded JPEGs because ImageMagick Thumbnails can be regenerated with higher quality,
 | 
			
		||||
 * resolution, etc. Furthermore, if there are JPEG bitstreams in the ORIGINAL bundle DSpace will
 | 
			
		||||
 * automatically create ".jpg.jpg" thumbnails from them in the THUMBNAIL bundle so we should remove
 | 
			
		||||
 * those as well!
 | 
			
		||||
 *
 | 
			
		||||
 * @author Andrea Schweer schweer@waikato.ac.nz for the LCoNZ Institutional Research Repositories
 | 
			
		||||
 * @author Alan Orth for the International Livestock Research Institute
 | 
			
		||||
 * @version 6.1
 | 
			
		||||
 * @since 6.1
 | 
			
		||||
 * @see FixJpgJpgThumbnails
 | 
			
		||||
 */
 | 
			
		||||
public class FixLowQualityThumbnails {
 | 
			
		||||
    // note: static members belong to the class itself, not any one instance
 | 
			
		||||
    public static ItemService itemService = ContentServiceFactory.getInstance().getItemService();
 | 
			
		||||
    public static HandleService handleService =
 | 
			
		||||
            HandleServiceFactory.getInstance().getHandleService();
 | 
			
		||||
    public static BundleService bundleService =
 | 
			
		||||
            ContentServiceFactory.getInstance().getBundleService();
 | 
			
		||||
 | 
			
		||||
    public static void main(String[] args) {
 | 
			
		||||
        String parentHandle = null;
 | 
			
		||||
        if (args.length >= 1) {
 | 
			
		||||
            parentHandle = args[0];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Context context = null;
 | 
			
		||||
        try {
 | 
			
		||||
            context = new Context();
 | 
			
		||||
            context.turnOffAuthorisationSystem();
 | 
			
		||||
 | 
			
		||||
            if (StringUtils.isBlank(parentHandle)) {
 | 
			
		||||
                process(context, itemService.findAll(context));
 | 
			
		||||
            } else {
 | 
			
		||||
                DSpaceObject parent = handleService.resolveToObject(context, parentHandle);
 | 
			
		||||
                if (parent != null) {
 | 
			
		||||
                    switch (parent.getType()) {
 | 
			
		||||
                        case Constants.SITE:
 | 
			
		||||
                            process(context, itemService.findAll(context));
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
                        case Constants.COMMUNITY:
 | 
			
		||||
                            List<Collection> collections = ((Community) parent).getCollections();
 | 
			
		||||
                            for (Collection collection : collections) {
 | 
			
		||||
                                process(
 | 
			
		||||
                                        context,
 | 
			
		||||
                                        itemService.findAllByCollection(context, collection));
 | 
			
		||||
                            }
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
                        case Constants.COLLECTION:
 | 
			
		||||
                            process(
 | 
			
		||||
                                    context,
 | 
			
		||||
                                    itemService.findByCollection(context, (Collection) parent));
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
                        case Constants.ITEM:
 | 
			
		||||
                            processItem(context, (Item) parent);
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        } catch (SQLException | AuthorizeException | IOException e) {
 | 
			
		||||
            e.printStackTrace(System.err);
 | 
			
		||||
        } finally {
 | 
			
		||||
            if (context != null && context.isValid()) {
 | 
			
		||||
                context.abort();
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private static void process(Context context, Iterator<Item> items)
 | 
			
		||||
            throws SQLException, IOException, AuthorizeException {
 | 
			
		||||
        while (items.hasNext()) {
 | 
			
		||||
            Item item = items.next();
 | 
			
		||||
            processItem(context, item);
 | 
			
		||||
            itemService.update(context, item);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private static void processItem(Context context, Item item)
 | 
			
		||||
            throws SQLException, AuthorizeException, IOException {
 | 
			
		||||
        System.out.println("FixLowQualityThumbnails: processing item: " + item.getHandle());
 | 
			
		||||
 | 
			
		||||
        // Set some state for the item before we iterate over the THUMBNAIL bundle
 | 
			
		||||
        boolean itemHasImThumbnail = false;
 | 
			
		||||
 | 
			
		||||
        // Iterate over the THUMBNAIL bundle to first identify if this item has an "IM Thumbnail"
 | 
			
		||||
        List<Bundle> thumbnailBundles = item.getBundles("THUMBNAIL");
 | 
			
		||||
        for (Bundle thumbnailBundle : thumbnailBundles) {
 | 
			
		||||
            List<Bitstream> thumbnailBundleBitstreams = thumbnailBundle.getBitstreams();
 | 
			
		||||
            for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
 | 
			
		||||
                String thumbnailDescription = thumbnailBitstream.getDescription();
 | 
			
		||||
 | 
			
		||||
                if (StringUtils.isEmpty(thumbnailDescription)) {
 | 
			
		||||
                    continue;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                // Check if this item has a bitstream in the THUMBNAIL bundle with description "IM
 | 
			
		||||
                // Thumbnail", but only if we haven't already seen one in another iteration for this
 | 
			
		||||
                // bundle.
 | 
			
		||||
                if (!itemHasImThumbnail && "IM Thumbnail".equals(thumbnailDescription)) {
 | 
			
		||||
                    itemHasImThumbnail = true;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // If this item has an IM Thumbnail we can be reasonably sure that there is a PDF
 | 
			
		||||
            // in the ORIGINAL bundle and we don't need any other thumbnails.
 | 
			
		||||
            if (itemHasImThumbnail) {
 | 
			
		||||
                // Iterate over the bitstreams in the THUMBNAIL bundle again.
 | 
			
		||||
                for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
 | 
			
		||||
                    String thumbnailName = thumbnailBitstream.getName();
 | 
			
		||||
                    String thumbnailDescription = thumbnailBitstream.getDescription();
 | 
			
		||||
 | 
			
		||||
                    if (StringUtils.isEmpty(thumbnailDescription)) {
 | 
			
		||||
                        continue;
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    // If this item has a "Generated Thumbnail" we can remove it, because those
 | 
			
		||||
                    // typically come from other JPEGs in the ORIGINAL bundle and we would prefer
 | 
			
		||||
                    // the IM Thumbnail generated from a PDF anyway. The DSpace-generated descri-
 | 
			
		||||
                    // ption will *always* be "Generated Thumbnail".
 | 
			
		||||
                    if ("Generated Thumbnail".equals(thumbnailDescription)) {
 | 
			
		||||
                        System.out.print("\u001b[33m");
 | 
			
		||||
                        System.out.println("> Action: remove old thumbnail from THUMBNAIL bundle");
 | 
			
		||||
                        System.out.println("> Name: »" + thumbnailName + "«");
 | 
			
		||||
                        System.out.println("> Description: »" + thumbnailDescription + "«");
 | 
			
		||||
                        System.out.print("\u001b[0m");
 | 
			
		||||
 | 
			
		||||
                        // Remove the "Generated Thumbnail" bitstream from the THUMBNAIL bundle
 | 
			
		||||
                        thumbnailBundle.removeBitstream(thumbnailBitstream);
 | 
			
		||||
 | 
			
		||||
                        // If this item has a bitstream with the word "thumbnail" in it then we can
 | 
			
		||||
                        // remove it because we already know this item has an IM Thumbnail and we
 | 
			
		||||
                        // prefer that one.
 | 
			
		||||
                    } else if (thumbnailDescription.toLowerCase().contains("thumbnail")
 | 
			
		||||
                            && !"IM Thumbnail".equals(thumbnailDescription)) {
 | 
			
		||||
                        System.out.print("\u001b[33m");
 | 
			
		||||
                        System.out.println("> Action: remove manually uploaded thumbnail from THUMBNAIL bundle");
 | 
			
		||||
                        System.out.println("> Name: »" + thumbnailName + "«");
 | 
			
		||||
                        System.out.println("> Description: »" + thumbnailDescription + "«");
 | 
			
		||||
                        System.out.print("\u001b[0m");
 | 
			
		||||
 | 
			
		||||
                        // Remove the "thumbnail" bitstream from the THUMBNAIL bundle
 | 
			
		||||
                        thumbnailBundle.removeBitstream(thumbnailBitstream);
 | 
			
		||||
 | 
			
		||||
                        // Otherwise skip it because it might be something uploaded manually, like
 | 
			
		||||
                        // a thumbnail for a journal or a limited access item.
 | 
			
		||||
                    } else {
 | 
			
		||||
                        System.out.print("\u001b[34m");
 | 
			
		||||
                        System.out.println("> Action: skip other thumbnail in THUMBNAIL bundle");
 | 
			
		||||
                        System.out.println("> Name: »" + thumbnailName + "«");
 | 
			
		||||
                        System.out.println("> Description: »" + thumbnailDescription + "«");
 | 
			
		||||
                        System.out.print("\u001b[0m");
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    // Print a blank line
 | 
			
		||||
                    System.out.println();
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Set some state before we iterate over the ORIGINAL bundle
 | 
			
		||||
        boolean itemHasOriginalPdfBitstream = false;
 | 
			
		||||
        boolean itemHasOriginalJpegBitstream = false;
 | 
			
		||||
 | 
			
		||||
        // Iterate over the ORIGINAL bundle to delete manually-uploaded JPEG
 | 
			
		||||
        // bitstreams labeled "Thumbnail" whenever we have a PDF because they
 | 
			
		||||
        // don't belong in the ORIGINAL bundle and DSpace will automatically
 | 
			
		||||
        // create a better thumbnail from the PDF anyway.
 | 
			
		||||
        List<Bundle> originalBundles = item.getBundles("ORIGINAL");
 | 
			
		||||
        for (Bundle originalBundle : originalBundles) {
 | 
			
		||||
            List<Bitstream> originalBundleBitstreams = originalBundle.getBitstreams();
 | 
			
		||||
            for (Bitstream originalBitstream : originalBundleBitstreams) {
 | 
			
		||||
                String originalFormat = originalBitstream.getFormat(context).getMIMEType();
 | 
			
		||||
 | 
			
		||||
                // Check if this item has a PDF bitstream in the ORIGINAL bundle,
 | 
			
		||||
                // but only if we haven't already seen one in another iteration
 | 
			
		||||
                // for this bundle. DSpace will return "format application/pdf"
 | 
			
		||||
                // for the MIME type.
 | 
			
		||||
                if (!itemHasOriginalPdfBitstream && originalFormat.contains("application/pdf")) {
 | 
			
		||||
                    itemHasOriginalPdfBitstream = true;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                // Check if this item has a JPEG bitstream in the ORIGINAL bundle,
 | 
			
		||||
                // but only if we haven't already seen one in another iteration
 | 
			
		||||
                // for this bundle. DSpace will return "format image/jpeg" for
 | 
			
		||||
                // the MIME type.
 | 
			
		||||
                if (!itemHasOriginalJpegBitstream && originalFormat.contains("image/jpeg")) {
 | 
			
		||||
                    itemHasOriginalJpegBitstream = true;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // Check if we found a PDF *and* a JPEG in this item's ORIGINAL
 | 
			
		||||
            // bundle.
 | 
			
		||||
            if (itemHasOriginalPdfBitstream && itemHasOriginalJpegBitstream) {
 | 
			
		||||
                // Yes! Now iterate over the bitstreams in the ORIGINAL bundle
 | 
			
		||||
                // again to see if the JPEG is a manually uploaded "Thumbnail"
 | 
			
		||||
                for (Bitstream originalBitstream : originalBundleBitstreams) {
 | 
			
		||||
                    String originalName = originalBitstream.getName();
 | 
			
		||||
                    String originalDescription = originalBitstream.getDescription();
 | 
			
		||||
                    String originalFormat = originalBitstream.getFormat(context).getMIMEType();
 | 
			
		||||
 | 
			
		||||
                    if (StringUtils.isEmpty(originalDescription)) {
 | 
			
		||||
                        continue;
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    /*
 | 
			
		||||
                    - check if the bitstream is a JPEG based on its MIME Type
 | 
			
		||||
                    - check if the bitstream's name or description is "Thumbnail"
 | 
			
		||||
                    */
 | 
			
		||||
                    if (originalFormat.toLowerCase().contains("image/jpeg")
 | 
			
		||||
                            && (originalName.toLowerCase().contains("thumbnail")
 | 
			
		||||
                                    || originalDescription.toLowerCase().contains("thumbnail"))) {
 | 
			
		||||
                        System.out.print("\u001b[33m");
 | 
			
		||||
                        System.out.println("> Action: remove thumbnail from ORIGINAL bundle");
 | 
			
		||||
                        System.out.println("> Name: »" + originalName + "«");
 | 
			
		||||
                        System.out.println("> Description: »" + originalDescription + "«");
 | 
			
		||||
                        System.out.print("\u001b[0m");
 | 
			
		||||
 | 
			
		||||
                        // Remove the original bitstream from the ORIGINAL bundle
 | 
			
		||||
                        originalBundle.removeBitstream(originalBitstream);
 | 
			
		||||
 | 
			
		||||
                    } else {
 | 
			
		||||
                        System.out.print("\u001b[34m");
 | 
			
		||||
                        System.out.println("> Action: skip other bitstream in ORIGINAL bundle");
 | 
			
		||||
                        System.out.println("> Name: »" + originalName + "«");
 | 
			
		||||
                        System.out.println("> Description: »" + originalDescription + "«");
 | 
			
		||||
                        System.out.print("\u001b[0m");
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    // Print a blank line
 | 
			
		||||
                    System.out.println();
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										42
									
								
								src/main/java/io/github/ilri/cgspace/scripts/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								src/main/java/io/github/ilri/cgspace/scripts/README.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,42 @@
 | 
			
		||||
# Scripts
 | 
			
		||||
Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
 | 
			
		||||
 | 
			
		||||
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
 | 
			
		||||
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
 | 
			
		||||
 | 
			
		||||
Tested on DSpace 7.6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).
 | 
			
		||||
 | 
			
		||||
## Build and Install
 | 
			
		||||
 | 
			
		||||
### Integrate into DSpace Build
 | 
			
		||||
To use these scripts in a DSpace project add the following dependency to `dspace/modules/additions/pom.xml`:
 | 
			
		||||
 | 
			
		||||
```xml
 | 
			
		||||
<dependency>
 | 
			
		||||
  <groupId>io.github.ilri.cgspace</groupId>
 | 
			
		||||
  <artifactId>cgspace-java-helpers</artifactId>
 | 
			
		||||
  <version>7.6.1.4-SNAPSHOT</version>
 | 
			
		||||
</dependency>
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The jar will be copied to all DSpace applications.
 | 
			
		||||
 | 
			
		||||
### Manual Build and Install
 | 
			
		||||
To build the standalone jar:
 | 
			
		||||
 | 
			
		||||
```console
 | 
			
		||||
$ mvn package
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Copy the resulting jar to the DSpace `lib` directory:
 | 
			
		||||
 | 
			
		||||
```console
 | 
			
		||||
$ cp target/cgspace-java-helpers-7.6.1.4-SNAPSHOT.jar ~/dspace/lib/
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Invocation
 | 
			
		||||
The scripts take one optional argument, the handle of a site, community, collection, or item (when omitted, all items in the repository are processed):
 | 
			
		||||
 | 
			
		||||
```console
 | 
			
		||||
$ dspace dsrun io.github.ilri.cgspace.scripts.FixJpgJpgThumbnails 10568/83389
 | 
			
		||||
```
 | 
			
		||||
@@ -16,29 +16,15 @@
 | 
			
		||||
      "name": "Congo, The Democratic Republic of the",
 | 
			
		||||
      "numeric": "180"
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "alpha_2": "IR",
 | 
			
		||||
      "alpha_3": "IRN",
 | 
			
		||||
      "name": "Iran, Islamic Republic of",
 | 
			
		||||
      "cgspace_name": "Iran",
 | 
			
		||||
      "numeric": "364",
 | 
			
		||||
      "official_name": "Islamic Republic of Iran"
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "alpha_2": "KP",
 | 
			
		||||
      "alpha_3": "PRK",
 | 
			
		||||
      "common_name": "North Korea",
 | 
			
		||||
      "name": "Korea, Democratic People's Republic of",
 | 
			
		||||
      "cgspace_name": "Korea, DPR",
 | 
			
		||||
      "numeric": "408",
 | 
			
		||||
      "official_name": "Democratic People's Republic of Korea"
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "alpha_2": "LA",
 | 
			
		||||
      "alpha_3": "LAO",
 | 
			
		||||
      "name": "Lao People's Democratic Republic",
 | 
			
		||||
      "cgspace_name": "Laos",
 | 
			
		||||
      "numeric": "418"
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "alpha_2": "FM",
 | 
			
		||||
      "alpha_3": "FSM",
 | 
			
		||||
@@ -53,13 +39,6 @@
 | 
			
		||||
      "name": "Russian Federation",
 | 
			
		||||
      "cgspace_name": "Russia",
 | 
			
		||||
      "numeric": "643"
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "alpha_2": "SY",
 | 
			
		||||
      "alpha_3": "SYR",
 | 
			
		||||
      "name": "Syrian Arab Republic",
 | 
			
		||||
      "cgspace_name": "Syria",
 | 
			
		||||
      "numeric": "760"
 | 
			
		||||
    }
 | 
			
		||||
  ]
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user