mirror of
				https://github.com/ilri/cgspace-java-helpers.git
				synced 2025-11-03 22:29:10 +01:00 
			
		
		
		
	Compare commits
	
		
			62 Commits
		
	
	
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						
						
							
						
						546101bc92
	
				 | 
					
					
						|||
| 
						
						
							
						
						0a7cf7bf59
	
				 | 
					
					
						|||
| 
						
						
							
						
						8c0a8fbcd1
	
				 | 
					
					
						|||
| 
						
						
							
						
						c05a2e4f96
	
				 | 
					
					
						|||
| 
						
						
							
						
						cf2af393c0
	
				 | 
					
					
						|||
| 
						
						
							
						
						1f6ba4af67
	
				 | 
					
					
						|||
| 
						
						
							
						
						5ceaebaeae
	
				 | 
					
					
						|||
| 
						
						
							
						
						f3dcc6e261
	
				 | 
					
					
						|||
| 
						
						
							
						
						3eddbc3e22
	
				 | 
					
					
						|||
| 
						
						
							
						
						dbf59f784c
	
				 | 
					
					
						|||
| 
						
						
							
						
						0ffa4c8d37
	
				 | 
					
					
						|||
| 
						
						
							
						
						970d0c074e
	
				 | 
					
					
						|||
| 
						
						
							
						
						6b2b899957
	
				 | 
					
					
						|||
| 
						
						
							
						
						dfaa234a90
	
				 | 
					
					
						|||
| 
						
						
							
						
						f46e81b8cd
	
				 | 
					
					
						|||
| 
						
						
							
						
						dbd8721579
	
				 | 
					
					
						|||
| 
						
						
							
						
						a234b39064
	
				 | 
					
					
						|||
| 
						
						
							
						
						80a336f94d
	
				 | 
					
					
						|||
| 
						
						
							
						
						5ebf4930cf
	
				 | 
					
					
						|||
| 
						
						
							
						
						8e01595cc1
	
				 | 
					
					
						|||
| 
						
						
							
						
						8b3aac610d
	
				 | 
					
					
						|||
| 
						
						
							
						
						c2d7535d01
	
				 | 
					
					
						|||
| 
						
						
							
						
						b396fba043
	
				 | 
					
					
						|||
| 
						
						
							
						
						38a9cc5188
	
				 | 
					
					
						|||
| 
						
						
							
						
						16db38967b
	
				 | 
					
					
						|||
| 
						
						
							
						
						2604dc3cce
	
				 | 
					
					
						|||
| 
						
						
							
						
						f0754ab419
	
				 | 
					
					
						|||
| 
						
						
							
						
						6772145bec
	
				 | 
					
					
						|||
| 
						
						
							
						
						b31557aa05
	
				 | 
					
					
						|||
| 
						
						
							
						
						095f843067
	
				 | 
					
					
						|||
| 
						
						
							
						
						f7fda9922f
	
				 | 
					
					
						|||
| 
						
						
							
						
						83a416afaf
	
				 | 
					
					
						|||
| 
						
						
							
						
						922e3892a7
	
				 | 
					
					
						|||
| 
						
						
							
						
						6b648c2c85
	
				 | 
					
					
						|||
| 
						
						
							
						
						781ddcd931
	
				 | 
					
					
						|||
| 
						
						
							
						
						49cb8e3468
	
				 | 
					
					
						|||
| 
						
						
							
						
						3aa1503163
	
				 | 
					
					
						|||
| 
						
						
							
						
						26597e2f8f
	
				 | 
					
					
						|||
| 
						
						
							
						
						1497ebb476
	
				 | 
					
					
						|||
| 
						
						
							
						
						b2027e3e44
	
				 | 
					
					
						|||
| 
						
						
							
						
						26eaa2d94f
	
				 | 
					
					
						|||
| e3b95f6a30 | |||
| 
						
						
							
						
						5e545e37e2
	
				 | 
					
					
						|||
| 
						
						
							
						
						3f711db1b2
	
				 | 
					
					
						|||
| 
						
						
							
						
						112cb8a133
	
				 | 
					
					
						|||
| 
						
						
							
						
						4d59c1a00d
	
				 | 
					
					
						|||
| 
						
						
							
						
						2e779efb14
	
				 | 
					
					
						|||
| 
						
						
							
						
						735e759033
	
				 | 
					
					
						|||
| 
						
						
							
						
						271a9ce970
	
				 | 
					
					
						|||
| 
						
						
							
						
						6b72ddefc1
	
				 | 
					
					
						|||
| 
						
						
							
						
						cea97aebe5
	
				 | 
					
					
						|||
| 
						
						
							
						
						4bc7971ecb
	
				 | 
					
					
						|||
| 
						
						
							
						
						197aad0124
	
				 | 
					
					
						|||
| 
						
						
							
						
						da1ecad238
	
				 | 
					
					
						|||
| 
						
						
							
						
						307480f249
	
				 | 
					
					
						|||
| 
						
						
							
						
						4698b6eb38
	
				 | 
					
					
						|||
| 
						
						
							
						
						f1629f65fe
	
				 | 
					
					
						|||
| 
						
						
							
						
						29f6aff35e
	
				 | 
					
					
						|||
| 
						
						
							
						
						9bf487a336
	
				 | 
					
					
						|||
| 
						
						
							
						
						f50357b7cc
	
				 | 
					
					
						|||
| 
						
						
							
						
						f3ab89f7a1
	
				 | 
					
					
						|||
| 
						
						
							
						
						5a467f92e0
	
				 | 
					
					
						
							
								
								
									
										26
									
								
								.github/workflows/maven.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								.github/workflows/maven.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,26 @@
 | 
			
		||||
# This workflow will build a Java project with Maven
 | 
			
		||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven
 | 
			
		||||
 | 
			
		||||
name: Build
 | 
			
		||||
 | 
			
		||||
on:
 | 
			
		||||
  push:
 | 
			
		||||
    branches: [ dspace6 ]
 | 
			
		||||
  pull_request:
 | 
			
		||||
    branches: [ dspace6 ]
 | 
			
		||||
 | 
			
		||||
jobs:
 | 
			
		||||
  build:
 | 
			
		||||
 | 
			
		||||
    runs-on: ubuntu-22.04
 | 
			
		||||
 | 
			
		||||
    steps:
 | 
			
		||||
    - uses: actions/checkout@v3
 | 
			
		||||
    - name: Set up JDK 8
 | 
			
		||||
      uses: actions/setup-java@v3
 | 
			
		||||
      with:
 | 
			
		||||
        java-version: 8
 | 
			
		||||
        distribution: 'temurin'
 | 
			
		||||
        cache: 'maven'
 | 
			
		||||
    - name: Build with Maven
 | 
			
		||||
      run: mvn -B package --file pom.xml
 | 
			
		||||
							
								
								
									
										6
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -70,4 +70,10 @@ fabric.properties
 | 
			
		||||
# Android studio 3.1+ serialized cache file
 | 
			
		||||
.idea/caches/build_file_checksums.ser
 | 
			
		||||
 | 
			
		||||
# VS Code settings
 | 
			
		||||
.vscode
 | 
			
		||||
 | 
			
		||||
# asdf-vm tool-versions file
 | 
			
		||||
.tool-versions
 | 
			
		||||
 | 
			
		||||
target/
 | 
			
		||||
 
 | 
			
		||||
@@ -1,8 +0,0 @@
 | 
			
		||||
dist: bionic
 | 
			
		||||
language: java
 | 
			
		||||
jdk:
 | 
			
		||||
  - openjdk8
 | 
			
		||||
script:
 | 
			
		||||
  - mvn package -B
 | 
			
		||||
 | 
			
		||||
# vim: ts=2 sw=2 et
 | 
			
		||||
							
								
								
									
										27
									
								
								CHANGELOG.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								CHANGELOG.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,27 @@
 | 
			
		||||
# Changelog
 | 
			
		||||
All notable changes to this project will be documented in this file.
 | 
			
		||||
 | 
			
		||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 | 
			
		||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 | 
			
		||||
 | 
			
		||||
## Unreleased
 | 
			
		||||
### Updated
 | 
			
		||||
- `iso_3166-1.json` from iso-codes 4.13.0-SNAPSHOT, which [adds common names for Iran, Laos, and Syria](https://salsa.debian.org/iso-codes-team/iso-codes/-/merge_requests/32)
 | 
			
		||||
 | 
			
		||||
## [6.2] - 2023-02-20
 | 
			
		||||
### Updated
 | 
			
		||||
- `iso_3166-1.json` from iso-codes 4.12.0, which updates the name for TR to "Türkiye"
 | 
			
		||||
 | 
			
		||||
## [6.1] - 2022-10-31
 | 
			
		||||
### Updated
 | 
			
		||||
- Update dependencies in `pom.xml`
 | 
			
		||||
- `iso_3166-1.json` from iso-codes 4.11.0
 | 
			
		||||
 | 
			
		||||
### Changed
 | 
			
		||||
- Java compiler and target from JDK 7 to JDK 8
 | 
			
		||||
 | 
			
		||||
### Added
 | 
			
		||||
- New `FixLowQualityThumbnails` script to detect and remove more low-quality thumbnails
 | 
			
		||||
 | 
			
		||||
### Fixed
 | 
			
		||||
- `FixJpgJpgThumbnails` and `FixLowQualityThumbnails` scripts not committing changes when operating on a site, community, or collection
 | 
			
		||||
							
								
								
									
										61
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										61
									
								
								README.md
									
									
									
									
									
								
							@@ -1,10 +1,11 @@
 | 
			
		||||
# CGSpace Java Helpers [](https://travis-ci.org/ilri/dspace-curation-tasks)
 | 
			
		||||
# CGSpace Java Helpers [](https://github.com/ilri/cgspace-java-helpers/actions)
 | 
			
		||||
DSpace curation tasks and other Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
 | 
			
		||||
 | 
			
		||||
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
 | 
			
		||||
- **FixJpgJpgThumbnails**: Fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
 | 
			
		||||
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
 | 
			
		||||
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
 | 
			
		||||
 | 
			
		||||
Tested on DSpace 5.8. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC5x/Curation+System).
 | 
			
		||||
Tested on DSpace 6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC6x/Curation+System).
 | 
			
		||||
 | 
			
		||||
## Build and Install
 | 
			
		||||
 | 
			
		||||
@@ -15,7 +16,7 @@ To use these curation tasks in a DSpace project add the following dependency to
 | 
			
		||||
<dependency>
 | 
			
		||||
  <groupId>io.github.ilri.cgspace</groupId>
 | 
			
		||||
  <artifactId>cgspace-java-helpers</artifactId>
 | 
			
		||||
  <version>5.1</version>
 | 
			
		||||
  <version>6.2-SNAPSHOT</version>
 | 
			
		||||
</dependency>
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
@@ -24,49 +25,27 @@ The jar will be copied to all DSpace applications.
 | 
			
		||||
### Manual Build and Install
 | 
			
		||||
To build the standalone jar:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
```console
 | 
			
		||||
$ mvn package
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Copy the resulting jar to the DSpace `lib` directory:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ cp target/cgspace-java-helpers-5.1.jar ~/dspace/lib
 | 
			
		||||
```console
 | 
			
		||||
$ cp target/cgspace-java-helpers-6.2-SNAPSHOT.jar ~/dspace/lib/
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Configuration
 | 
			
		||||
Add the curation task to DSpace's `config/modules/curate.cfg`:
 | 
			
		||||
Please refer to the appropriate README.md file:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
plugin.named.org.dspace.curate.CurationTask = \
 | 
			
		||||
...
 | 
			
		||||
    io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger \
 | 
			
		||||
    io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger.force
 | 
			
		||||
```
 | 
			
		||||
- Curation Tasks: [src/main/java/io/github/ilri/cgspace/ctasks/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace6/src/main/java/io/github/ilri/cgspace/ctasks/README.md)
 | 
			
		||||
- Scripts: [src/main/java/io/github/ilri/cgspace/scripts/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace6/src/main/java/io/github/ilri/cgspace/scripts/README.md)
 | 
			
		||||
 | 
			
		||||
And then add a configuration file for the task in `config/modules/countrycodetagger.cfg`:
 | 
			
		||||
## TODO
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
# name of the field containing ISO 3166-1 country names
 | 
			
		||||
iso3166.field = cg.coverage.country
 | 
			
		||||
 | 
			
		||||
# name of the field containing ISO 3166-1 Alpha2 country codes
 | 
			
		||||
iso3166-alpha2.field = cg.coverage.iso3166-alpha2
 | 
			
		||||
 | 
			
		||||
# only add country codes if an item doesn't have any (default false)
 | 
			
		||||
#forceupdate = false
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
*Note*: DSpace's curation system supports "profiles" where you can use the same task with different options, for example above I have a normal country code tagger and a "force" variant. To use the "force" variant you create a new configuration file with the overridden options in `config/modules/countrycodetagger.force.cfg`. The "force" profile clears all existing country codes and updates everything.
 | 
			
		||||
 | 
			
		||||
## Invocation
 | 
			
		||||
Once the jar is installed and you have added appropriate configuration in `~/dspace/config/modules`:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ ~/dspace/bin/dspace curate -t countrycodetagger -i 10568/3 -r - -l 500 -s object
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
*Note*: it is very important to set the cache limit (`-l`) and the database transaction scope to something sensible (`object`) if you're curating a community or collection with more than a few hundred items.
 | 
			
		||||
- Add a curation task to normalize DOIs to "https://doi.org" format
 | 
			
		||||
- Migrate from maven-deploy-plugin to nexus-staging-maven-plugin, see: https://central.sonatype.org/publish/publish-maven/#nexus-staging-maven-plugin-for-deployment-and-release
 | 
			
		||||
- Stop using oss-parent, see: https://central.sonatype.org/publish/publish-maven/#create-a-ticket-with-sonatype
 | 
			
		||||
 | 
			
		||||
## Notes
 | 
			
		||||
This project was initially created according to the [Maven Getting Started Guide](https://maven.apache.org/guides/getting-started/):
 | 
			
		||||
@@ -75,11 +54,13 @@ This project was initially created according to the [Maven Getting Started Guide
 | 
			
		||||
$ mvn -B archetype:generate -DgroupId=io.github.ilri.cgspace -DartifactId=cgspace-java-helpers -DarchetypeArtifactId=maven-archetype-quickstart -DarchetypeVersion=1.4
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## TODO
 | 
			
		||||
To deploy a new `-SNAPSHOT` release to Maven Central (make sure OSSRH credentials are in `~/.m2/settings.xml`):
 | 
			
		||||
 | 
			
		||||
- Make sure this doesn't work on items in the workflow
 | 
			
		||||
- Check for existence of metadata field before trying to add metadata
 | 
			
		||||
- Add tests
 | 
			
		||||
```console
 | 
			
		||||
$ mvn clean deploy
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
See: <a href="https://central.sonatype.org/publish/publish-maven/#performing-a-snapshot-deployment">Performing a Snapshot Deployment</a>
 | 
			
		||||
 | 
			
		||||
## License
 | 
			
		||||
This work is licensed under the [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html).
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										34
									
								
								pom.xml
									
									
									
									
									
								
							
							
						
						
									
										34
									
								
								pom.xml
									
									
									
									
									
								
							@@ -6,7 +6,7 @@
 | 
			
		||||
 | 
			
		||||
  <groupId>io.github.ilri.cgspace</groupId>
 | 
			
		||||
  <artifactId>cgspace-java-helpers</artifactId>
 | 
			
		||||
  <version>5.2</version>
 | 
			
		||||
  <version>6.2-SNAPSHOT</version>
 | 
			
		||||
 | 
			
		||||
  <name>cgspace-java-helpers</name>
 | 
			
		||||
  <url>https://github.com/ilri/cgspace-java-helpers</url>
 | 
			
		||||
@@ -28,26 +28,20 @@
 | 
			
		||||
 | 
			
		||||
  <properties>
 | 
			
		||||
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
 | 
			
		||||
    <maven.compiler.source>1.7</maven.compiler.source>
 | 
			
		||||
    <maven.compiler.target>1.7</maven.compiler.target>
 | 
			
		||||
    <maven.compiler.source>1.8</maven.compiler.source>
 | 
			
		||||
    <maven.compiler.target>1.8</maven.compiler.target>
 | 
			
		||||
  </properties>
 | 
			
		||||
 | 
			
		||||
  <dependencies>
 | 
			
		||||
    <dependency>
 | 
			
		||||
      <groupId>junit</groupId>
 | 
			
		||||
      <artifactId>junit</artifactId>
 | 
			
		||||
      <version>4.11</version>
 | 
			
		||||
      <scope>test</scope>
 | 
			
		||||
    </dependency>
 | 
			
		||||
    <dependency>
 | 
			
		||||
      <groupId>com.google.code.gson</groupId>
 | 
			
		||||
      <artifactId>gson</artifactId>
 | 
			
		||||
      <version>2.2.1</version>
 | 
			
		||||
      <version>2.9.1</version>
 | 
			
		||||
    </dependency>
 | 
			
		||||
    <dependency>
 | 
			
		||||
      <groupId>org.dspace</groupId>
 | 
			
		||||
      <artifactId>dspace-api</artifactId>
 | 
			
		||||
      <version>5.8</version>
 | 
			
		||||
      <version>6.3</version>
 | 
			
		||||
      <scope>provided</scope>
 | 
			
		||||
    </dependency>
 | 
			
		||||
  </dependencies>
 | 
			
		||||
@@ -75,41 +69,41 @@
 | 
			
		||||
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-clean-plugin</artifactId>
 | 
			
		||||
          <version>3.1.0</version>
 | 
			
		||||
          <version>3.2.0</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-resources-plugin</artifactId>
 | 
			
		||||
          <version>3.0.2</version>
 | 
			
		||||
          <version>3.3.0</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-compiler-plugin</artifactId>
 | 
			
		||||
          <version>3.8.0</version>
 | 
			
		||||
          <version>3.10.1</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-surefire-plugin</artifactId>
 | 
			
		||||
          <version>2.22.1</version>
 | 
			
		||||
          <version>3.0.0-M7</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-jar-plugin</artifactId>
 | 
			
		||||
          <version>3.0.2</version>
 | 
			
		||||
          <version>3.3.0</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-install-plugin</artifactId>
 | 
			
		||||
          <version>2.5.2</version>
 | 
			
		||||
          <version>3.0.1</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-deploy-plugin</artifactId>
 | 
			
		||||
          <version>2.8.2</version>
 | 
			
		||||
          <version>3.0.0</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-site-plugin</artifactId>
 | 
			
		||||
          <version>3.7.1</version>
 | 
			
		||||
          <version>3.12.1</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-project-info-reports-plugin</artifactId>
 | 
			
		||||
          <version>3.0.0</version>
 | 
			
		||||
          <version>3.4.1</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
      </plugins>
 | 
			
		||||
    </pluginManagement>
 | 
			
		||||
 
 | 
			
		||||
@@ -1,20 +1,8 @@
 | 
			
		||||
/*
 | 
			
		||||
DSpace Curation Tasks
 | 
			
		||||
Copyright (C) 2020  Alan Orth
 | 
			
		||||
 | 
			
		||||
This program is free software: you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License
 | 
			
		||||
along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
 * Copyright (C) 2020 Alan Orth
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: GPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.ctasks;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,20 +1,8 @@
 | 
			
		||||
/*
 | 
			
		||||
    DSpace Curation Tasks
 | 
			
		||||
    Copyright (C) 2020  Alan Orth
 | 
			
		||||
 | 
			
		||||
    This program is free software: you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License
 | 
			
		||||
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
 * Copyright (C) 2020 Alan Orth
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: GPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.ctasks;
 | 
			
		||||
 | 
			
		||||
@@ -23,15 +11,16 @@ import javax.annotation.Nullable;
 | 
			
		||||
public class CountriesVocabulary {
 | 
			
		||||
 | 
			
		||||
    class Country {
 | 
			
		||||
        private String name;            //required
 | 
			
		||||
        private String common_name;     //optional
 | 
			
		||||
        private String official_name;   //optional
 | 
			
		||||
        private String cgspace_name;    //optional
 | 
			
		||||
        private String numeric;         //required Hmmmm need to cast this...
 | 
			
		||||
        private String alpha_2;         //required
 | 
			
		||||
        private String alpha_3;         //required
 | 
			
		||||
        private String name; // required
 | 
			
		||||
        private String common_name; // optional
 | 
			
		||||
        private String official_name; // optional
 | 
			
		||||
        private String cgspace_name; // optional
 | 
			
		||||
        private String numeric; // required Hmmmm need to cast this...
 | 
			
		||||
        private String alpha_2; // required
 | 
			
		||||
        private String alpha_3; // required
 | 
			
		||||
 | 
			
		||||
        public Country(String name,
 | 
			
		||||
        public Country(
 | 
			
		||||
                String name,
 | 
			
		||||
                @Nullable String common_name,
 | 
			
		||||
                @Nullable String official_name,
 | 
			
		||||
                @Nullable String cgspace_name,
 | 
			
		||||
@@ -42,7 +31,9 @@ public class CountriesVocabulary {
 | 
			
		||||
            this.common_name = common_name;
 | 
			
		||||
            this.official_name = official_name;
 | 
			
		||||
            this.cgspace_name = cgspace_name;
 | 
			
		||||
            this.numeric = numeric; // fuuuuu this is a string and we can't cast to Integer because some values are zeropadded like "004"
 | 
			
		||||
            this.numeric =
 | 
			
		||||
                    numeric; // fuuuuu this is a string and we can't cast to Integer because some
 | 
			
		||||
                             // values are zeropadded like "004"
 | 
			
		||||
            this.alpha_2 = alpha_2;
 | 
			
		||||
            this.alpha_3 = alpha_3;
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
@@ -1,29 +1,18 @@
 | 
			
		||||
/*
 | 
			
		||||
    DSpace Curation Tasks
 | 
			
		||||
    Copyright (C) 2020  Alan Orth
 | 
			
		||||
 | 
			
		||||
    This program is free software: you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License
 | 
			
		||||
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
 * Copyright (C) 2020 Alan Orth
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: GPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.ctasks;
 | 
			
		||||
 | 
			
		||||
import com.google.gson.Gson;
 | 
			
		||||
 | 
			
		||||
import org.apache.log4j.Logger;
 | 
			
		||||
import org.dspace.authorize.AuthorizeException;
 | 
			
		||||
import org.dspace.content.DSpaceObject;
 | 
			
		||||
import org.dspace.content.Item;
 | 
			
		||||
import org.dspace.content.Metadatum;
 | 
			
		||||
import org.dspace.content.MetadataValue;
 | 
			
		||||
import org.dspace.core.Constants;
 | 
			
		||||
import org.dspace.curate.AbstractCurationTask;
 | 
			
		||||
import org.dspace.curate.Curator;
 | 
			
		||||
@@ -35,11 +24,11 @@ import java.sql.SQLException;
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
public class CountryCodeTagger extends AbstractCurationTask
 | 
			
		||||
{
 | 
			
		||||
public class CountryCodeTagger extends AbstractCurationTask {
 | 
			
		||||
    public class CountryCodeTaggerConfig {
 | 
			
		||||
        private final String isocodesJsonPath = "/io/github/ilri/cgspace/ctasks/iso_3166-1.json";
 | 
			
		||||
        private final String cgspaceCountriesJsonPath = "/io/github/ilri/cgspace/ctasks/cgspace-countries.json";
 | 
			
		||||
        private final String cgspaceCountriesJsonPath =
 | 
			
		||||
                "/io/github/ilri/cgspace/ctasks/cgspace-countries.json";
 | 
			
		||||
        private final String iso3166Field = taskProperty("iso3166.field");
 | 
			
		||||
        private final String iso3166Alpha2Field = taskProperty("iso3166-alpha2.field");
 | 
			
		||||
        private final boolean forceupdate = taskBooleanProperty("forceupdate", false);
 | 
			
		||||
@@ -69,19 +58,21 @@ public class CountryCodeTagger extends AbstractCurationTask
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    @Override
 | 
			
		||||
    public int perform(DSpaceObject dso) throws IOException
 | 
			
		||||
    {
 | 
			
		||||
    public int perform(DSpaceObject dso) throws IOException {
 | 
			
		||||
        // gotta define this here so we can access it after the if context...
 | 
			
		||||
        CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
 | 
			
		||||
 | 
			
		||||
		if (dso.getType() == Constants.ITEM)
 | 
			
		||||
        {
 | 
			
		||||
        if (dso.getType() == Constants.ITEM) {
 | 
			
		||||
            // Load configuration
 | 
			
		||||
            CountryCodeTaggerConfig config = new CountryCodeTaggerConfig();
 | 
			
		||||
 | 
			
		||||
            Item item = (Item)dso;
 | 
			
		||||
            Item item = (Item) dso;
 | 
			
		||||
 | 
			
		||||
            try {
 | 
			
		||||
                alpha2Result = performAlpha2(item, config);
 | 
			
		||||
            } catch (SQLException throwables) {
 | 
			
		||||
                throwables.printStackTrace();
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            setResult(alpha2Result.getResult());
 | 
			
		||||
            report(alpha2Result.getResult());
 | 
			
		||||
@@ -90,52 +81,77 @@ public class CountryCodeTagger extends AbstractCurationTask
 | 
			
		||||
        return alpha2Result.getStatus();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public CountryCodeTaggerResult performAlpha2(Item item, CountryCodeTaggerConfig config) throws IOException
 | 
			
		||||
    {
 | 
			
		||||
    public CountryCodeTaggerResult performAlpha2(Item item, CountryCodeTaggerConfig config)
 | 
			
		||||
            throws IOException, SQLException {
 | 
			
		||||
        CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
 | 
			
		||||
        String itemHandle = item.getHandle();
 | 
			
		||||
 | 
			
		||||
        Metadatum[] itemCountries = item.getMetadataByMetadataString(config.iso3166Field);
 | 
			
		||||
        List<MetadataValue> itemCountries =
 | 
			
		||||
                itemService.getMetadataByMetadataString(item, config.iso3166Field);
 | 
			
		||||
 | 
			
		||||
        // skip items that don't have country metadata
 | 
			
		||||
        if (itemCountries.length == 0) {
 | 
			
		||||
        if (itemCountries.size() == 0) {
 | 
			
		||||
            alpha2Result.setResult(itemHandle + ": no countries, skipping.");
 | 
			
		||||
            alpha2Result.setStatus(Curator.CURATE_SKIP);
 | 
			
		||||
        } else {
 | 
			
		||||
            Gson gson = new Gson();
 | 
			
		||||
 | 
			
		||||
            // TODO: convert to try: https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
 | 
			
		||||
            BufferedReader reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(config.isocodesJsonPath)));
 | 
			
		||||
            ISO3166CountriesVocabulary isocodesCountriesJson = gson.fromJson(reader, ISO3166CountriesVocabulary.class);
 | 
			
		||||
            // TODO: convert to try:
 | 
			
		||||
            // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
 | 
			
		||||
            BufferedReader reader =
 | 
			
		||||
                    new BufferedReader(
 | 
			
		||||
                            new InputStreamReader(
 | 
			
		||||
                                    this.getClass().getResourceAsStream(config.isocodesJsonPath)));
 | 
			
		||||
            ISO3166CountriesVocabulary isocodesCountriesJson =
 | 
			
		||||
                    gson.fromJson(reader, ISO3166CountriesVocabulary.class);
 | 
			
		||||
            reader.close();
 | 
			
		||||
 | 
			
		||||
            reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(config.cgspaceCountriesJsonPath)));
 | 
			
		||||
            CGSpaceCountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CGSpaceCountriesVocabulary.class);
 | 
			
		||||
            reader =
 | 
			
		||||
                    new BufferedReader(
 | 
			
		||||
                            new InputStreamReader(
 | 
			
		||||
                                    this.getClass()
 | 
			
		||||
                                            .getResourceAsStream(config.cgspaceCountriesJsonPath)));
 | 
			
		||||
            CGSpaceCountriesVocabulary cgspaceCountriesJson =
 | 
			
		||||
                    gson.fromJson(reader, CGSpaceCountriesVocabulary.class);
 | 
			
		||||
            reader.close();
 | 
			
		||||
 | 
			
		||||
            // split the alpha2 country code field into schema, element, and qualifier so we can use it with item.addMetadata()
 | 
			
		||||
            // split the alpha2 country code field into schema, element, and qualifier so we can use
 | 
			
		||||
            // it with item.addMetadata()
 | 
			
		||||
            String[] iso3166Alpha2FieldParts = config.iso3166Alpha2Field.split("\\.");
 | 
			
		||||
 | 
			
		||||
            if (config.forceupdate) {
 | 
			
		||||
                item.clearMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], Item.ANY);
 | 
			
		||||
                itemService.clearMetadata(
 | 
			
		||||
                        Curator.curationContext(),
 | 
			
		||||
                        item,
 | 
			
		||||
                        iso3166Alpha2FieldParts[0],
 | 
			
		||||
                        iso3166Alpha2FieldParts[1],
 | 
			
		||||
                        iso3166Alpha2FieldParts[2],
 | 
			
		||||
                        Item.ANY);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // check the item's country codes, if any
 | 
			
		||||
            Metadatum[] itemAlpha2CountryCodes = item.getMetadataByMetadataString(config.iso3166Alpha2Field);
 | 
			
		||||
            List<MetadataValue> itemAlpha2CountryCodes =
 | 
			
		||||
                    itemService.getMetadataByMetadataString(item, config.iso3166Alpha2Field);
 | 
			
		||||
 | 
			
		||||
            if (itemAlpha2CountryCodes.length == 0) {
 | 
			
		||||
            if (itemAlpha2CountryCodes.size() == 0) {
 | 
			
		||||
                List<String> newAlpha2Codes = new ArrayList<String>();
 | 
			
		||||
                for (Metadatum itemCountry : itemCountries) {
 | 
			
		||||
                    //check ISO 3166-1 countries
 | 
			
		||||
                for (MetadataValue itemCountry : itemCountries) {
 | 
			
		||||
                    // check ISO 3166-1 countries
 | 
			
		||||
                    for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
 | 
			
		||||
                        if (itemCountry.value.equalsIgnoreCase(country.getName()) || itemCountry.value.equalsIgnoreCase(country.get_official_name()) || itemCountry.value.equalsIgnoreCase(country.get_common_name())) {
 | 
			
		||||
                        if (itemCountry.getValue().equalsIgnoreCase(country.getName())
 | 
			
		||||
                                || itemCountry
 | 
			
		||||
                                        .getValue()
 | 
			
		||||
                                        .equalsIgnoreCase(country.get_official_name())
 | 
			
		||||
                                || itemCountry
 | 
			
		||||
                                        .getValue()
 | 
			
		||||
                                        .equalsIgnoreCase(country.get_common_name())) {
 | 
			
		||||
                            newAlpha2Codes.add(country.getAlpha_2());
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    //check CGSpace countries
 | 
			
		||||
                    // check CGSpace countries
 | 
			
		||||
                    for (CountriesVocabulary.Country country : cgspaceCountriesJson.countries) {
 | 
			
		||||
                        if (itemCountry.value.equalsIgnoreCase(country.getCgspace_name())) {
 | 
			
		||||
                        if (itemCountry.getValue().equalsIgnoreCase(country.getCgspace_name())) {
 | 
			
		||||
                            newAlpha2Codes.add(country.getAlpha_2());
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
@@ -143,16 +159,26 @@ public class CountryCodeTagger extends AbstractCurationTask
 | 
			
		||||
 | 
			
		||||
                if (newAlpha2Codes.size() > 0) {
 | 
			
		||||
                    try {
 | 
			
		||||
                        // add metadata values (casting the List<String> to an array)
 | 
			
		||||
                        item.addMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], "en_US", newAlpha2Codes.toArray(new String[0]));
 | 
			
		||||
                        item.update();
 | 
			
		||||
                        itemService.addMetadata(
 | 
			
		||||
                                Curator.curationContext(),
 | 
			
		||||
                                item,
 | 
			
		||||
                                iso3166Alpha2FieldParts[0],
 | 
			
		||||
                                iso3166Alpha2FieldParts[1],
 | 
			
		||||
                                iso3166Alpha2FieldParts[2],
 | 
			
		||||
                                "en_US",
 | 
			
		||||
                                newAlpha2Codes);
 | 
			
		||||
                        itemService.update(Curator.curationContext(), item);
 | 
			
		||||
                    } catch (SQLException | AuthorizeException sqle) {
 | 
			
		||||
                        config.log.debug(sqle.getMessage());
 | 
			
		||||
                        alpha2Result.setResult(itemHandle + ": error");
 | 
			
		||||
                        alpha2Result.setStatus(Curator.CURATE_ERROR);
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    alpha2Result.setResult(itemHandle + ": added " + newAlpha2Codes.size() + " alpha2 country code(s)");
 | 
			
		||||
                    alpha2Result.setResult(
 | 
			
		||||
                            itemHandle
 | 
			
		||||
                                    + ": added "
 | 
			
		||||
                                    + newAlpha2Codes.size()
 | 
			
		||||
                                    + " alpha2 country code(s)");
 | 
			
		||||
                } else {
 | 
			
		||||
                    alpha2Result.setResult(itemHandle + ": no matching countries found");
 | 
			
		||||
                }
 | 
			
		||||
 
 | 
			
		||||
@@ -1,27 +1,18 @@
 | 
			
		||||
/*
 | 
			
		||||
DSpace Curation Tasks
 | 
			
		||||
Copyright (C) 2020  Alan Orth
 | 
			
		||||
 | 
			
		||||
This program is free software: you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License
 | 
			
		||||
along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
 * Copyright (C) 2020 Alan Orth
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: GPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.ctasks;
 | 
			
		||||
 | 
			
		||||
import com.google.gson.annotations.SerializedName;
 | 
			
		||||
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
public class ISO3166CountriesVocabulary extends CountriesVocabulary {
 | 
			
		||||
    // support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since our class needs to match the JSON exactly
 | 
			
		||||
    @SerializedName("3166-1") List<Country> countries;
 | 
			
		||||
    // support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since
 | 
			
		||||
    // our class needs to match the JSON exactly
 | 
			
		||||
    @SerializedName("3166-1")
 | 
			
		||||
    List<Country> countries;
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										72
									
								
								src/main/java/io/github/ilri/cgspace/ctasks/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								src/main/java/io/github/ilri/cgspace/ctasks/README.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,72 @@
 | 
			
		||||
# Curation Tasks
 | 
			
		||||
DSpace curation tasks used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
 | 
			
		||||
 | 
			
		||||
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
 | 
			
		||||
 | 
			
		||||
Tested on DSpace 6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC6x/Curation+System).
 | 
			
		||||
 | 
			
		||||
## Build and Install
 | 
			
		||||
 | 
			
		||||
### Integrate into DSpace Build
 | 
			
		||||
To use these curation tasks in a DSpace project add the following dependency to `dspace/modules/additions/pom.xml`:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
<dependency>
 | 
			
		||||
  <groupId>io.github.ilri.cgspace</groupId>
 | 
			
		||||
  <artifactId>cgspace-java-helpers</artifactId>
 | 
			
		||||
  <version>6.2-SNAPSHOT</version>
 | 
			
		||||
</dependency>
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The jar will be copied to all DSpace applications.
 | 
			
		||||
 | 
			
		||||
### Manual Build and Install
 | 
			
		||||
To build the standalone jar:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ mvn package
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Copy the resulting jar to the DSpace `lib` directory:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ cp target/cgspace-java-helpers-6.2-SNAPSHOT.jar ~/dspace/lib/
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Configuration
 | 
			
		||||
Add the curation task to DSpace's `config/modules/curate.cfg`:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger
 | 
			
		||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger.force
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
And then add the following variables to your `local.cfg` or some other [configuration file that is included](https://wiki.lyrasis.org/display/DSDOC6x/Configuration+Reference#ConfigurationReference-IncludingotherPropertyFiles):
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
# name of the field containing ISO 3166-1 country names
 | 
			
		||||
countrycodetagger.iso3166.field = cg.coverage.country
 | 
			
		||||
 | 
			
		||||
# name of the field containing ISO 3166-1 Alpha2 country codes
 | 
			
		||||
countrycodetagger.iso3166-alpha2.field = cg.coverage.iso3166-alpha2
 | 
			
		||||
 | 
			
		||||
# clear existing country codes and re-tag every item; when false (the default), codes are only added to items that don't have any
 | 
			
		||||
#countrycodetagger.forceupdate = false
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
*Note*: DSpace's curation system supports "profiles" where you can use the same task with different options. For example, above I have a normal country code tagger task and a "force" variant. The "force" variant is the same task, but it looks for configuration variables using the `countrycodetagger.force` prefix instead. To use the "force" variant you simply need to add these new variables, with the `forceupdate` parameter overridden (set to `true`), to the same configuration file where you put the other variables. The "force" profile clears all existing country codes and updates everything.
 | 
			
		||||
 | 
			
		||||
## Invocation
 | 
			
		||||
Once the jar is installed and you have added appropriate configuration in `~/dspace/config/modules`:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ ~/dspace/bin/dspace curate -t countrycodetagger -i 10568/3 -r - -s object
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
*Note*: it is very important to set the database transaction scope to something sensible (`object`) if you're curating a community or collection with more than a few hundred items.
 | 
			
		||||
 | 
			
		||||
## TODO
 | 
			
		||||
 | 
			
		||||
- Make sure this task does not operate on items that are still in the workflow
 | 
			
		||||
- Check for existence of metadata field before trying to add metadata
 | 
			
		||||
- Add tests
 | 
			
		||||
@@ -1,22 +1,46 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright (C) 2020 Alan Orth
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: GPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.scripts;
 | 
			
		||||
 | 
			
		||||
import org.apache.commons.lang.StringUtils;
 | 
			
		||||
import org.dspace.authorize.AuthorizeException;
 | 
			
		||||
import org.dspace.content.*;
 | 
			
		||||
import org.dspace.content.Bitstream;
 | 
			
		||||
import org.dspace.content.Bundle;
 | 
			
		||||
import org.dspace.content.Collection;
 | 
			
		||||
import org.dspace.content.Community;
 | 
			
		||||
import org.dspace.content.DSpaceObject;
 | 
			
		||||
import org.dspace.content.Item;
 | 
			
		||||
import org.dspace.content.MetadataValue;
 | 
			
		||||
import org.dspace.content.factory.ContentServiceFactory;
 | 
			
		||||
import org.dspace.content.service.BundleService;
 | 
			
		||||
import org.dspace.content.service.ItemService;
 | 
			
		||||
import org.dspace.core.Constants;
 | 
			
		||||
import org.dspace.core.Context;
 | 
			
		||||
import org.dspace.handle.HandleManager;
 | 
			
		||||
import org.dspace.handle.factory.HandleServiceFactory;
 | 
			
		||||
import org.dspace.handle.service.HandleService;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.sql.SQLException;
 | 
			
		||||
import java.util.Iterator;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * @author Andrea Schweer schweer@waikato.ac.nz for the LCoNZ Institutional Research Repositories
 | 
			
		||||
 * @author Alan Orth for the International Livestock Research Institute
 | 
			
		||||
 * @version 5.1-SNAPSHOT
 | 
			
		||||
 * @since 5.1-SNAPSHOT
 | 
			
		||||
 * @version 6.1
 | 
			
		||||
 * @since 5.1
 | 
			
		||||
 */
 | 
			
		||||
public class FixJpgJpgThumbnails {
 | 
			
		||||
    // note: static members belong to the class itself, not any one instance
 | 
			
		||||
    public static ItemService itemService = ContentServiceFactory.getInstance().getItemService();
 | 
			
		||||
    public static HandleService handleService =
 | 
			
		||||
            HandleServiceFactory.getInstance().getHandleService();
 | 
			
		||||
    public static BundleService bundleService =
 | 
			
		||||
            ContentServiceFactory.getInstance().getBundleService();
 | 
			
		||||
 | 
			
		||||
    public static void main(String[] args) {
 | 
			
		||||
        String parentHandle = null;
 | 
			
		||||
@@ -30,25 +54,32 @@ public class FixJpgJpgThumbnails {
 | 
			
		||||
            context.turnOffAuthorisationSystem();
 | 
			
		||||
 | 
			
		||||
            if (StringUtils.isBlank(parentHandle)) {
 | 
			
		||||
				process(context, Item.findAll(context));
 | 
			
		||||
                process(context, itemService.findAll(context));
 | 
			
		||||
            } else {
 | 
			
		||||
				DSpaceObject parent = HandleManager.resolveToObject(context, parentHandle);
 | 
			
		||||
                DSpaceObject parent = handleService.resolveToObject(context, parentHandle);
 | 
			
		||||
                if (parent != null) {
 | 
			
		||||
                    switch (parent.getType()) {
 | 
			
		||||
						case Constants.COLLECTION:
 | 
			
		||||
							process(context, ((Collection) parent).getAllItems()); // getAllItems because we want to work on non-archived ones as well
 | 
			
		||||
                        case Constants.SITE:
 | 
			
		||||
                            process(context, itemService.findAll(context));
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
                        case Constants.COMMUNITY:
 | 
			
		||||
							Collection[] collections = ((Community) parent).getCollections();
 | 
			
		||||
                            List<Collection> collections = ((Community) parent).getCollections();
 | 
			
		||||
                            for (Collection collection : collections) {
 | 
			
		||||
								process(context, collection.getAllItems()); // getAllItems because we want to work on non-archived ones as well
 | 
			
		||||
                                process(
 | 
			
		||||
                                        context,
 | 
			
		||||
                                        itemService.findAllByCollection(context, collection));
 | 
			
		||||
                            }
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
						case Constants.SITE:
 | 
			
		||||
							process(context, Item.findAll(context));
 | 
			
		||||
                        case Constants.COLLECTION:
 | 
			
		||||
                            process(
 | 
			
		||||
                                    context,
 | 
			
		||||
                                    itemService.findByCollection(context, (Collection) parent));
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
                        case Constants.ITEM:
 | 
			
		||||
							processItem((Item) parent);
 | 
			
		||||
                            processItem(context, (Item) parent);
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
                    }
 | 
			
		||||
@@ -63,39 +94,77 @@ public class FixJpgJpgThumbnails {
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
	private static void process(Context context, ItemIterator items) throws SQLException, IOException, AuthorizeException {
 | 
			
		||||
    private static void process(Context context, Iterator<Item> items)
 | 
			
		||||
            throws SQLException, IOException, AuthorizeException {
 | 
			
		||||
        while (items.hasNext()) {
 | 
			
		||||
            Item item = items.next();
 | 
			
		||||
			processItem(item);
 | 
			
		||||
			context.commit();
 | 
			
		||||
			item.decache();
 | 
			
		||||
            processItem(context, item);
 | 
			
		||||
            itemService.update(context, item);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
	private static void processItem(Item item) throws SQLException, AuthorizeException, IOException {
 | 
			
		||||
		Bundle[] thumbnailBundles = item.getBundles("THUMBNAIL");
 | 
			
		||||
    private static void processItem(Context context, Item item)
 | 
			
		||||
            throws SQLException, AuthorizeException, IOException {
 | 
			
		||||
        // Some bitstreams like Infographics and Maps are large JPEGs and put in the ORIGINAL bundle
 | 
			
		||||
        // on purpose so we shouldn't
 | 
			
		||||
        // swap them.
 | 
			
		||||
        List<MetadataValue> itemTypes =
 | 
			
		||||
                itemService.getMetadataByMetadataString(item, "dcterms.type");
 | 
			
		||||
        for (MetadataValue itemType : itemTypes) {
 | 
			
		||||
            if (itemType.getValue().equals("Infographic") || itemType.getValue().equals("Map")) {
 | 
			
		||||
                System.out.println(
 | 
			
		||||
                        item.getHandle() + ": item has an Infographic or Map, skipping.");
 | 
			
		||||
                return;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        List<Bundle> thumbnailBundles = item.getBundles("THUMBNAIL");
 | 
			
		||||
        for (Bundle thumbnailBundle : thumbnailBundles) {
 | 
			
		||||
			Bitstream[] thumbnailBundleBitstreams = thumbnailBundle.getBitstreams();
 | 
			
		||||
            List<Bitstream> thumbnailBundleBitstreams = thumbnailBundle.getBitstreams();
 | 
			
		||||
            for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
 | 
			
		||||
                String thumbnailName = thumbnailBitstream.getName();
 | 
			
		||||
                String thumbnailDescription = thumbnailBitstream.getDescription();
 | 
			
		||||
 | 
			
		||||
                // There is no point continuing if the thumbnail's description is empty or null
 | 
			
		||||
                if (StringUtils.isEmpty(thumbnailDescription)) {
 | 
			
		||||
                    continue;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                if (thumbnailName.toLowerCase().contains(".jpg.jpg")) {
 | 
			
		||||
					Bundle[] originalBundles = item.getBundles("ORIGINAL");
 | 
			
		||||
                    List<Bundle> originalBundles = item.getBundles("ORIGINAL");
 | 
			
		||||
                    for (Bundle originalBundle : originalBundles) {
 | 
			
		||||
						Bitstream[] originalBundleBitstreams = originalBundle.getBitstreams();
 | 
			
		||||
                        List<Bitstream> originalBundleBitstreams = originalBundle.getBitstreams();
 | 
			
		||||
 | 
			
		||||
						for(Bitstream originalBitstream : originalBundleBitstreams) {
 | 
			
		||||
                        for (Bitstream originalBitstream : originalBundleBitstreams) {
 | 
			
		||||
                            String originalName = originalBitstream.getName();
 | 
			
		||||
 | 
			
		||||
							//check if the original file name is the same as the thumbnail name minus the extra ".jpg"
 | 
			
		||||
							if (originalName.equalsIgnoreCase(StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg")) && ("Generated Thumbnail".equals(thumbnailBitstream.getDescription()) || "IM Thumbnail".equals(thumbnailBitstream.getDescription()))) {
 | 
			
		||||
								System.out.println(item.getHandle() + ": replacing " + thumbnailName + " with " + originalName);
 | 
			
		||||
                            long originalBitstreamBytes = originalBitstream.getSize();
 | 
			
		||||
 | 
			
		||||
								//add the original bitstream to the THUMBNAIL bundle
 | 
			
		||||
								thumbnailBundle.addBitstream(originalBitstream);
 | 
			
		||||
								//remove the original bitstream from the ORIGINAL bundle
 | 
			
		||||
                            /*
 | 
			
		||||
                            - check if the original file name is the same as the thumbnail name minus the extra ".jpg"
 | 
			
		||||
                            - check if the thumbnail description indicates it was automatically generated
 | 
			
		||||
                            - check if the original bitstream is less than ~100KiB
 | 
			
		||||
                                - Note: in my tests there were 4022 items with ".jpg.jpg" thumbnails totaling 394549249
 | 
			
		||||
                                  bytes for an average of about 98KiB so ~100KiB seems like a good cut off
 | 
			
		||||
                            */
 | 
			
		||||
                            if (originalName.equalsIgnoreCase(
 | 
			
		||||
                                            StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg"))
 | 
			
		||||
                                    && ("Generated Thumbnail".equals(thumbnailDescription)
 | 
			
		||||
                                            || "IM Thumbnail".equals(thumbnailDescription))
 | 
			
		||||
                                    && originalBitstreamBytes < 100000) {
 | 
			
		||||
                                System.out.println(
 | 
			
		||||
                                        item.getHandle()
 | 
			
		||||
                                                + ": replacing "
 | 
			
		||||
                                                + thumbnailName
 | 
			
		||||
                                                + " with "
 | 
			
		||||
                                                + originalName);
 | 
			
		||||
 | 
			
		||||
                                // add the original bitstream to the THUMBNAIL bundle
 | 
			
		||||
                                bundleService.addBitstream(
 | 
			
		||||
                                        context, thumbnailBundle, originalBitstream);
 | 
			
		||||
                                // remove the original bitstream from the ORIGINAL bundle
 | 
			
		||||
                                originalBundle.removeBitstream(originalBitstream);
 | 
			
		||||
								//remove the JpgJpg bitstream from the THUMBNAIL bundle
 | 
			
		||||
                                // remove the JpgJpg bitstream from the THUMBNAIL bundle
 | 
			
		||||
                                thumbnailBundle.removeBitstream(thumbnailBitstream);
 | 
			
		||||
                            }
 | 
			
		||||
                        }
 | 
			
		||||
 
 | 
			
		||||
@@ -0,0 +1,280 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright (C) 2022 Alan Orth
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: GPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.scripts;
 | 
			
		||||
 | 
			
		||||
import org.apache.commons.lang.StringUtils;
 | 
			
		||||
import org.dspace.authorize.AuthorizeException;
 | 
			
		||||
import org.dspace.content.Bitstream;
 | 
			
		||||
import org.dspace.content.Bundle;
 | 
			
		||||
import org.dspace.content.Collection;
 | 
			
		||||
import org.dspace.content.Community;
 | 
			
		||||
import org.dspace.content.DSpaceObject;
 | 
			
		||||
import org.dspace.content.Item;
 | 
			
		||||
import org.dspace.content.factory.ContentServiceFactory;
 | 
			
		||||
import org.dspace.content.service.BundleService;
 | 
			
		||||
import org.dspace.content.service.ItemService;
 | 
			
		||||
import org.dspace.core.Constants;
 | 
			
		||||
import org.dspace.core.Context;
 | 
			
		||||
import org.dspace.handle.factory.HandleServiceFactory;
 | 
			
		||||
import org.dspace.handle.service.HandleService;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.sql.SQLException;
 | 
			
		||||
import java.util.Iterator;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Fix low-quality thumbnails in a DSpace repository.
 | 
			
		||||
 *
 | 
			
		||||
 * <p>Search the DSpace repository for items containing bitstreams matching the following criteria:
 | 
			
		||||
 *
 | 
			
		||||
 * <ul>
 | 
			
		||||
 *   <li>If an item has an <code>IM Thumbnail</code> and a <code>Generated Thumbnail</code> in the
 | 
			
		||||
 *       <code>THUMBNAIL</code> bundle, remove the <code>Generated Thumbnail</code>.
 | 
			
		||||
 *   <li>If an item has a PDF bitstream and a JPEG bitstream with description "thumbnail" in the
 | 
			
		||||
 *       <code>ORIGINAL</code> bundle, remove the "thumbnail" bitstream in the ORIGINAL bundle.
 | 
			
		||||
 * </ul>
 | 
			
		||||
 *
 | 
			
		||||
 * <p>The general idea is that we should always prefer thumbnails generated from PDFs by ImageMagick
 | 
			
		||||
 * to manually uploaded JPEGs because ImageMagick Thumbnails can be regenerated with higher quality,
 | 
			
		||||
 * resolution, etc. Furthermore, if there are JPEG bitstreams in the ORIGINAL bundle DSpace will
 | 
			
		||||
 * automatically create ".jpg.jpg" thumbnails from them in the THUMBNAIL bundle so we should remove
 | 
			
		||||
 * those as well!
 | 
			
		||||
 *
 | 
			
		||||
 * @author Andrea Schweer schweer@waikato.ac.nz for the LCoNZ Institutional Research Repositories
 | 
			
		||||
 * @author Alan Orth for the International Livestock Research Institute
 | 
			
		||||
 * @version 6.1
 | 
			
		||||
 * @since 6.1
 | 
			
		||||
 * @see FixJpgJpgThumbnails
 | 
			
		||||
 */
 | 
			
		||||
public class FixLowQualityThumbnails {
 | 
			
		||||
    // note: static members belong to the class itself, not any one instance
 | 
			
		||||
    public static ItemService itemService = ContentServiceFactory.getInstance().getItemService();
 | 
			
		||||
    public static HandleService handleService =
 | 
			
		||||
            HandleServiceFactory.getInstance().getHandleService();
 | 
			
		||||
    public static BundleService bundleService =
 | 
			
		||||
            ContentServiceFactory.getInstance().getBundleService();
 | 
			
		||||
 | 
			
		||||
    public static void main(String[] args) {
 | 
			
		||||
        String parentHandle = null;
 | 
			
		||||
        if (args.length >= 1) {
 | 
			
		||||
            parentHandle = args[0];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Context context = null;
 | 
			
		||||
        try {
 | 
			
		||||
            context = new Context();
 | 
			
		||||
            context.turnOffAuthorisationSystem();
 | 
			
		||||
 | 
			
		||||
            if (StringUtils.isBlank(parentHandle)) {
 | 
			
		||||
                process(context, itemService.findAll(context));
 | 
			
		||||
            } else {
 | 
			
		||||
                DSpaceObject parent = handleService.resolveToObject(context, parentHandle);
 | 
			
		||||
                if (parent != null) {
 | 
			
		||||
                    switch (parent.getType()) {
 | 
			
		||||
                        case Constants.SITE:
 | 
			
		||||
                            process(context, itemService.findAll(context));
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
                        case Constants.COMMUNITY:
 | 
			
		||||
                            List<Collection> collections = ((Community) parent).getCollections();
 | 
			
		||||
                            for (Collection collection : collections) {
 | 
			
		||||
                                process(
 | 
			
		||||
                                        context,
 | 
			
		||||
                                        itemService.findAllByCollection(context, collection));
 | 
			
		||||
                            }
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
                        case Constants.COLLECTION:
 | 
			
		||||
                            process(
 | 
			
		||||
                                    context,
 | 
			
		||||
                                    itemService.findByCollection(context, (Collection) parent));
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
                        case Constants.ITEM:
 | 
			
		||||
                            processItem(context, (Item) parent);
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        } catch (SQLException | AuthorizeException | IOException e) {
 | 
			
		||||
            e.printStackTrace(System.err);
 | 
			
		||||
        } finally {
 | 
			
		||||
            if (context != null && context.isValid()) {
 | 
			
		||||
                context.abort();
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private static void process(Context context, Iterator<Item> items)
 | 
			
		||||
            throws SQLException, IOException, AuthorizeException {
 | 
			
		||||
        while (items.hasNext()) {
 | 
			
		||||
            Item item = items.next();
 | 
			
		||||
            processItem(context, item);
 | 
			
		||||
            itemService.update(context, item);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private static void processItem(Context context, Item item)
 | 
			
		||||
            throws SQLException, AuthorizeException, IOException {
 | 
			
		||||
        System.out.println("FixLowQualityThumbnails: processing item: " + item.getHandle());
 | 
			
		||||
 | 
			
		||||
        // Set some state for the item before we iterate over the THUMBNAIL bundle
 | 
			
		||||
        boolean itemHasImThumbnail = false;
 | 
			
		||||
 | 
			
		||||
        // Iterate over the THUMBNAIL bundle to first identify if this item has an "IM Thumbnail"
 | 
			
		||||
        List<Bundle> thumbnailBundles = item.getBundles("THUMBNAIL");
 | 
			
		||||
        for (Bundle thumbnailBundle : thumbnailBundles) {
 | 
			
		||||
            List<Bitstream> thumbnailBundleBitstreams = thumbnailBundle.getBitstreams();
 | 
			
		||||
            for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
 | 
			
		||||
                String thumbnailDescription = thumbnailBitstream.getDescription();
 | 
			
		||||
 | 
			
		||||
                if (StringUtils.isEmpty(thumbnailDescription)) {
 | 
			
		||||
                    continue;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                // Check if this item has a bitstream in the THUMBNAIL bundle with description "IM
 | 
			
		||||
                // Thumbnail", but only if we haven't already seen one in another iteration for this
 | 
			
		||||
                // bundle.
 | 
			
		||||
                if (!itemHasImThumbnail && "IM Thumbnail".equals(thumbnailDescription)) {
 | 
			
		||||
                    itemHasImThumbnail = true;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // If this item has an IM Thumbnail we can be reasonably sure that there is a PDF
 | 
			
		||||
            // in the ORIGINAL bundle and we don't need any other thumbnails.
 | 
			
		||||
            if (itemHasImThumbnail) {
 | 
			
		||||
                // Iterate over the bitstreams in the THUMBNAIL bundle again.
 | 
			
		||||
                for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
 | 
			
		||||
                    String thumbnailName = thumbnailBitstream.getName();
 | 
			
		||||
                    String thumbnailDescription = thumbnailBitstream.getDescription();
 | 
			
		||||
 | 
			
		||||
                    if (StringUtils.isEmpty(thumbnailDescription)) {
 | 
			
		||||
                        continue;
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    // If this item has a "Generated Thumbnail" we can remove it, because those
 | 
			
		||||
                    // typically come from other JPEGs in the ORIGINAL bundle and we would prefer
 | 
			
		||||
                    // the IM Thumbnail generated from a PDF anyway. The DSpace-generated descri-
 | 
			
		||||
                    // ption will *always* be "Generated Thumbnail".
 | 
			
		||||
                    if ("Generated Thumbnail".equals(thumbnailDescription)) {
 | 
			
		||||
                        System.out.print("\u001b[33m");
 | 
			
		||||
                        System.out.println("> Action: remove old thumbnail from THUMBNAIL bundle");
 | 
			
		||||
                        System.out.println("> Name: »" + thumbnailName + "«");
 | 
			
		||||
                        System.out.println("> Description: »" + thumbnailDescription + "«");
 | 
			
		||||
                        System.out.print("\u001b[0m");
 | 
			
		||||
 | 
			
		||||
                        // Remove the "Generated Thumbnail" bitstream from the THUMBNAIL bundle
 | 
			
		||||
                        thumbnailBundle.removeBitstream(thumbnailBitstream);
 | 
			
		||||
 | 
			
		||||
                        // If this item has a bitstream with the word "thumbnail" in it then we can
 | 
			
		||||
                        // remove it because we already know this item has an IM Thumbnail and we
 | 
			
		||||
                        // prefer that one.
 | 
			
		||||
                    } else if (thumbnailDescription.toLowerCase().contains("thumbnail")
 | 
			
		||||
                            && !"IM Thumbnail".equals(thumbnailDescription)) {
 | 
			
		||||
                        System.out.print("\u001b[33m");
 | 
			
		||||
                        System.out.println("> Action: remove manually uploaded thumbnail from THUMBNAIL bundle");
 | 
			
		||||
                        System.out.println("> Name: »" + thumbnailName + "«");
 | 
			
		||||
                        System.out.println("> Description: »" + thumbnailDescription + "«");
 | 
			
		||||
                        System.out.print("\u001b[0m");
 | 
			
		||||
 | 
			
		||||
                        // Remove the "thumbnail" bitstream from the THUMBNAIL bundle
 | 
			
		||||
                        thumbnailBundle.removeBitstream(thumbnailBitstream);
 | 
			
		||||
 | 
			
		||||
                        // Otherwise skip it because it might be something uploaded manually, like
 | 
			
		||||
                        // a thumbnail for a journal or a limited access item.
 | 
			
		||||
                    } else {
 | 
			
		||||
                        System.out.print("\u001b[34m");
 | 
			
		||||
                        System.out.println("> Action: skip other thumbnail in THUMBNAIL bundle");
 | 
			
		||||
                        System.out.println("> Name: »" + thumbnailName + "«");
 | 
			
		||||
                        System.out.println("> Description: »" + thumbnailDescription + "«");
 | 
			
		||||
                        System.out.print("\u001b[0m");
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    // Print a blank line
 | 
			
		||||
                    System.out.println();
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Set some state before we iterate over the ORIGINAL bundle
 | 
			
		||||
        boolean itemHasOriginalPdfBitstream = false;
 | 
			
		||||
        boolean itemHasOriginalJpegBitstream = false;
 | 
			
		||||
 | 
			
		||||
        // Iterate over the ORIGINAL bundle to delete manually-uploaded JPEG
 | 
			
		||||
        // bitstreams labeled "Thumbnail" whenever we have a PDF because they
 | 
			
		||||
        // don't belong in the ORIGINAL bundle and DSpace will automatically
 | 
			
		||||
        // create a better thumbnail from the PDF anyway.
 | 
			
		||||
        List<Bundle> originalBundles = item.getBundles("ORIGINAL");
 | 
			
		||||
        for (Bundle originalBundle : originalBundles) {
 | 
			
		||||
            List<Bitstream> originalBundleBitstreams = originalBundle.getBitstreams();
 | 
			
		||||
            for (Bitstream originalBitstream : originalBundleBitstreams) {
 | 
			
		||||
                String originalFormat = originalBitstream.getFormat(context).getMIMEType();
 | 
			
		||||
 | 
			
		||||
                // Check if this item has a PDF bitstream in the ORIGINAL bundle,
 | 
			
		||||
                // but only if we haven't already seen one in another iteration
 | 
			
		||||
                // for this bundle. DSpace will return "format application/pdf"
 | 
			
		||||
                // for the MIME type.
 | 
			
		||||
                if (!itemHasOriginalPdfBitstream && originalFormat.contains("application/pdf")) {
 | 
			
		||||
                    itemHasOriginalPdfBitstream = true;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                // Check if this item has a JPEG bitstream in the ORIGINAL bundle,
 | 
			
		||||
                // but only if we haven't already seen one in another iteration
 | 
			
		||||
                // for this bundle. DSpace will return "format image/jpeg" for
 | 
			
		||||
                // the MIME type.
 | 
			
		||||
                if (!itemHasOriginalJpegBitstream && originalFormat.contains("image/jpeg")) {
 | 
			
		||||
                    itemHasOriginalJpegBitstream = true;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // Check if we found a PDF *and* a JPEG in this item's ORIGINAL
 | 
			
		||||
            // bundle.
 | 
			
		||||
            if (itemHasOriginalPdfBitstream && itemHasOriginalJpegBitstream) {
 | 
			
		||||
                // Yes! Now iterate over the bitstreams in the ORIGINAL bundle
 | 
			
		||||
                // again to see if the JPEG is a manually uploaded "Thumbnail"
 | 
			
		||||
                for (Bitstream originalBitstream : originalBundleBitstreams) {
 | 
			
		||||
                    String originalName = originalBitstream.getName();
 | 
			
		||||
                    String originalDescription = originalBitstream.getDescription();
 | 
			
		||||
                    String originalFormat = originalBitstream.getFormat(context).getMIMEType();
 | 
			
		||||
 | 
			
		||||
                    if (StringUtils.isEmpty(originalDescription)) {
 | 
			
		||||
                        continue;
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    /*
 | 
			
		||||
                    - check if the bitstream is a JPEG based on its MIME Type
 | 
			
		||||
                    - check if the bitstream's name or description is "Thumbnail"
 | 
			
		||||
                    */
 | 
			
		||||
                    if (originalFormat.toLowerCase().contains("image/jpeg")
 | 
			
		||||
                            && (originalName.toLowerCase().contains("thumbnail")
 | 
			
		||||
                                    || originalDescription.toLowerCase().contains("thumbnail"))) {
 | 
			
		||||
                        System.out.print("\u001b[33m");
 | 
			
		||||
                        System.out.println("> Action: remove thumbnail from ORIGINAL bundle");
 | 
			
		||||
                        System.out.println("> Name: »" + originalName + "«");
 | 
			
		||||
                        System.out.println("> Description: »" + originalDescription + "«");
 | 
			
		||||
                        System.out.print("\u001b[0m");
 | 
			
		||||
 | 
			
		||||
                        // Remove the original bitstream from the ORIGINAL bundle
 | 
			
		||||
                        originalBundle.removeBitstream(originalBitstream);
 | 
			
		||||
 | 
			
		||||
                    } else {
 | 
			
		||||
                        System.out.print("\u001b[34m");
 | 
			
		||||
                        System.out.println("> Action: skip other bitstream in ORIGINAL bundle");
 | 
			
		||||
                        System.out.println("> Name: »" + originalName + "«");
 | 
			
		||||
                        System.out.println("> Description: »" + originalDescription + "«");
 | 
			
		||||
                        System.out.print("\u001b[0m");
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    // Print a blank line
 | 
			
		||||
                    System.out.println();
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										42
									
								
								src/main/java/io/github/ilri/cgspace/scripts/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								src/main/java/io/github/ilri/cgspace/scripts/README.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,42 @@
 | 
			
		||||
# Scripts
 | 
			
		||||
Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
 | 
			
		||||
 | 
			
		||||
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
 | 
			
		||||
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
 | 
			
		||||
 | 
			
		||||
Tested on DSpace 6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC6x/Curation+System).
 | 
			
		||||
 | 
			
		||||
## Build and Install
 | 
			
		||||
 | 
			
		||||
### Integrate into DSpace Build
 | 
			
		||||
To use these scripts in a DSpace project, add the following dependency to `dspace/modules/additions/pom.xml`:
 | 
			
		||||
 | 
			
		||||
```xml
 | 
			
		||||
<dependency>
 | 
			
		||||
  <groupId>io.github.ilri.cgspace</groupId>
 | 
			
		||||
  <artifactId>cgspace-java-helpers</artifactId>
 | 
			
		||||
  <version>6.2-SNAPSHOT</version>
 | 
			
		||||
</dependency>
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The jar will be copied to all DSpace applications.
 | 
			
		||||
 | 
			
		||||
### Manual Build and Install
 | 
			
		||||
To build the standalone jar:
 | 
			
		||||
 | 
			
		||||
```console
 | 
			
		||||
$ mvn package
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Copy the resulting jar to the DSpace `lib` directory:
 | 
			
		||||
 | 
			
		||||
```console
 | 
			
		||||
$ cp target/cgspace-java-helpers-6.2-SNAPSHOT.jar ~/dspace/lib/
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Invocation
 | 
			
		||||
The scripts take only one argument, which is the handle of a community, collection, or item:
 | 
			
		||||
 | 
			
		||||
```console
 | 
			
		||||
$ dspace dsrun io.github.ilri.cgspace.scripts.FixJpgJpgThumbnails 10568/83389
 | 
			
		||||
```
 | 
			
		||||
@@ -16,29 +16,15 @@
 | 
			
		||||
      "name": "Congo, The Democratic Republic of the",
 | 
			
		||||
      "numeric": "180"
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "alpha_2": "IR",
 | 
			
		||||
      "alpha_3": "IRN",
 | 
			
		||||
      "name": "Iran, Islamic Republic of",
 | 
			
		||||
      "cgspace_name": "Iran",
 | 
			
		||||
      "numeric": "364",
 | 
			
		||||
      "official_name": "Islamic Republic of Iran"
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "alpha_2": "KP",
 | 
			
		||||
      "alpha_3": "PRK",
 | 
			
		||||
      "common_name": "North Korea",
 | 
			
		||||
      "name": "Korea, Democratic People's Republic of",
 | 
			
		||||
      "cgspace_name": "Korea, DPR",
 | 
			
		||||
      "numeric": "408",
 | 
			
		||||
      "official_name": "Democratic People's Republic of Korea"
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "alpha_2": "LA",
 | 
			
		||||
      "alpha_3": "LAO",
 | 
			
		||||
      "name": "Lao People's Democratic Republic",
 | 
			
		||||
      "cgspace_name": "Laos",
 | 
			
		||||
      "numeric": "418"
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "alpha_2": "FM",
 | 
			
		||||
      "alpha_3": "FSM",
 | 
			
		||||
@@ -54,12 +40,5 @@
 | 
			
		||||
      "cgspace_name": "Russia",
 | 
			
		||||
      "numeric": "643"
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      "alpha_2": "SY",
 | 
			
		||||
      "alpha_3": "SYR",
 | 
			
		||||
      "name": "Syrian Arab Republic",
 | 
			
		||||
      "cgspace_name": "Syria",
 | 
			
		||||
      "numeric": "760"
 | 
			
		||||
    }
 | 
			
		||||
  ]
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user