mirror of
				https://github.com/ilri/cgspace-java-helpers.git
				synced 2025-11-03 22:29:10 +01:00 
			
		
		
		
	Compare commits
	
		
			36 Commits
		
	
	
		
			dspace5
			...
			f0754ab419
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						
						
							
						
						f0754ab419
	
				 | 
					
					
						|||
| 
						
						
							
						
						6772145bec
	
				 | 
					
					
						|||
| 
						
						
							
						
						b31557aa05
	
				 | 
					
					
						|||
| 
						
						
							
						
						095f843067
	
				 | 
					
					
						|||
| 
						
						
							
						
						f7fda9922f
	
				 | 
					
					
						|||
| 
						
						
							
						
						83a416afaf
	
				 | 
					
					
						|||
| 
						
						
							
						
						922e3892a7
	
				 | 
					
					
						|||
| 
						
						
							
						
						6b648c2c85
	
				 | 
					
					
						|||
| 
						
						
							
						
						781ddcd931
	
				 | 
					
					
						|||
| 
						
						
							
						
						49cb8e3468
	
				 | 
					
					
						|||
| 
						
						
							
						
						3aa1503163
	
				 | 
					
					
						|||
| 
						
						
							
						
						26597e2f8f
	
				 | 
					
					
						|||
| 
						
						
							
						
						1497ebb476
	
				 | 
					
					
						|||
| 
						
						
							
						
						b2027e3e44
	
				 | 
					
					
						|||
| 
						
						
							
						
						26eaa2d94f
	
				 | 
					
					
						|||
| e3b95f6a30 | |||
| 
						
						
							
						
						5e545e37e2
	
				 | 
					
					
						|||
| 
						
						
							
						
						3f711db1b2
	
				 | 
					
					
						|||
| 
						
						
							
						
						112cb8a133
	
				 | 
					
					
						|||
| 
						
						
							
						
						4d59c1a00d
	
				 | 
					
					
						|||
| 
						
						
							
						
						2e779efb14
	
				 | 
					
					
						|||
| 
						
						
							
						
						735e759033
	
				 | 
					
					
						|||
| 
						
						
							
						
						271a9ce970
	
				 | 
					
					
						|||
| 
						
						
							
						
						6b72ddefc1
	
				 | 
					
					
						|||
| 
						
						
							
						
						cea97aebe5
	
				 | 
					
					
						|||
| 
						
						
							
						
						4bc7971ecb
	
				 | 
					
					
						|||
| 
						
						
							
						
						197aad0124
	
				 | 
					
					
						|||
| 
						
						
							
						
						da1ecad238
	
				 | 
					
					
						|||
| 
						
						
							
						
						307480f249
	
				 | 
					
					
						|||
| 
						
						
							
						
						4698b6eb38
	
				 | 
					
					
						|||
| 
						
						
							
						
						f1629f65fe
	
				 | 
					
					
						|||
| 
						
						
							
						
						29f6aff35e
	
				 | 
					
					
						|||
| 
						
						
							
						
						9bf487a336
	
				 | 
					
					
						|||
| 
						
						
							
						
						f50357b7cc
	
				 | 
					
					
						|||
| 
						
						
							
						
						f3ab89f7a1
	
				 | 
					
					
						|||
| 
						
						
							
						
						5a467f92e0
	
				 | 
					
					
						
							
								
								
									
										24
									
								
								.github/workflows/maven.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								.github/workflows/maven.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,24 @@
 | 
			
		||||
# This workflow will build a Java project with Maven
 | 
			
		||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven
 | 
			
		||||
 | 
			
		||||
name: Build
 | 
			
		||||
 | 
			
		||||
on:
 | 
			
		||||
  push:
 | 
			
		||||
    branches: [ dspace6 ]
 | 
			
		||||
  pull_request:
 | 
			
		||||
    branches: [ dspace6 ]
 | 
			
		||||
 | 
			
		||||
jobs:
 | 
			
		||||
  build:
 | 
			
		||||
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
 | 
			
		||||
    steps:
 | 
			
		||||
    - uses: actions/checkout@v2
 | 
			
		||||
    - name: Set up JDK 1.8
 | 
			
		||||
      uses: actions/setup-java@v1
 | 
			
		||||
      with:
 | 
			
		||||
        java-version: 1.8
 | 
			
		||||
    - name: Build with Maven
 | 
			
		||||
      run: mvn -B package --file pom.xml
 | 
			
		||||
							
								
								
									
										6
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -70,4 +70,10 @@ fabric.properties
 | 
			
		||||
# Android studio 3.1+ serialized cache file
 | 
			
		||||
.idea/caches/build_file_checksums.ser
 | 
			
		||||
 | 
			
		||||
# VS Code settings
 | 
			
		||||
.vscode
 | 
			
		||||
 | 
			
		||||
# asdf-vm tool-versions file
 | 
			
		||||
.tool-versions
 | 
			
		||||
 | 
			
		||||
target/
 | 
			
		||||
 
 | 
			
		||||
@@ -1,8 +0,0 @@
 | 
			
		||||
dist: bionic
 | 
			
		||||
language: java
 | 
			
		||||
jdk:
 | 
			
		||||
  - openjdk8
 | 
			
		||||
script:
 | 
			
		||||
  - mvn package -B
 | 
			
		||||
 | 
			
		||||
# vim: ts=2 sw=2 et
 | 
			
		||||
							
								
								
									
										15
									
								
								CHANGELOG.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								CHANGELOG.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,15 @@
 | 
			
		||||
# Changelog
 | 
			
		||||
All notable changes to this project will be documented in this file.
 | 
			
		||||
 | 
			
		||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 | 
			
		||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 | 
			
		||||
 | 
			
		||||
## Unreleased
 | 
			
		||||
### Updated
 | 
			
		||||
- Update dependencies in `pom.xml`
 | 
			
		||||
 | 
			
		||||
### Changed
 | 
			
		||||
- Java compiler and target from JDK 7 to JDK 8
 | 
			
		||||
 | 
			
		||||
### Added
 | 
			
		||||
- New `FixLowQualityThumbnails` script to detect and remove more low-quality thumbnails
 | 
			
		||||
							
								
								
									
										62
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										62
									
								
								README.md
									
									
									
									
									
								
							@@ -1,9 +1,11 @@
 | 
			
		||||
# DSpace Curation Tasks [Build Status](https://travis-ci.org/ilri/dspace-curation-tasks)
 | 
			
		||||
Metadata curation tasks used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
 | 
			
		||||
# CGSpace Java Helpers [Build Status](https://github.com/ilri/cgspace-java-helpers/actions)
 | 
			
		||||
DSpace curation tasks and other Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
 | 
			
		||||
 | 
			
		||||
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
 | 
			
		||||
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
 | 
			
		||||
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
 | 
			
		||||
 | 
			
		||||
Tested on DSpace 5.8. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC5x/Curation+System).
 | 
			
		||||
Tested on DSpace 6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC6x/Curation+System).
 | 
			
		||||
 | 
			
		||||
## Build and Install
 | 
			
		||||
 | 
			
		||||
@@ -13,8 +15,8 @@ To use these curation tasks in a DSpace project add the following dependency to
 | 
			
		||||
```
 | 
			
		||||
<dependency>
 | 
			
		||||
  <groupId>io.github.ilri.cgspace</groupId>
 | 
			
		||||
  <artifactId>dspace-curation-tasks</artifactId>
 | 
			
		||||
  <version>1.0-SNAPSHOT</version>
 | 
			
		||||
  <artifactId>cgspace-java-helpers</artifactId>
 | 
			
		||||
  <version>6.1-SNAPSHOT</version>
 | 
			
		||||
</dependency>
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
@@ -23,65 +25,33 @@ The jar will be copied to all DSpace applications.
 | 
			
		||||
### Manual Build and Install
 | 
			
		||||
To build the standalone jar:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
```console
 | 
			
		||||
$ mvn package
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Copy the resulting jar to the DSpace `lib` directory:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ cp target/dspace-curation-tasks-1.0-SNAPSHOT.jar ~/dspace/lib/dspace-curation-tasks-1.0-SNAPSHOT.jar
 | 
			
		||||
```console
 | 
			
		||||
$ cp target/cgspace-java-helpers-6.1-SNAPSHOT.jar ~/dspace/lib/
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Configuration
 | 
			
		||||
Add the curation task to DSpace's `config/modules/curate.cfg`:
 | 
			
		||||
Please refer to the appropriate README.md file:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
plugin.named.org.dspace.curate.CurationTask = \
 | 
			
		||||
...
 | 
			
		||||
    io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger \
 | 
			
		||||
    io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger.force
 | 
			
		||||
```
 | 
			
		||||
- Curation Tasks: [src/main/java/io/github/ilri/cgspace/ctasks/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace6/src/main/java/io/github/ilri/cgspace/ctasks/README.md)
 | 
			
		||||
- Scripts: [src/main/java/io/github/ilri/cgspace/scripts/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace6/src/main/java/io/github/ilri/cgspace/scripts/README.md)
 | 
			
		||||
 | 
			
		||||
And then add a configuration file for the task in `config/modules/countrycodetagger.cfg`:
 | 
			
		||||
## TODO
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
# name of the field containing ISO 3166-1 country names
 | 
			
		||||
iso3166.field = cg.coverage.country
 | 
			
		||||
 | 
			
		||||
# name of the field containing ISO 3166-1 Alpha2 country codes
 | 
			
		||||
iso3166-alpha2.field = cg.coverage.iso3166-alpha2
 | 
			
		||||
 | 
			
		||||
# only add country codes if an item doesn't have any (default false)
 | 
			
		||||
#forceupdate = false
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
*Note*: DSpace's curation system supports "profiles" where you can use the same task with different options, for example above I have a normal country code tagger and a "force" variant. To use the "force" variant you create a new configuration file with the overridden options in `config/modules/countrycodetagger.force.cfg`. The "force" profile clears all existing country codes and updates everything.
 | 
			
		||||
 | 
			
		||||
## Invocation
 | 
			
		||||
Once the jar is installed and you have added appropriate configuration in `~/dspace/config/modules`:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ ~/dspace/bin/dspace curate -t countrycodetagger -i 10568/3 -r - -l 500 -s object
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
*Note*: it is very important to set the cache limit (`-l`) and the database transaction scope to something sensible (`object`) if you're curating a community or collection with more than a few hundred items.
 | 
			
		||||
- Add a curation task to normalize DOIs to "https://doi.org" format
 | 
			
		||||
 | 
			
		||||
## Notes
 | 
			
		||||
This project was initially created according to the [Maven Getting Started Guide](https://maven.apache.org/guides/getting-started/):
 | 
			
		||||
 | 
			
		||||
```console
 | 
			
		||||
$ mvn -B archetype:generate -DgroupId=io.github.ilri.cgspace -DartifactId=dspace-curation-tasks -DarchetypeArtifactId=maven-archetype-quickstart -DarchetypeVersion=1.4
 | 
			
		||||
$ mvn -B archetype:generate -DgroupId=io.github.ilri.cgspace -DartifactId=cgspace-java-helpers -DarchetypeArtifactId=maven-archetype-quickstart -DarchetypeVersion=1.4
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## TODO
 | 
			
		||||
 | 
			
		||||
- Make sure this doesn't work on items in the workflow
 | 
			
		||||
- Port to DSpace 6
 | 
			
		||||
  - Remember to bump Gson version!
 | 
			
		||||
- Check for existence of metadata field before trying to add metadata
 | 
			
		||||
- Add tests
 | 
			
		||||
 | 
			
		||||
## License
 | 
			
		||||
This work is licensed under the [GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html).
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										46
									
								
								pom.xml
									
									
									
									
									
								
							
							
						
						
									
										46
									
								
								pom.xml
									
									
									
									
									
								
							@@ -5,11 +5,11 @@
 | 
			
		||||
  <modelVersion>4.0.0</modelVersion>
 | 
			
		||||
 | 
			
		||||
  <groupId>io.github.ilri.cgspace</groupId>
 | 
			
		||||
  <artifactId>dspace-curation-tasks</artifactId>
 | 
			
		||||
  <version>1.0-SNAPSHOT</version>
 | 
			
		||||
  <artifactId>cgspace-java-helpers</artifactId>
 | 
			
		||||
  <version>6.1-SNAPSHOT</version>
 | 
			
		||||
 | 
			
		||||
  <name>dspace-curation-tasks</name>
 | 
			
		||||
  <url>https://github.com/ilri/dspace-curation-tasks</url>
 | 
			
		||||
  <name>cgspace-java-helpers</name>
 | 
			
		||||
  <url>https://github.com/ilri/cgspace-java-helpers</url>
 | 
			
		||||
 | 
			
		||||
  <licenses>
 | 
			
		||||
    <license>
 | 
			
		||||
@@ -28,34 +28,28 @@
 | 
			
		||||
 | 
			
		||||
  <properties>
 | 
			
		||||
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
 | 
			
		||||
    <maven.compiler.source>1.7</maven.compiler.source>
 | 
			
		||||
    <maven.compiler.target>1.7</maven.compiler.target>
 | 
			
		||||
    <maven.compiler.source>1.8</maven.compiler.source>
 | 
			
		||||
    <maven.compiler.target>1.8</maven.compiler.target>
 | 
			
		||||
  </properties>
 | 
			
		||||
 | 
			
		||||
  <dependencies>
 | 
			
		||||
    <dependency>
 | 
			
		||||
      <groupId>junit</groupId>
 | 
			
		||||
      <artifactId>junit</artifactId>
 | 
			
		||||
      <version>4.11</version>
 | 
			
		||||
      <scope>test</scope>
 | 
			
		||||
    </dependency>
 | 
			
		||||
    <dependency>
 | 
			
		||||
      <groupId>com.google.code.gson</groupId>
 | 
			
		||||
      <artifactId>gson</artifactId>
 | 
			
		||||
      <version>2.2.1</version>
 | 
			
		||||
      <version>2.9.1</version>
 | 
			
		||||
    </dependency>
 | 
			
		||||
    <dependency>
 | 
			
		||||
      <groupId>org.dspace</groupId>
 | 
			
		||||
      <artifactId>dspace-api</artifactId>
 | 
			
		||||
      <version>5.8</version>
 | 
			
		||||
      <version>6.3</version>
 | 
			
		||||
      <scope>provided</scope>
 | 
			
		||||
    </dependency>
 | 
			
		||||
  </dependencies>
 | 
			
		||||
 | 
			
		||||
  <scm>
 | 
			
		||||
      <connection>scm:git:git://github.com/ilri/dspace-curation-tasks.git</connection>
 | 
			
		||||
      <developerConnection>scm:git:ssh://github.com:nanosai/dspace-curation-tasks.git</developerConnection>
 | 
			
		||||
      <url>http://github.com/ilri/dspace-curation-tasks</url>
 | 
			
		||||
      <connection>scm:git:git://github.com/ilri/cgspace-java-helpers.git</connection>
 | 
			
		||||
      <developerConnection>scm:git:ssh://github.com:nanosai/cgspace-java-helpers.git</developerConnection>
 | 
			
		||||
      <url>http://github.com/ilri/cgspace-java-helpers</url>
 | 
			
		||||
  </scm>
 | 
			
		||||
 | 
			
		||||
  <distributionManagement>
 | 
			
		||||
@@ -75,41 +69,41 @@
 | 
			
		||||
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-clean-plugin</artifactId>
 | 
			
		||||
          <version>3.1.0</version>
 | 
			
		||||
          <version>3.2.0</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-resources-plugin</artifactId>
 | 
			
		||||
          <version>3.0.2</version>
 | 
			
		||||
          <version>3.3.0</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-compiler-plugin</artifactId>
 | 
			
		||||
          <version>3.8.0</version>
 | 
			
		||||
          <version>3.10.1</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-surefire-plugin</artifactId>
 | 
			
		||||
          <version>2.22.1</version>
 | 
			
		||||
          <version>3.0.0-M7</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-jar-plugin</artifactId>
 | 
			
		||||
          <version>3.0.2</version>
 | 
			
		||||
          <version>3.3.0</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-install-plugin</artifactId>
 | 
			
		||||
          <version>2.5.2</version>
 | 
			
		||||
          <version>3.0.1</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-deploy-plugin</artifactId>
 | 
			
		||||
          <version>2.8.2</version>
 | 
			
		||||
          <version>3.3.0</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-site-plugin</artifactId>
 | 
			
		||||
          <version>3.7.1</version>
 | 
			
		||||
          <version>3.12.1</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
        <plugin>
 | 
			
		||||
          <artifactId>maven-project-info-reports-plugin</artifactId>
 | 
			
		||||
          <version>3.0.0</version>
 | 
			
		||||
          <version>3.4.1</version>
 | 
			
		||||
        </plugin>
 | 
			
		||||
      </plugins>
 | 
			
		||||
    </pluginManagement>
 | 
			
		||||
 
 | 
			
		||||
@@ -1,19 +1,7 @@
 | 
			
		||||
/*
 | 
			
		||||
DSpace Curation Tasks
 | 
			
		||||
Copyright (C) 2020  Alan Orth
 | 
			
		||||
 | 
			
		||||
This program is free software: you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License
 | 
			
		||||
along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
			
		||||
* Copyright (C) 2020 Alan Orth
 | 
			
		||||
*
 | 
			
		||||
* SPDX-License-Identifier: GPL-3.0-or-later
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.ctasks;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,19 +1,7 @@
 | 
			
		||||
/*
 | 
			
		||||
    DSpace Curation Tasks
 | 
			
		||||
    Copyright (C) 2020  Alan Orth
 | 
			
		||||
 | 
			
		||||
    This program is free software: you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License
 | 
			
		||||
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
			
		||||
* Copyright (C) 2020 Alan Orth
 | 
			
		||||
*
 | 
			
		||||
* SPDX-License-Identifier: GPL-3.0-or-later
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.ctasks;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,19 +1,7 @@
 | 
			
		||||
/*
 | 
			
		||||
    DSpace Curation Tasks
 | 
			
		||||
    Copyright (C) 2020  Alan Orth
 | 
			
		||||
 | 
			
		||||
    This program is free software: you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License
 | 
			
		||||
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
			
		||||
* Copyright (C) 2020 Alan Orth
 | 
			
		||||
*
 | 
			
		||||
* SPDX-License-Identifier: GPL-3.0-or-later
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.ctasks;
 | 
			
		||||
@@ -23,7 +11,7 @@ import org.apache.log4j.Logger;
 | 
			
		||||
import org.dspace.authorize.AuthorizeException;
 | 
			
		||||
import org.dspace.content.DSpaceObject;
 | 
			
		||||
import org.dspace.content.Item;
 | 
			
		||||
import org.dspace.content.Metadatum;
 | 
			
		||||
import org.dspace.content.MetadataValue;
 | 
			
		||||
import org.dspace.core.Constants;
 | 
			
		||||
import org.dspace.curate.AbstractCurationTask;
 | 
			
		||||
import org.dspace.curate.Curator;
 | 
			
		||||
@@ -81,7 +69,11 @@ public class CountryCodeTagger extends AbstractCurationTask
 | 
			
		||||
 | 
			
		||||
            Item item = (Item)dso;
 | 
			
		||||
 | 
			
		||||
            alpha2Result = performAlpha2(item, config);
 | 
			
		||||
            try {
 | 
			
		||||
                alpha2Result = performAlpha2(item, config);
 | 
			
		||||
            } catch (SQLException throwables) {
 | 
			
		||||
                throwables.printStackTrace();
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            setResult(alpha2Result.getResult());
 | 
			
		||||
            report(alpha2Result.getResult());
 | 
			
		||||
@@ -90,15 +82,14 @@ public class CountryCodeTagger extends AbstractCurationTask
 | 
			
		||||
		return alpha2Result.getStatus();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public CountryCodeTaggerResult performAlpha2(Item item, CountryCodeTaggerConfig config) throws IOException
 | 
			
		||||
    {
 | 
			
		||||
    public CountryCodeTaggerResult performAlpha2(Item item, CountryCodeTaggerConfig config) throws IOException, SQLException {
 | 
			
		||||
        CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
 | 
			
		||||
        String itemHandle = item.getHandle();
 | 
			
		||||
 | 
			
		||||
        Metadatum[] itemCountries = item.getMetadataByMetadataString(config.iso3166Field);
 | 
			
		||||
        List<MetadataValue> itemCountries = itemService.getMetadataByMetadataString(item, config.iso3166Field);
 | 
			
		||||
 | 
			
		||||
        // skip items that don't have country metadata
 | 
			
		||||
        if (itemCountries.length == 0) {
 | 
			
		||||
        if (itemCountries.size() == 0) {
 | 
			
		||||
            alpha2Result.setResult(itemHandle + ": no countries, skipping.");
 | 
			
		||||
            alpha2Result.setStatus(Curator.CURATE_SKIP);
 | 
			
		||||
        } else {
 | 
			
		||||
@@ -117,25 +108,25 @@ public class CountryCodeTagger extends AbstractCurationTask
 | 
			
		||||
            String[] iso3166Alpha2FieldParts = config.iso3166Alpha2Field.split("\\.");
 | 
			
		||||
 | 
			
		||||
            if (config.forceupdate) {
 | 
			
		||||
                item.clearMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], Item.ANY);
 | 
			
		||||
                itemService.clearMetadata(Curator.curationContext(), item, iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], Item.ANY);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // check the item's country codes, if any
 | 
			
		||||
            Metadatum[] itemAlpha2CountryCodes = item.getMetadataByMetadataString(config.iso3166Alpha2Field);
 | 
			
		||||
            List<MetadataValue> itemAlpha2CountryCodes = itemService.getMetadataByMetadataString(item, config.iso3166Alpha2Field);
 | 
			
		||||
 | 
			
		||||
            if (itemAlpha2CountryCodes.length == 0) {
 | 
			
		||||
            if (itemAlpha2CountryCodes.size() == 0) {
 | 
			
		||||
                List<String> newAlpha2Codes = new ArrayList<String>();
 | 
			
		||||
                for (Metadatum itemCountry : itemCountries) {
 | 
			
		||||
                for (MetadataValue itemCountry : itemCountries) {
 | 
			
		||||
                    //check ISO 3166-1 countries
 | 
			
		||||
                    for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
 | 
			
		||||
                        if (itemCountry.value.equalsIgnoreCase(country.getName()) || itemCountry.value.equalsIgnoreCase(country.get_official_name()) || itemCountry.value.equalsIgnoreCase(country.get_common_name())) {
 | 
			
		||||
                        if (itemCountry.getValue().equalsIgnoreCase(country.getName()) || itemCountry.getValue().equalsIgnoreCase(country.get_official_name()) || itemCountry.getValue().equalsIgnoreCase(country.get_common_name())) {
 | 
			
		||||
                            newAlpha2Codes.add(country.getAlpha_2());
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    //check CGSpace countries
 | 
			
		||||
                    for (CountriesVocabulary.Country country : cgspaceCountriesJson.countries) {
 | 
			
		||||
                        if (itemCountry.value.equalsIgnoreCase(country.getCgspace_name())) {
 | 
			
		||||
                        if (itemCountry.getValue().equalsIgnoreCase(country.getCgspace_name())) {
 | 
			
		||||
                            newAlpha2Codes.add(country.getAlpha_2());
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
@@ -143,9 +134,8 @@ public class CountryCodeTagger extends AbstractCurationTask
 | 
			
		||||
 | 
			
		||||
                if (newAlpha2Codes.size() > 0) {
 | 
			
		||||
                    try {
 | 
			
		||||
                        // add metadata values (casting the List<String> to an array)
 | 
			
		||||
                        item.addMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], "en_US", newAlpha2Codes.toArray(new String[0]));
 | 
			
		||||
                        item.update();
 | 
			
		||||
                        itemService.addMetadata(Curator.curationContext(), item, iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], "en_US", newAlpha2Codes);
 | 
			
		||||
                        itemService.update(Curator.curationContext(), item);
 | 
			
		||||
                    } catch (SQLException | AuthorizeException sqle) {
 | 
			
		||||
                        config.log.debug(sqle.getMessage());
 | 
			
		||||
                        alpha2Result.setResult(itemHandle + ": error");
 | 
			
		||||
 
 | 
			
		||||
@@ -1,19 +1,7 @@
 | 
			
		||||
/*
 | 
			
		||||
DSpace Curation Tasks
 | 
			
		||||
Copyright (C) 2020  Alan Orth
 | 
			
		||||
 | 
			
		||||
This program is free software: you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License
 | 
			
		||||
along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
			
		||||
* Copyright (C) 2020 Alan Orth
 | 
			
		||||
*
 | 
			
		||||
* SPDX-License-Identifier: GPL-3.0-or-later
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.ctasks;
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										72
									
								
								src/main/java/io/github/ilri/cgspace/ctasks/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								src/main/java/io/github/ilri/cgspace/ctasks/README.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,72 @@
 | 
			
		||||
# Curation Tasks
 | 
			
		||||
DSpace curation tasks used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
 | 
			
		||||
 | 
			
		||||
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
 | 
			
		||||
 | 
			
		||||
Tested on DSpace 6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC6x/Curation+System).
 | 
			
		||||
 | 
			
		||||
## Build and Install
 | 
			
		||||
 | 
			
		||||
### Integrate into DSpace Build
 | 
			
		||||
To use these curation tasks in a DSpace project add the following dependency to `dspace/modules/additions/pom.xml`:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
<dependency>
 | 
			
		||||
  <groupId>io.github.ilri.cgspace</groupId>
 | 
			
		||||
  <artifactId>cgspace-java-helpers</artifactId>
 | 
			
		||||
  <version>6.1-SNAPSHOT</version>
 | 
			
		||||
</dependency>
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The jar will be copied to all DSpace applications.
 | 
			
		||||
 | 
			
		||||
### Manual Build and Install
 | 
			
		||||
To build the standalone jar:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ mvn package
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Copy the resulting jar to the DSpace `lib` directory:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ cp target/cgspace-java-helpers-6.1-SNAPSHOT.jar ~/dspace/lib/
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Configuration
 | 
			
		||||
Add the curation task to DSpace's `config/modules/curate.cfg`:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger
 | 
			
		||||
plugin.named.org.dspace.curate.CurationTask = io.github.ilri.cgspace.ctasks.CountryCodeTagger = countrycodetagger.force
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
And then add the following variables to your `local.cfg` or some other [configuration file that is included](https://wiki.lyrasis.org/display/DSDOC6x/Configuration+Reference#ConfigurationReference-IncludingotherPropertyFiles):
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
# name of the field containing ISO 3166-1 country names
 | 
			
		||||
countrycodetagger.iso3166.field = cg.coverage.country
 | 
			
		||||
 | 
			
		||||
# name of the field containing ISO 3166-1 Alpha2 country codes
 | 
			
		||||
countrycodetagger.iso3166-alpha2.field = cg.coverage.iso3166-alpha2
 | 
			
		||||
 | 
			
		||||
# when false (the default), only add country codes to items that don't have any; when true, clear existing codes first and re-add them
 | 
			
		||||
#countrycodetagger.forceupdate = false
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
*Note*: DSpace's curation system supports "profiles", which let you use the same task with different options. For example, above I have a normal country code tagger task and a "force" variant. The "force" variant is the same task, but it looks for configuration variables using the `countrycodetagger.force` prefix instead. To use the "force" variant you simply need to add these new variables, with the `forceupdate` parameter overridden, to the same configuration file where you put the other variables. The "force" profile clears all existing country codes and updates everything.
 | 
			
		||||
 | 
			
		||||
## Invocation
 | 
			
		||||
Once the jar is installed and you have added appropriate configuration in `~/dspace/config/modules`:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ ~/dspace/bin/dspace curate -t countrycodetagger -i 10568/3 -r - -s object
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
*Note*: it is very important to set the database transaction scope to something sensible (`object`) if you're curating a community or collection with more than a few hundred items.
 | 
			
		||||
 | 
			
		||||
## TODO
 | 
			
		||||
 | 
			
		||||
- Make sure this doesn't operate on items that are still in the workflow
 | 
			
		||||
- Check for existence of metadata field before trying to add metadata
 | 
			
		||||
- Add tests
 | 
			
		||||
@@ -0,0 +1,146 @@
 | 
			
		||||
/*
 | 
			
		||||
* Copyright (C) 2020 Alan Orth
 | 
			
		||||
*
 | 
			
		||||
* SPDX-License-Identifier: GPL-3.0-or-later
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.scripts;
 | 
			
		||||
 | 
			
		||||
import org.apache.commons.lang.StringUtils;
 | 
			
		||||
import org.dspace.authorize.AuthorizeException;
 | 
			
		||||
import org.dspace.content.*;
 | 
			
		||||
import org.dspace.core.Constants;
 | 
			
		||||
import org.dspace.core.Context;
 | 
			
		||||
import org.dspace.content.factory.ContentServiceFactory;
 | 
			
		||||
import org.dspace.content.service.ItemService;
 | 
			
		||||
import org.dspace.handle.factory.HandleServiceFactory;
 | 
			
		||||
import org.dspace.handle.service.HandleService;
 | 
			
		||||
import org.dspace.content.service.BundleService;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.sql.SQLException;
 | 
			
		||||
import java.util.Iterator;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * @author Andrea Schweer schweer@waikato.ac.nz for the LCoNZ Institutional Research Repositories
 | 
			
		||||
 * @author Alan Orth for the International Livestock Research Institute
 | 
			
		||||
 * @version 6.1
 | 
			
		||||
 * @since 5.1
 | 
			
		||||
 */
 | 
			
		||||
public class FixJpgJpgThumbnails {
 | 
			
		||||
	//note: static members belong to the class itself, not any one instance
 | 
			
		||||
	public static ItemService itemService = ContentServiceFactory.getInstance().getItemService();
 | 
			
		||||
	public static HandleService handleService = HandleServiceFactory.getInstance().getHandleService();
 | 
			
		||||
	public static BundleService bundleService = ContentServiceFactory.getInstance().getBundleService();
 | 
			
		||||
 | 
			
		||||
	public static void main(String[] args) {
 | 
			
		||||
		String parentHandle = null;
 | 
			
		||||
		if (args.length >= 1) {
 | 
			
		||||
			parentHandle = args[0];
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		Context context = null;
 | 
			
		||||
		try {
 | 
			
		||||
			context = new Context();
 | 
			
		||||
			context.turnOffAuthorisationSystem();
 | 
			
		||||
 | 
			
		||||
			if (StringUtils.isBlank(parentHandle)) {
 | 
			
		||||
				process(context, itemService.findAll(context));
 | 
			
		||||
			} else {
 | 
			
		||||
				DSpaceObject parent = handleService.resolveToObject(context, parentHandle);
 | 
			
		||||
				if (parent != null) {
 | 
			
		||||
					switch (parent.getType()) {
 | 
			
		||||
						case Constants.COLLECTION:
 | 
			
		||||
							process(context, itemService.findByCollection(context, (Collection) parent));
 | 
			
		||||
							break;
 | 
			
		||||
						case Constants.COMMUNITY:
 | 
			
		||||
							List<Collection> collections = ((Community) parent).getCollections();
 | 
			
		||||
							for (Collection collection : collections) {
 | 
			
		||||
								process(context, itemService.findAllByCollection(context, collection));
 | 
			
		||||
							}
 | 
			
		||||
							break;
 | 
			
		||||
						case Constants.SITE:
 | 
			
		||||
							process(context, itemService.findAll(context));
 | 
			
		||||
							break;
 | 
			
		||||
						case Constants.ITEM:
 | 
			
		||||
							processItem(context, (Item) parent);
 | 
			
		||||
							context.commit();
 | 
			
		||||
							break;
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		} catch (SQLException | AuthorizeException | IOException e) {
 | 
			
		||||
			e.printStackTrace(System.err);
 | 
			
		||||
		} finally {
 | 
			
		||||
			if (context != null && context.isValid()) {
 | 
			
		||||
				context.abort();
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	private static void process(Context context, Iterator<Item> items) throws SQLException, IOException, AuthorizeException {
 | 
			
		||||
		while (items.hasNext()) {
 | 
			
		||||
			Item item = items.next();
 | 
			
		||||
			processItem(context, item);
 | 
			
		||||
			itemService.update(context, item);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	private static void processItem(Context context, Item item) throws SQLException, AuthorizeException, IOException {
 | 
			
		||||
		// Some bitstreams like Infographics are large JPGs and put in the ORIGINAL bundle on purpose so we shouldn't
 | 
			
		||||
		// swap them.
 | 
			
		||||
		List<MetadataValue> itemTypes = itemService.getMetadataByMetadataString(item, "dcterms.type");
 | 
			
		||||
		boolean itemHasInfographic = false;
 | 
			
		||||
		for (MetadataValue itemType: itemTypes) {
 | 
			
		||||
			if (itemType.getValue().equals("Infographic")) {
 | 
			
		||||
				itemHasInfographic = true;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		List<Bundle> thumbnailBundles = item.getBundles("THUMBNAIL");
 | 
			
		||||
		for (Bundle thumbnailBundle : thumbnailBundles) {
 | 
			
		||||
			List<Bitstream> thumbnailBundleBitstreams = thumbnailBundle.getBitstreams();
 | 
			
		||||
			for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
 | 
			
		||||
				String thumbnailName = thumbnailBitstream.getName();
 | 
			
		||||
 | 
			
		||||
				if (thumbnailName.toLowerCase().contains(".jpg.jpg")) {
 | 
			
		||||
					List<Bundle> originalBundles = item.getBundles("ORIGINAL");
 | 
			
		||||
					for (Bundle originalBundle : originalBundles) {
 | 
			
		||||
						List<Bitstream> originalBundleBitstreams = originalBundle.getBitstreams();
 | 
			
		||||
 | 
			
		||||
						for (Bitstream originalBitstream : originalBundleBitstreams) {
 | 
			
		||||
							String originalName = originalBitstream.getName();
 | 
			
		||||
 | 
			
		||||
							long originalBitstreamBytes = originalBitstream.getSize();
 | 
			
		||||
 | 
			
		||||
							/*
 | 
			
		||||
							- check if the original file name is the same as the thumbnail name minus the extra ".jpg"
 | 
			
		||||
							- check if the thumbnail description indicates it was automatically generated
 | 
			
		||||
							- check if the item has dc.type Infographic (JPG could be the "real" item!)
 | 
			
		||||
							- check if the original bitstream is less than ~100KiB
 | 
			
		||||
							    - Note: in my tests there were 4022 items with ".jpg.jpg" thumbnails totaling 394549249
 | 
			
		||||
							      bytes for an average of about 98KiB so ~100KiB seems like a good cut off
 | 
			
		||||
							*/
 | 
			
		||||
							if (
 | 
			
		||||
									originalName.equalsIgnoreCase(StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg"))
 | 
			
		||||
									&& ("Generated Thumbnail".equals(thumbnailBitstream.getDescription()) || "IM Thumbnail".equals(thumbnailBitstream.getDescription()))
 | 
			
		||||
									&& !itemHasInfographic
 | 
			
		||||
									&& originalBitstreamBytes < 100000
 | 
			
		||||
							) {
 | 
			
		||||
								System.out.println(item.getHandle() + ": replacing " + thumbnailName + " with " + originalName);
 | 
			
		||||
 | 
			
		||||
								//add the original bitstream to the THUMBNAIL bundle
 | 
			
		||||
								bundleService.addBitstream(context, thumbnailBundle, originalBitstream);
 | 
			
		||||
								//remove the original bitstream from the ORIGINAL bundle
 | 
			
		||||
								originalBundle.removeBitstream(originalBitstream);
 | 
			
		||||
								//remove the JpgJpg bitstream from the THUMBNAIL bundle
 | 
			
		||||
								thumbnailBundle.removeBitstream(thumbnailBitstream);
 | 
			
		||||
							}
 | 
			
		||||
						}
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
@@ -0,0 +1,270 @@
 | 
			
		||||
/*
 | 
			
		||||
* Copyright (C) 2022 Alan Orth
 | 
			
		||||
*
 | 
			
		||||
* SPDX-License-Identifier: GPL-3.0-or-later
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
package io.github.ilri.cgspace.scripts;
 | 
			
		||||
 | 
			
		||||
import org.apache.commons.lang.StringUtils;
 | 
			
		||||
import org.dspace.authorize.AuthorizeException;
 | 
			
		||||
import org.dspace.content.*;
 | 
			
		||||
import org.dspace.content.factory.ContentServiceFactory;
 | 
			
		||||
import org.dspace.content.service.BundleService;
 | 
			
		||||
import org.dspace.content.service.ItemService;
 | 
			
		||||
import org.dspace.core.Constants;
 | 
			
		||||
import org.dspace.core.Context;
 | 
			
		||||
import org.dspace.handle.factory.HandleServiceFactory;
 | 
			
		||||
import org.dspace.handle.service.HandleService;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.sql.SQLException;
 | 
			
		||||
import java.util.Iterator;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Fix low-quality thumbnails in a DSpace repository.
 | 
			
		||||
 *
 | 
			
		||||
 * <p>Search the DSpace repository for items containing bitstreams matching the following criteria:
 | 
			
		||||
 *
 | 
			
		||||
 * <ul>
 | 
			
		||||
 *   <li>If an item has an <code>IM Thumbnail</code> and a <code>Generated Thumbnail</code> in the
 | 
			
		||||
 *       <code>THUMBNAIL</code> bundle, remove the <code>Generated Thumbnail</code>.
 | 
			
		||||
 *   <li>If an item has a PDF bitstream and a JPEG bitstream with description "thumbnail" in the
 | 
			
		||||
 *       <code>ORIGINAL</code> bundle, remove the "thumbnail" bitstream in the ORIGINAL bundle.
 | 
			
		||||
 * </ul>
 | 
			
		||||
 *
 | 
			
		||||
 * <p>The general idea is that we should always prefer thumbnails generated from PDFs by ImageMagick
 | 
			
		||||
 * to manually uploaded JPEGs because ImageMagick Thumbnails can be regenerated with higher quality,
 | 
			
		||||
 * resolution, etc. Furthermore, if there are JPEG bitstreams in the ORIGINAL bundle DSpace will
 | 
			
		||||
 * automatically create ".jpg.jpg" thumbnails from them in the THUMBNAIL bundle so we should remove
 | 
			
		||||
 * those as well!
 | 
			
		||||
 *
 | 
			
		||||
 * @author Andrea Schweer schweer@waikato.ac.nz for the LCoNZ Institutional Research Repositories
 | 
			
		||||
 * @author Alan Orth for the International Livestock Research Institute
 | 
			
		||||
 * @version 6.1
 | 
			
		||||
 * @since 6.1
 | 
			
		||||
 * @see FixJpgJpgThumbnails
 | 
			
		||||
 */
 | 
			
		||||
public class FixLowQualityThumbnails {
 | 
			
		||||
    // note: static members belong to the class itself, not any one instance
 | 
			
		||||
    public static ItemService itemService = ContentServiceFactory.getInstance().getItemService();
 | 
			
		||||
    public static HandleService handleService =
 | 
			
		||||
            HandleServiceFactory.getInstance().getHandleService();
 | 
			
		||||
    public static BundleService bundleService =
 | 
			
		||||
            ContentServiceFactory.getInstance().getBundleService();
 | 
			
		||||
 | 
			
		||||
    public static void main(String[] args) {
 | 
			
		||||
        String parentHandle = null;
 | 
			
		||||
        if (args.length >= 1) {
 | 
			
		||||
            parentHandle = args[0];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Context context = null;
 | 
			
		||||
        try {
 | 
			
		||||
            context = new Context();
 | 
			
		||||
            context.turnOffAuthorisationSystem();
 | 
			
		||||
 | 
			
		||||
            if (StringUtils.isBlank(parentHandle)) {
 | 
			
		||||
                process(context, itemService.findAll(context));
 | 
			
		||||
            } else {
 | 
			
		||||
                DSpaceObject parent = handleService.resolveToObject(context, parentHandle);
 | 
			
		||||
                if (parent != null) {
 | 
			
		||||
                    switch (parent.getType()) {
 | 
			
		||||
                        case Constants.COLLECTION:
 | 
			
		||||
                            process(
 | 
			
		||||
                                    context,
 | 
			
		||||
                                    itemService.findByCollection(context, (Collection) parent));
 | 
			
		||||
                            break;
 | 
			
		||||
                        case Constants.COMMUNITY:
 | 
			
		||||
                            List<Collection> collections = ((Community) parent).getCollections();
 | 
			
		||||
                            for (Collection collection : collections) {
 | 
			
		||||
                                process(
 | 
			
		||||
                                        context,
 | 
			
		||||
                                        itemService.findAllByCollection(context, collection));
 | 
			
		||||
                            }
 | 
			
		||||
                            break;
 | 
			
		||||
                        case Constants.SITE:
 | 
			
		||||
                            process(context, itemService.findAll(context));
 | 
			
		||||
                            break;
 | 
			
		||||
                        case Constants.ITEM:
 | 
			
		||||
                            processItem(context, (Item) parent);
 | 
			
		||||
                            context.commit();
 | 
			
		||||
                            break;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        } catch (SQLException | AuthorizeException | IOException e) {
 | 
			
		||||
            e.printStackTrace(System.err);
 | 
			
		||||
        } finally {
 | 
			
		||||
            if (context != null && context.isValid()) {
 | 
			
		||||
                context.abort();
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private static void process(Context context, Iterator<Item> items)
 | 
			
		||||
            throws SQLException, IOException, AuthorizeException {
 | 
			
		||||
        while (items.hasNext()) {
 | 
			
		||||
            Item item = items.next();
 | 
			
		||||
            processItem(context, item);
 | 
			
		||||
            itemService.update(context, item);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private static void processItem(Context context, Item item)
 | 
			
		||||
            throws SQLException, AuthorizeException, IOException {
 | 
			
		||||
        // Set some state for the item before we iterate over the THUMBNAIL bundle
 | 
			
		||||
        boolean itemHasImThumbnail = false;
 | 
			
		||||
 | 
			
		||||
        // Iterate over the THUMBNAIL bundle to first identify if this item has an "IM Thumbnail"
 | 
			
		||||
        List<Bundle> thumbnailBundles = item.getBundles("THUMBNAIL");
 | 
			
		||||
        for (Bundle thumbnailBundle : thumbnailBundles) {
 | 
			
		||||
            List<Bitstream> thumbnailBundleBitstreams = thumbnailBundle.getBitstreams();
 | 
			
		||||
            for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
 | 
			
		||||
                String thumbnailDescription = thumbnailBitstream.getDescription();
 | 
			
		||||
 | 
			
		||||
                if (StringUtils.isEmpty(thumbnailDescription)) {
 | 
			
		||||
                    continue;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                // Check if this item has a bitstream in the THUMBNAIL bundle with description "IM
 | 
			
		||||
                // Thumbnail", but only if we haven't already seen one in another iteration for this
 | 
			
		||||
                // bundle.
 | 
			
		||||
                if (!itemHasImThumbnail && "IM Thumbnail".equals(thumbnailDescription)) {
 | 
			
		||||
                    itemHasImThumbnail = true;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // If this item has an IM Thumbnail we can be reasonably sure that there is a PDF
 | 
			
		||||
            // in the ORIGINAL bundle and we don't need any other thumbnails.
 | 
			
		||||
            if (itemHasImThumbnail) {
 | 
			
		||||
                // Iterate over the bitstreams in the THUMBNAIL bundle again.
 | 
			
		||||
                for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
 | 
			
		||||
                    String thumbnailName = thumbnailBitstream.getName();
 | 
			
		||||
                    String thumbnailDescription = thumbnailBitstream.getDescription();
 | 
			
		||||
 | 
			
		||||
                    if (StringUtils.isEmpty(thumbnailDescription)) {
 | 
			
		||||
                        continue;
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    // If this item has a "Generated Thumbnail" we can remove it, because those
 | 
			
		||||
                    // typically come from other JPEGs in the ORIGINAL bundle and we would prefer
 | 
			
		||||
                    // the IM Thumbnail generated from a PDF anyway. The DSpace-generated descri-
 | 
			
		||||
                    // ption will *always* be "Generated Thumbnail".
 | 
			
		||||
                    if ("Generated Thumbnail".equals(thumbnailDescription)) {
 | 
			
		||||
                        System.out.print("\u001b[33m");
 | 
			
		||||
                        System.out.println("Deleting (" + item.getHandle() + "):");
 | 
			
		||||
                        System.out.println("> Name: »" + thumbnailName + "«");
 | 
			
		||||
                        System.out.println("> Description: »" + thumbnailDescription + "«");
 | 
			
		||||
                        System.out.print("\u001b[0m");
 | 
			
		||||
 | 
			
		||||
                        // Remove the "Generated Thumbnail" bitstream from the THUMBNAIL bundle
 | 
			
		||||
                        thumbnailBundle.removeBitstream(thumbnailBitstream);
 | 
			
		||||
 | 
			
		||||
                        // If this item has a bitstream with the word "thumbnail" in it then we can
 | 
			
		||||
                        // remove it because we already know this item has an IM Thumbnail and we
 | 
			
		||||
                        // prefer that one.
 | 
			
		||||
                    } else if (thumbnailDescription.toLowerCase().contains("thumbnail")
 | 
			
		||||
                            && !"IM Thumbnail".equals(thumbnailDescription)) {
 | 
			
		||||
                        System.out.print("\u001b[33m");
 | 
			
		||||
                        System.out.println("Deleting (" + item.getHandle() + "):");
 | 
			
		||||
                        System.out.println("> Name: »" + thumbnailName + "«");
 | 
			
		||||
                        System.out.println("> Description: »" + thumbnailDescription + "«");
 | 
			
		||||
                        System.out.print("\u001b[0m");
 | 
			
		||||
 | 
			
		||||
                        // Remove the "thumbnail" bitstream from the THUMBNAIL bundle
 | 
			
		||||
                        thumbnailBundle.removeBitstream(thumbnailBitstream);
 | 
			
		||||
 | 
			
		||||
                        // Otherwise skip it because it might be something uploaded manually, like
 | 
			
		||||
                        // a thumbnail for a journal or a limited access item.
 | 
			
		||||
                    } else {
 | 
			
		||||
                        System.out.print("\u001b[34m");
 | 
			
		||||
                        System.out.println("Skipping (" + item.getHandle() + "):");
 | 
			
		||||
                        System.out.println("> Name: »" + thumbnailName + "«");
 | 
			
		||||
                        System.out.println("> Description: »" + thumbnailDescription + "«");
 | 
			
		||||
                        System.out.print("\u001b[0m");
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    // Print a blank line
 | 
			
		||||
                    System.out.println();
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Set some state before we iterate over the ORIGINAL bundle
 | 
			
		||||
        boolean itemHasOriginalPdfBitstream = false;
 | 
			
		||||
        boolean itemHasOriginalJpegBitstream = false;
 | 
			
		||||
 | 
			
		||||
        // Iterate over the ORIGINAL bundle to delete manually-uploaded JPEG
 | 
			
		||||
        // bitstreams labeled "Thumbnail" whenever we have a PDF because they
 | 
			
		||||
        // don't belong in the ORIGINAL bundle and DSpace will automatically
 | 
			
		||||
        // create a better thumbnail from the PDF anyway.
 | 
			
		||||
        List<Bundle> originalBundles = item.getBundles("ORIGINAL");
 | 
			
		||||
        for (Bundle originalBundle : originalBundles) {
 | 
			
		||||
            List<Bitstream> originalBundleBitstreams = originalBundle.getBitstreams();
 | 
			
		||||
            for (Bitstream originalBitstream : originalBundleBitstreams) {
 | 
			
		||||
                String originalFormat = originalBitstream.getFormat(context).getMIMEType();
 | 
			
		||||
 | 
			
		||||
                // Check if this item has a PDF bitstream in the ORIGINAL bundle,
 | 
			
		||||
                // but only if we haven't already seen one in another iteration
 | 
			
		||||
                // for this bundle. DSpace will return "format application/pdf"
 | 
			
		||||
                // for the MIME type.
 | 
			
		||||
                if (!itemHasOriginalPdfBitstream && originalFormat.contains("application/pdf")) {
 | 
			
		||||
                    itemHasOriginalPdfBitstream = true;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                // Check if this item has a JPEG bitstream in the ORIGINAL bundle,
 | 
			
		||||
                // but only if we haven't already seen one in another iteration
 | 
			
		||||
                // for this bundle. DSpace will return "format image/jpeg" for
 | 
			
		||||
                // the MIME type.
 | 
			
		||||
                if (!itemHasOriginalJpegBitstream && originalFormat.contains("image/jpeg")) {
 | 
			
		||||
                    itemHasOriginalJpegBitstream = true;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // Check if we found a PDF *and* a JPEG in this item's ORIGINAL
 | 
			
		||||
            // bundle.
 | 
			
		||||
            if (itemHasOriginalPdfBitstream && itemHasOriginalJpegBitstream) {
 | 
			
		||||
                // Yes! Now iterate over the bitstreams in the ORIGINAL bundle
 | 
			
		||||
                // again to see if the JPEG is a manually uploaded "Thumbnail"
 | 
			
		||||
                for (Bitstream originalBitstream : originalBundleBitstreams) {
 | 
			
		||||
                    String originalName = originalBitstream.getName();
 | 
			
		||||
                    String originalDescription = originalBitstream.getDescription();
 | 
			
		||||
                    String originalFormat = originalBitstream.getFormat(context).getMIMEType();
 | 
			
		||||
 | 
			
		||||
                    if (StringUtils.isEmpty(originalDescription)) {
 | 
			
		||||
                        continue;
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    /*
 | 
			
		||||
                    - check if the bitstream is a JPEG based on its MIME Type
 | 
			
		||||
                    - check if the bitstream's name or description is "Thumbnail"
 | 
			
		||||
                    */
 | 
			
		||||
                    if (originalFormat.toLowerCase().contains("image/jpeg")
 | 
			
		||||
                            && (originalName.toLowerCase().contains("thumbnail")
 | 
			
		||||
                                    || originalDescription.toLowerCase().contains("thumbnail"))) {
 | 
			
		||||
                        System.out.print("\u001b[33m");
 | 
			
		||||
                        System.out.println("Removing (" + item.getHandle() + "):");
 | 
			
		||||
                        System.out.println("> Name: »" + originalName + "«");
 | 
			
		||||
                        System.out.println("> Description: »" + originalDescription + "«");
 | 
			
		||||
                        System.out.print("\u001b[0m");
 | 
			
		||||
 | 
			
		||||
                        // Remove the original bitstream from the ORIGINAL bundle
 | 
			
		||||
                        originalBundle.removeBitstream(originalBitstream);
 | 
			
		||||
 | 
			
		||||
                    } else {
 | 
			
		||||
                        System.out.print("\u001b[34m");
 | 
			
		||||
                        System.out.println("Skipping (" + item.getHandle() + "):");
 | 
			
		||||
                        System.out.println("> Name: »" + originalName + "«");
 | 
			
		||||
                        System.out.println("> Description: »" + originalDescription + "«");
 | 
			
		||||
                        System.out.print("\u001b[0m");
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    // Print a blank line
 | 
			
		||||
                    System.out.println();
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										42
									
								
								src/main/java/io/github/ilri/cgspace/scripts/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								src/main/java/io/github/ilri/cgspace/scripts/README.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,42 @@
 | 
			
		||||
# Scripts
 | 
			
		||||
Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
 | 
			
		||||
 | 
			
		||||
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
 | 
			
		||||
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
 | 
			
		||||
 | 
			
		||||
Tested on DSpace 6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC6x/Curation+System).
 | 
			
		||||
 | 
			
		||||
## Build and Install
 | 
			
		||||
 | 
			
		||||
### Integrate into DSpace Build
 | 
			
		||||
To use these scripts in a DSpace project add the following dependency to `dspace/modules/additions/pom.xml`:
 | 
			
		||||
 | 
			
		||||
```xml
 | 
			
		||||
<dependency>
 | 
			
		||||
  <groupId>io.github.ilri.cgspace</groupId>
 | 
			
		||||
  <artifactId>cgspace-java-helpers</artifactId>
 | 
			
		||||
  <version>6.1-SNAPSHOT</version>
 | 
			
		||||
</dependency>
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The jar will be copied to all DSpace applications.
 | 
			
		||||
 | 
			
		||||
### Manual Build and Install
 | 
			
		||||
To build the standalone jar:
 | 
			
		||||
 | 
			
		||||
```console
 | 
			
		||||
$ mvn package
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Copy the resulting jar to the DSpace `lib` directory:
 | 
			
		||||
 | 
			
		||||
```console
 | 
			
		||||
$ cp target/cgspace-java-helpers-6.1-SNAPSHOT.jar ~/dspace/lib/
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Invocation
 | 
			
		||||
The scripts take one optional argument, the handle of a community, collection, or item to process (if it is omitted, all items in the repository are processed):
 | 
			
		||||
 | 
			
		||||
```console
 | 
			
		||||
$ dspace dsrun io.github.ilri.cgspace.scripts.FixJpgJpgThumbnails 10568/83389
 | 
			
		||||
```
 | 
			
		||||
		Reference in New Issue
	
	Block a user