mirror of
https://github.com/ilri/cgspace-java-helpers.git
synced 2024-11-28 09:38:21 +01:00
Compare commits
10 Commits
3aa1503163
...
f0754ab419
Author | SHA1 | Date | |
---|---|---|---|
f0754ab419 | |||
6772145bec | |||
b31557aa05 | |||
095f843067 | |||
f7fda9922f | |||
83a416afaf | |||
922e3892a7 | |||
6b648c2c85 | |||
781ddcd931 | |||
49cb8e3468 |
6
.gitignore
vendored
6
.gitignore
vendored
@ -70,4 +70,10 @@ fabric.properties
|
|||||||
# Android studio 3.1+ serialized cache file
|
# Android studio 3.1+ serialized cache file
|
||||||
.idea/caches/build_file_checksums.ser
|
.idea/caches/build_file_checksums.ser
|
||||||
|
|
||||||
|
# VS Code settings
|
||||||
|
.vscode
|
||||||
|
|
||||||
|
# asdf-vm tool-versions file
|
||||||
|
.tool-versions
|
||||||
|
|
||||||
target/
|
target/
|
||||||
|
15
CHANGELOG.md
Normal file
15
CHANGELOG.md
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
# Changelog
|
||||||
|
All notable changes to this project will be documented in this file.
|
||||||
|
|
||||||
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## Unreleased
|
||||||
|
### Updated
|
||||||
|
- Update dependencies in `pom.xml`
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- Java compiler and target from JDK 7 to JDK 8
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- New `FixLowQualityThumbnails` script to detect and remove more low-quality thumbnails
|
@ -2,7 +2,8 @@
|
|||||||
DSpace curation tasks and other Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
|
DSpace curation tasks and other Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
|
||||||
|
|
||||||
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
|
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
|
||||||
- **FixJpgJpgThumbnails**: Fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
|
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
|
||||||
|
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
|
||||||
|
|
||||||
Tested on DSpace 6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC5x/Curation+System).
|
Tested on DSpace 6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC6x/Curation+System).
|
||||||
|
|
||||||
@ -24,13 +25,13 @@ The jar will be copied to all DSpace applications.
|
|||||||
### Manual Build and Install
|
### Manual Build and Install
|
||||||
To build the standalone jar:
|
To build the standalone jar:
|
||||||
|
|
||||||
```
|
```console
|
||||||
$ mvn package
|
$ mvn package
|
||||||
```
|
```
|
||||||
|
|
||||||
Copy the resulting jar to the DSpace `lib` directory:
|
Copy the resulting jar to the DSpace `lib` directory:
|
||||||
|
|
||||||
```
|
```console
|
||||||
$ cp target/cgspace-java-helpers-6.1-SNAPSHOT.jar ~/dspace/lib/
|
$ cp target/cgspace-java-helpers-6.1-SNAPSHOT.jar ~/dspace/lib/
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -40,7 +41,7 @@ Please refer to the appropriate README.md file:
|
|||||||
- Curation Tasks: [src/main/java/io/github/ilri/cgspace/ctasks/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace6/src/main/java/io/github/ilri/cgspace/ctasks/README.md)
|
- Curation Tasks: [src/main/java/io/github/ilri/cgspace/ctasks/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace6/src/main/java/io/github/ilri/cgspace/ctasks/README.md)
|
||||||
- Scripts: [src/main/java/io/github/ilri/cgspace/scripts/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace6/src/main/java/io/github/ilri/cgspace/scripts/README.md)
|
- Scripts: [src/main/java/io/github/ilri/cgspace/scripts/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace6/src/main/java/io/github/ilri/cgspace/scripts/README.md)
|
||||||
|
|
||||||
## Todo
|
## TODO
|
||||||
|
|
||||||
- Add a curation task to normalize DOIs to "https://doi.org" format
|
- Add a curation task to normalize DOIs to "https://doi.org" format
|
||||||
|
|
||||||
|
30
pom.xml
30
pom.xml
@ -28,21 +28,15 @@
|
|||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
<maven.compiler.source>1.7</maven.compiler.source>
|
<maven.compiler.source>1.8</maven.compiler.source>
|
||||||
<maven.compiler.target>1.7</maven.compiler.target>
|
<maven.compiler.target>1.8</maven.compiler.target>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
|
||||||
<groupId>junit</groupId>
|
|
||||||
<artifactId>junit</artifactId>
|
|
||||||
<version>4.11</version>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.google.code.gson</groupId>
|
<groupId>com.google.code.gson</groupId>
|
||||||
<artifactId>gson</artifactId>
|
<artifactId>gson</artifactId>
|
||||||
<version>2.6.1</version>
|
<version>2.9.1</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.dspace</groupId>
|
<groupId>org.dspace</groupId>
|
||||||
@ -75,41 +69,41 @@
|
|||||||
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
|
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
|
||||||
<plugin>
|
<plugin>
|
||||||
<artifactId>maven-clean-plugin</artifactId>
|
<artifactId>maven-clean-plugin</artifactId>
|
||||||
<version>3.1.0</version>
|
<version>3.2.0</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
|
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
|
||||||
<plugin>
|
<plugin>
|
||||||
<artifactId>maven-resources-plugin</artifactId>
|
<artifactId>maven-resources-plugin</artifactId>
|
||||||
<version>3.0.2</version>
|
<version>3.3.0</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<artifactId>maven-compiler-plugin</artifactId>
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
<version>3.8.0</version>
|
<version>3.10.1</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<artifactId>maven-surefire-plugin</artifactId>
|
<artifactId>maven-surefire-plugin</artifactId>
|
||||||
<version>2.22.1</version>
|
<version>3.0.0-M7</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<artifactId>maven-jar-plugin</artifactId>
|
<artifactId>maven-jar-plugin</artifactId>
|
||||||
<version>3.0.2</version>
|
<version>3.3.0</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<artifactId>maven-install-plugin</artifactId>
|
<artifactId>maven-install-plugin</artifactId>
|
||||||
<version>2.5.2</version>
|
<version>3.0.1</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<artifactId>maven-deploy-plugin</artifactId>
|
<artifactId>maven-deploy-plugin</artifactId>
|
||||||
<version>2.8.2</version>
|
<!-- maven-deploy-plugin has no 3.3.0 release; 3.0.0 matches the era of the other plugin upgrades -->
<version>3.0.0</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
|
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
|
||||||
<plugin>
|
<plugin>
|
||||||
<artifactId>maven-site-plugin</artifactId>
|
<artifactId>maven-site-plugin</artifactId>
|
||||||
<version>3.7.1</version>
|
<version>3.12.1</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<artifactId>maven-project-info-reports-plugin</artifactId>
|
<artifactId>maven-project-info-reports-plugin</artifactId>
|
||||||
<version>3.0.0</version>
|
<version>3.4.1</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
</pluginManagement>
|
</pluginManagement>
|
||||||
|
@ -1,19 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
DSpace Curation Tasks
|
* Copyright (C) 2020 Alan Orth
|
||||||
Copyright (C) 2020 Alan Orth
|
*
|
||||||
|
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package io.github.ilri.cgspace.ctasks;
|
package io.github.ilri.cgspace.ctasks;
|
||||||
|
@ -1,19 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
DSpace Curation Tasks
|
* Copyright (C) 2020 Alan Orth
|
||||||
Copyright (C) 2020 Alan Orth
|
*
|
||||||
|
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package io.github.ilri.cgspace.ctasks;
|
package io.github.ilri.cgspace.ctasks;
|
||||||
|
@ -1,19 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
DSpace Curation Tasks
|
* Copyright (C) 2020 Alan Orth
|
||||||
Copyright (C) 2020 Alan Orth
|
*
|
||||||
|
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package io.github.ilri.cgspace.ctasks;
|
package io.github.ilri.cgspace.ctasks;
|
||||||
|
@ -1,19 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
DSpace Curation Tasks
|
* Copyright (C) 2020 Alan Orth
|
||||||
Copyright (C) 2020 Alan Orth
|
*
|
||||||
|
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package io.github.ilri.cgspace.ctasks;
|
package io.github.ilri.cgspace.ctasks;
|
||||||
|
@ -1,3 +1,9 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2020 Alan Orth
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
package io.github.ilri.cgspace.scripts;
|
package io.github.ilri.cgspace.scripts;
|
||||||
|
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
@ -0,0 +1,270 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2022 Alan Orth
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.github.ilri.cgspace.scripts;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.dspace.authorize.AuthorizeException;
|
||||||
|
import org.dspace.content.*;
|
||||||
|
import org.dspace.content.factory.ContentServiceFactory;
|
||||||
|
import org.dspace.content.service.BundleService;
|
||||||
|
import org.dspace.content.service.ItemService;
|
||||||
|
import org.dspace.core.Constants;
|
||||||
|
import org.dspace.core.Context;
|
||||||
|
import org.dspace.handle.factory.HandleServiceFactory;
|
||||||
|
import org.dspace.handle.service.HandleService;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fix low-quality thumbnails in a DSpace repository.
|
||||||
|
*
|
||||||
|
* <p>Search the DSpace repository for items containing bitstreams matching the following criteria:
|
||||||
|
*
|
||||||
|
* <ul>
|
||||||
|
* <li>If an item has an <code>IM Thumbnail</code> and a <code>Generated Thumbnail</code> in the
|
||||||
|
* <code>THUMBNAIL</code> bundle, remove the <code>Generated Thumbnail</code>.
|
||||||
|
* <li>If an item has a PDF bitstream and a JPEG bitstream with description "thumbnail" in the
|
||||||
|
* <code>ORIGINAL</code> bundle, remove the "thumbnail" bitstream in the ORIGINAL bundle.
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
* <p>The general idea is that we should always prefer thumbnails generated from PDFs by ImageMagick
|
||||||
|
* to manually uploaded JPEGs because ImageMagick Thumbnails can be regenerated with higher quality,
|
||||||
|
* resolution, etc. Furthermore, if there are JPEG bitstreams in the ORIGINAL bundle DSpace will
|
||||||
|
* automatically create ".jpg.jpg" thumbnails from them in the THUMBNAIL bundle so we should remove
|
||||||
|
* those as well!
|
||||||
|
*
|
||||||
|
* @author Andrea Schweer schweer@waikato.ac.nz for the LCoNZ Institutional Research Repositories
|
||||||
|
* @author Alan Orth for the International Livestock Research Institute
|
||||||
|
* @version 6.1
|
||||||
|
* @since 6.1
|
||||||
|
* @see FixJpgJpgThumbnails
|
||||||
|
*/
|
||||||
|
public class FixLowQualityThumbnails {
|
||||||
|
// note: static members belong to the class itself, not any one instance
|
||||||
|
public static ItemService itemService = ContentServiceFactory.getInstance().getItemService();
|
||||||
|
public static HandleService handleService =
|
||||||
|
HandleServiceFactory.getInstance().getHandleService();
|
||||||
|
public static BundleService bundleService =
|
||||||
|
ContentServiceFactory.getInstance().getBundleService();
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
String parentHandle = null;
|
||||||
|
if (args.length >= 1) {
|
||||||
|
parentHandle = args[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
Context context = null;
|
||||||
|
try {
|
||||||
|
context = new Context();
|
||||||
|
context.turnOffAuthorisationSystem();
|
||||||
|
|
||||||
|
if (StringUtils.isBlank(parentHandle)) {
|
||||||
|
process(context, itemService.findAll(context));
|
||||||
|
} else {
|
||||||
|
DSpaceObject parent = handleService.resolveToObject(context, parentHandle);
|
||||||
|
if (parent != null) {
|
||||||
|
switch (parent.getType()) {
|
||||||
|
case Constants.COLLECTION:
|
||||||
|
process(
|
||||||
|
context,
|
||||||
|
itemService.findByCollection(context, (Collection) parent));
|
||||||
|
break;
|
||||||
|
case Constants.COMMUNITY:
|
||||||
|
List<Collection> collections = ((Community) parent).getCollections();
|
||||||
|
for (Collection collection : collections) {
|
||||||
|
process(
|
||||||
|
context,
|
||||||
|
itemService.findAllByCollection(context, collection));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case Constants.SITE:
|
||||||
|
process(context, itemService.findAll(context));
|
||||||
|
break;
|
||||||
|
case Constants.ITEM:
|
||||||
|
processItem(context, (Item) parent);
|
||||||
|
context.commit();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (SQLException | AuthorizeException | IOException e) {
|
||||||
|
e.printStackTrace(System.err);
|
||||||
|
} finally {
|
||||||
|
if (context != null && context.isValid()) {
|
||||||
|
context.abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void process(Context context, Iterator<Item> items)
|
||||||
|
throws SQLException, IOException, AuthorizeException {
|
||||||
|
while (items.hasNext()) {
|
||||||
|
Item item = items.next();
|
||||||
|
processItem(context, item);
|
||||||
|
itemService.update(context, item);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void processItem(Context context, Item item)
|
||||||
|
throws SQLException, AuthorizeException, IOException {
|
||||||
|
// Set some state for the item before we iterate over the THUMBNAIL bundle
|
||||||
|
boolean itemHasImThumbnail = false;
|
||||||
|
|
||||||
|
// Iterate over the THUMBNAIL bundle to first identify if this item has an "IM Thumbnail"
|
||||||
|
List<Bundle> thumbnailBundles = item.getBundles("THUMBNAIL");
|
||||||
|
for (Bundle thumbnailBundle : thumbnailBundles) {
|
||||||
|
List<Bitstream> thumbnailBundleBitstreams = thumbnailBundle.getBitstreams();
|
||||||
|
for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
|
||||||
|
String thumbnailDescription = thumbnailBitstream.getDescription();
|
||||||
|
|
||||||
|
if (StringUtils.isEmpty(thumbnailDescription)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this item has a bitstream in the THUMBNAIL bundle with description "IM
|
||||||
|
// Thumbnail", but only if we haven't already seen one in another iteration for this
|
||||||
|
// bundle.
|
||||||
|
if (!itemHasImThumbnail && "IM Thumbnail".equals(thumbnailDescription)) {
|
||||||
|
itemHasImThumbnail = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this item has an IM Thumbnail we can be reasonably sure that there is a PDF
|
||||||
|
// in the ORIGINAL bundle and we don't need any other thumbnails.
|
||||||
|
if (itemHasImThumbnail) {
|
||||||
|
// Iterate over the bitstreams in the THUMBNAIL bundle again.
|
||||||
|
for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
|
||||||
|
String thumbnailName = thumbnailBitstream.getName();
|
||||||
|
String thumbnailDescription = thumbnailBitstream.getDescription();
|
||||||
|
|
||||||
|
if (StringUtils.isEmpty(thumbnailDescription)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this item has a "Generated Thumbnail" we can remove it, because those
|
||||||
|
// typically come from other JPEGs in the ORIGINAL bundle and we would prefer
|
||||||
|
// the IM Thumbnail generated from a PDF anyway. The DSpace-generated descri-
|
||||||
|
// ption will *always* be "Generated Thumbnail".
|
||||||
|
if ("Generated Thumbnail".equals(thumbnailDescription)) {
|
||||||
|
System.out.print("\u001b[33m");
|
||||||
|
System.out.println("Deleting (" + item.getHandle() + "):");
|
||||||
|
System.out.println("> Name: »" + thumbnailName + "«");
|
||||||
|
System.out.println("> Description: »" + thumbnailDescription + "«");
|
||||||
|
System.out.print("\u001b[0m");
|
||||||
|
|
||||||
|
// Remove the "Generated Thumbnail" bitstream from the THUMBNAIL bundle
|
||||||
|
thumbnailBundle.removeBitstream(thumbnailBitstream);
|
||||||
|
|
||||||
|
// If this item has a bitstream with the word "thumbnail" in it then we can
|
||||||
|
// remove it because we already know this item has an IM Thumbnail and we
|
||||||
|
// prefer that one.
|
||||||
|
} else if (thumbnailDescription.toLowerCase().contains("thumbnail")
|
||||||
|
&& !"IM Thumbnail".equals(thumbnailDescription)) {
|
||||||
|
System.out.print("\u001b[33m");
|
||||||
|
System.out.println("Deleting (" + item.getHandle() + "):");
|
||||||
|
System.out.println("> Name: »" + thumbnailName + "«");
|
||||||
|
System.out.println("> Description: »" + thumbnailDescription + "«");
|
||||||
|
System.out.print("\u001b[0m");
|
||||||
|
|
||||||
|
// Remove the "thumbnail" bitstream from the THUMBNAIL bundle
|
||||||
|
thumbnailBundle.removeBitstream(thumbnailBitstream);
|
||||||
|
|
||||||
|
// Otherwise skip it because it might be something uploaded manually, like
|
||||||
|
// a thumbnail for a journal or a limited access item.
|
||||||
|
} else {
|
||||||
|
System.out.print("\u001b[34m");
|
||||||
|
System.out.println("Skipping (" + item.getHandle() + "):");
|
||||||
|
System.out.println("> Name: »" + thumbnailName + "«");
|
||||||
|
System.out.println("> Description: »" + thumbnailDescription + "«");
|
||||||
|
System.out.print("\u001b[0m");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print a blank line
|
||||||
|
System.out.println();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set some state before we iterate over the ORIGINAL bundle
|
||||||
|
boolean itemHasOriginalPdfBitstream = false;
|
||||||
|
boolean itemHasOriginalJpegBitstream = false;
|
||||||
|
|
||||||
|
// Iterate over the ORIGINAL bundle to delete manually-uploaded JPEG
|
||||||
|
// bitstreams labeled "Thumbnail" whenever we have a PDF because they
|
||||||
|
// don't belong in the ORIGINAL bundle and DSpace will automatically
|
||||||
|
// create a better thumbnail from the PDF anyway.
|
||||||
|
List<Bundle> originalBundles = item.getBundles("ORIGINAL");
|
||||||
|
for (Bundle originalBundle : originalBundles) {
|
||||||
|
List<Bitstream> originalBundleBitstreams = originalBundle.getBitstreams();
|
||||||
|
for (Bitstream originalBitstream : originalBundleBitstreams) {
|
||||||
|
String originalFormat = originalBitstream.getFormat(context).getMIMEType();
|
||||||
|
|
||||||
|
// Check if this item has a PDF bitstream in the ORIGINAL bundle,
|
||||||
|
// but only if we haven't already seen one in another iteration
|
||||||
|
// for this bundle. DSpace will return "format application/pdf"
|
||||||
|
// for the MIME type.
|
||||||
|
if (!itemHasOriginalPdfBitstream && originalFormat.contains("application/pdf")) {
|
||||||
|
itemHasOriginalPdfBitstream = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this item has a JPEG bitstream in the ORIGINAL bundle,
|
||||||
|
// but only if we haven't already seen one in another iteration
|
||||||
|
// for this bundle. DSpace will return "format image/jpeg" for
|
||||||
|
// the MIME type.
|
||||||
|
if (!itemHasOriginalJpegBitstream && originalFormat.contains("image/jpeg")) {
|
||||||
|
itemHasOriginalJpegBitstream = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if we found a PDF *and* a JPEG in this item's ORIGINAL
|
||||||
|
// bundle.
|
||||||
|
if (itemHasOriginalPdfBitstream && itemHasOriginalJpegBitstream) {
|
||||||
|
// Yes! Now iterate over the bitstreams in the ORIGINAL bundle
|
||||||
|
// again to see if the JPEG is a manually uploaded "Thumbnail"
|
||||||
|
for (Bitstream originalBitstream : originalBundleBitstreams) {
|
||||||
|
String originalName = originalBitstream.getName();
|
||||||
|
String originalDescription = originalBitstream.getDescription();
|
||||||
|
String originalFormat = originalBitstream.getFormat(context).getMIMEType();
|
||||||
|
|
||||||
|
if (StringUtils.isEmpty(originalDescription)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
- check if the bitstream is a JPEG based on its MIME Type
|
||||||
|
- check if the bitstream's name or description is "Thumbnail"
|
||||||
|
*/
|
||||||
|
if (originalFormat.toLowerCase().contains("image/jpeg")
|
||||||
|
&& (originalName.toLowerCase().contains("thumbnail")
|
||||||
|
|| originalDescription.toLowerCase().contains("thumbnail"))) {
|
||||||
|
System.out.print("\u001b[33m");
|
||||||
|
System.out.println("Removing (" + item.getHandle() + "):");
|
||||||
|
System.out.println("> Name: »" + originalName + "«");
|
||||||
|
System.out.println("> Description: »" + originalDescription + "«");
|
||||||
|
System.out.print("\u001b[0m");
|
||||||
|
|
||||||
|
// Remove the original bitstream from the ORIGINAL bundle
|
||||||
|
originalBundle.removeBitstream(originalBitstream);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
System.out.print("\u001b[34m");
|
||||||
|
System.out.println("Skipping (" + item.getHandle() + "):");
|
||||||
|
System.out.println("> Name: »" + originalName + "«");
|
||||||
|
System.out.println("> Description: »" + originalDescription + "«");
|
||||||
|
System.out.print("\u001b[0m");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print a blank line
|
||||||
|
System.out.println();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,16 +1,17 @@
|
|||||||
# Scripts
|
# Scripts
|
||||||
Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
|
Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutional repository:
|
||||||
|
|
||||||
- **FixJpgJpgThumbnails**: Fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
|
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
|
||||||
|
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
|
||||||
|
|
||||||
Tested on DSpace 6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC5x/Curation+System).
|
Tested on DSpace 6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC6x/Curation+System).
|
||||||
|
|
||||||
## Build and Install
|
## Build and Install
|
||||||
|
|
||||||
### Integrate into DSpace Build
|
### Integrate into DSpace Build
|
||||||
To use these curation tasks in a DSpace project add the following dependency to `dspace/modules/additions/pom.xml`:
|
To use these curation tasks in a DSpace project add the following dependency to `dspace/modules/additions/pom.xml`:
|
||||||
|
|
||||||
```
|
```xml
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>io.github.ilri.cgspace</groupId>
|
<groupId>io.github.ilri.cgspace</groupId>
|
||||||
<artifactId>cgspace-java-helpers</artifactId>
|
<artifactId>cgspace-java-helpers</artifactId>
|
||||||
@ -23,19 +24,19 @@ The jar will be copied to all DSpace applications.
|
|||||||
### Manual Build and Install
|
### Manual Build and Install
|
||||||
To build the standalone jar:
|
To build the standalone jar:
|
||||||
|
|
||||||
```
|
```console
|
||||||
$ mvn package
|
$ mvn package
|
||||||
```
|
```
|
||||||
|
|
||||||
Copy the resulting jar to the DSpace `lib` directory:
|
Copy the resulting jar to the DSpace `lib` directory:
|
||||||
|
|
||||||
```
|
```console
|
||||||
$ cp target/cgspace-java-helpers-6.1-SNAPSHOT.jar ~/dspace/lib/
|
$ cp target/cgspace-java-helpers-6.1-SNAPSHOT.jar ~/dspace/lib/
|
||||||
```
|
```
|
||||||
|
|
||||||
## Invocation
|
## Invocation
|
||||||
The script only takes one argument, which is a community, collection, or item:
|
The scripts take only one argument, which is a community, collection, or item:
|
||||||
|
|
||||||
```
|
```console
|
||||||
$ dspace dsrun io.github.ilri.cgspace.scripts.FixJpgJpgThumbnails 10568/83389
|
$ dspace dsrun io.github.ilri.cgspace.scripts.FixJpgJpgThumbnails 10568/83389
|
||||||
```
|
```
|
||||||
|
Loading…
Reference in New Issue
Block a user