1 Commits

Author SHA1 Message Date
10b8058e57 Add new RemoveGeneratedThumbnails script 2025-11-05 10:21:46 +03:00
4 changed files with 154 additions and 0 deletions

View File

@@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## Unreleased
### Added
- New `RemoveGeneratedThumbnails` script
### Updated
- Update dspace-api dependency to 7.6.3
- Update gson dependency to 2.11.0 to match dspace-api

View File

@@ -4,6 +4,7 @@ DSpace curation tasks and other Java-based helpers used on the [CGSpace](https:/
- **CountryCodeTagger**: add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
- **RemoveGeneratedThumbnails**: remove generated thumbnails (in preparation for re-generating)
- **NormalizeDOIs**: normalize DOIs by stripping whitespace, lowercasing, and converting to https://doi.org/ format
Tested on DSpace 7.6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).

View File

@@ -3,6 +3,7 @@ Java-based helpers used on the [CGSpace](https://cgspace.cgiar.org) institutiona
- **FixJpgJpgThumbnails**: fix low-quality ".jpg.jpg" thumbnails by replacing them with their originals
- **FixLowQualityThumbnails**: remove low-quality thumbnails when PDF bitstreams are present
- **RemoveGeneratedThumbnails**: remove generated thumbnails (in preparation for re-generating)
Tested on DSpace 7.6.3. Read more about the [DSpace curation system](https://wiki.lyrasis.org/display/DSDOC7x/Curation+System).

View File

@@ -0,0 +1,149 @@
/*
* Copyright (C) 2025 Alan Orth
*
* SPDX-License-Identifier: GPL-3.0-only
*/
package io.github.ilri.cgspace.scripts;
import org.apache.commons.lang.StringUtils;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.Bundle;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.MetadataValue;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.BundleService;
import org.dspace.content.service.ItemService;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.handle.service.HandleService;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
 * Command-line helper that removes automatically generated PDF thumbnails from items, in
 * preparation for re-generating them.
 *
 * <p>A bitstream in an item's {@code THUMBNAIL} bundle is removed from that bundle when all of
 * the following hold:
 *
 * <ul>
 *   <li>its name ends with {@code .pdf.jpg} (case-insensitive);
 *   <li>its description is exactly {@code "Generated Thumbnail"} or {@code "IM Thumbnail"};
 *   <li>the item has a bitstream in an {@code ORIGINAL} bundle whose name equals the thumbnail
 *       name minus the trailing {@code .jpg} (case-insensitive).
 * </ul>
 *
 * <p>Usage: the optional first argument is the handle of a site, community, collection or item.
 * When it is missing or blank, every item returned by {@code itemService.findAll} is processed.
 *
 * @author Andrea Schweer schweer@waikato.ac.nz for the LCoNZ Institutional Research Repositories
 * @author Alan Orth for the International Livestock Research Institute
 * @version 7.6.1.4
 * @since 7.6.1.4
 */
public class RemoveGeneratedThumbnails {
    // note: static members belong to the class itself, not any one instance
    public static ItemService itemService = ContentServiceFactory.getInstance().getItemService();
    public static HandleService handleService =
            HandleServiceFactory.getInstance().getHandleService();
    // NOTE(review): not referenced anywhere in this class; kept because it is a public member.
    public static BundleService bundleService =
            ContentServiceFactory.getInstance().getBundleService();

    /**
     * Entry point.
     *
     * @param args optional; {@code args[0]} is the handle of the object whose items should be
     *     processed. With no (or a blank) handle all items are processed.
     */
    public static void main(String[] args) {
        String parentHandle = null;
        if (args.length >= 1) {
            parentHandle = args[0];
        }

        Context context = null;
        try {
            context = new Context();
            context.turnOffAuthorisationSystem();

            if (StringUtils.isBlank(parentHandle)) {
                process(context, itemService.findAll(context));
                // Commit like every other branch does; without this the finally block
                // aborts the context after a "process everything" run.
                context.commit();
            } else {
                DSpaceObject parent = handleService.resolveToObject(context, parentHandle);
                if (parent == null) {
                    // Report instead of silently doing nothing.
                    System.err.println("Could not resolve handle: " + parentHandle);
                } else {
                    switch (parent.getType()) {
                        case Constants.SITE:
                            process(context, itemService.findAll(context));
                            context.commit();
                            break;
                        case Constants.COMMUNITY:
                            // NOTE(review): only the collections returned by getCollections()
                            // are visited; confirm whether sub-communities are expected too.
                            List<Collection> collections = ((Community) parent).getCollections();
                            for (Collection collection : collections) {
                                process(
                                        context,
                                        itemService.findAllByCollection(context, collection));
                            }
                            context.commit();
                            break;
                        case Constants.COLLECTION:
                            // NOTE(review): this uses findByCollection while the community
                            // branch uses findAllByCollection; confirm that is intended.
                            process(
                                    context,
                                    itemService.findByCollection(context, (Collection) parent));
                            context.commit();
                            break;
                        case Constants.ITEM:
                            // NOTE(review): unlike process(), this path does not call
                            // itemService.update(); confirm that is intended.
                            processItem(context, (Item) parent);
                            context.commit();
                            break;
                        default:
                            System.err.println(
                                    "Unsupported object type for handle: " + parentHandle);
                            break;
                    }
                }
            }
        } catch (SQLException | AuthorizeException | IOException e) {
            e.printStackTrace(System.err);
        } finally {
            // Release the context if it is still valid (after a commit, or after a failure).
            if (context != null && context.isValid()) {
                context.abort();
            }
        }
    }

    /**
     * Runs {@link #processItem(Context, Item)} on every item of the iterator and then updates the
     * item through the item service.
     *
     * @param context the DSpace context to work in
     * @param items the items to process
     * @throws SQLException propagated from the DSpace services
     * @throws IOException propagated from the DSpace services
     * @throws AuthorizeException propagated from the DSpace services
     */
    private static void process(Context context, Iterator<Item> items)
            throws SQLException, IOException, AuthorizeException {
        while (items.hasNext()) {
            Item item = items.next();
            processItem(context, item);
            itemService.update(context, item);
        }
    }

    /**
     * Removes generated PDF thumbnails from the {@code THUMBNAIL} bundles of one item and prints
     * one line per removed thumbnail to standard output.
     *
     * @param context the DSpace context to work in
     * @param item the item to inspect
     * @throws SQLException propagated from the DSpace API
     * @throws AuthorizeException propagated from the DSpace API
     * @throws IOException propagated from the DSpace API
     */
    private static void processItem(Context context, Item item)
            throws SQLException, AuthorizeException, IOException {
        for (Bundle thumbnailBundle : item.getBundles("THUMBNAIL")) {
            // Iterate over a snapshot so that removing a bitstream from the bundle cannot
            // disturb the iteration, whatever list getBitstreams() hands back.
            List<Bitstream> thumbnails = new ArrayList<>(thumbnailBundle.getBitstreams());
            for (Bitstream thumbnailBitstream : thumbnails) {
                String thumbnailName = thumbnailBitstream.getName();
                String thumbnailDescription = thumbnailBitstream.getDescription();

                // Only thumbnails whose description marks them as generated are candidates;
                // this also skips null or empty descriptions.
                if (!isGeneratedDescription(thumbnailDescription)) {
                    continue;
                }
                // Null-safe, case-insensitive suffix check (a nameless bitstream is skipped).
                if (!StringUtils.endsWithIgnoreCase(thumbnailName, ".pdf.jpg")) {
                    continue;
                }

                // The original file name is the thumbnail name minus the extra ".jpg".
                String expectedOriginalName =
                        StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg");
                if (hasOriginalNamed(item, expectedOriginalName)) {
                    System.out.println(item.getHandle() + ": removing " + thumbnailName);
                    // NOTE(review): this detaches the bitstream from the bundle object only;
                    // confirm whether bundleService should be used to delete it as well.
                    thumbnailBundle.removeBitstream(thumbnailBitstream);
                }
            }
        }
    }

    /** Returns whether the description is one of the two values that mark a generated thumbnail. */
    private static boolean isGeneratedDescription(String description) {
        return "Generated Thumbnail".equals(description) || "IM Thumbnail".equals(description);
    }

    /**
     * Returns whether any bitstream in the item's {@code ORIGINAL} bundles has the given name,
     * compared case-insensitively. The throws clause mirrors {@code processItem}.
     */
    private static boolean hasOriginalNamed(Item item, String expectedName)
            throws SQLException, AuthorizeException, IOException {
        for (Bundle originalBundle : item.getBundles("ORIGINAL")) {
            for (Bitstream originalBitstream : originalBundle.getBitstreams()) {
                // Called on the non-null expected name, so a nameless original is just a miss.
                if (expectedName.equalsIgnoreCase(originalBitstream.getName())) {
                    return true;
                }
            }
        }
        return false;
    }
}