From da1ecad2388b55d1c4ebd4c14fd5e36cd4cd2640 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Fri, 7 Aug 2020 21:49:48 +0300 Subject: [PATCH] src/main/java: DSpace 6 port of FixJpgJpgThumbnails.java Need to use the new DSpace 6 service model in most places. Not sure why addBitstream is no longer public, but removeBitstream is... --- .../cgspace/scripts/FixJpgJpgThumbnails.java | 141 ++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 src/main/java/io/github/ilri/cgspace/scripts/FixJpgJpgThumbnails.java diff --git a/src/main/java/io/github/ilri/cgspace/scripts/FixJpgJpgThumbnails.java b/src/main/java/io/github/ilri/cgspace/scripts/FixJpgJpgThumbnails.java new file mode 100644 index 0000000..ff77ed9 --- /dev/null +++ b/src/main/java/io/github/ilri/cgspace/scripts/FixJpgJpgThumbnails.java @@ -0,0 +1,141 @@ +package io.github.ilri.cgspace.scripts; + +import org.apache.commons.lang.StringUtils; +import org.dspace.authorize.AuthorizeException; +import org.dspace.content.*; +import org.dspace.core.Constants; +import org.dspace.core.Context; +import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.content.service.ItemService; +import org.dspace.handle.factory.HandleServiceFactory; +import org.dspace.handle.service.HandleService; +import org.dspace.content.service.BundleService; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.Iterator; +import java.util.List; + +/** + * @author Andrea Schweer schweer@waikato.ac.nz for the LCoNZ Institutional Research Repositories + * @author Alan Orth for the International Livestock Research Institute + * @version 6.0 + * @since 5.1 + */ +public class FixJpgJpgThumbnails { + //note: static members belong to the class itself, not any one instance + public static ItemService itemService = ContentServiceFactory.getInstance().getItemService(); + public static HandleService handleService = HandleServiceFactory.getInstance().getHandleService(); + public static BundleService bundleService = ContentServiceFactory.getInstance().getBundleService(); + + public static void main(String[] args) { + String parentHandle = null; + if (args.length >= 1) { + parentHandle = args[0]; + } + + System.out.println("STFUUUU"); + Context context = null; + try { + context = new Context(); + context.turnOffAuthorisationSystem(); + + if (StringUtils.isBlank(parentHandle)) { + process(context, itemService.findAll(context)); + } else { + DSpaceObject parent = handleService.resolveToObject(context, parentHandle); + if (parent != null) { + switch (parent.getType()) { + case Constants.COLLECTION: + process(context, itemService.findByCollection(context, (Collection) parent)); + break; + case Constants.COMMUNITY: + List collections = ((Community) parent).getCollections(); + for (Collection collection : collections) { + process(context, itemService.findAllByCollection(context, collection)); + } + break; + case Constants.SITE: + process(context, itemService.findAll(context)); + break; + case Constants.ITEM: + processItem(context, (Item) parent); + context.commit(); + break; + } + } + } + } catch (SQLException | AuthorizeException | IOException e) { + e.printStackTrace(System.err); + } finally { + if (context != null && context.isValid()) { + context.abort(); + } + } + } + + private static void process(Context context, Iterator items) throws SQLException, IOException, AuthorizeException { + while (items.hasNext()) { + Item item = items.next(); + processItem(context, item); + itemService.update(context, item); + } + } + + private static void processItem(Context context, Item item) throws SQLException, AuthorizeException, IOException { + // Some bitstreams like Infographics are large JPGs and put in the ORIGINAL bundle on purpose so we shouldn't + // swap them. + List itemTypes = itemService.getMetadataByMetadataString(item, "dc.type"); + boolean itemHasInfographic = false; + for (MetadataValue itemType: itemTypes) { + if (itemType.getValue().equals("Infographic")) { + itemHasInfographic = true; + } + } + + List thumbnailBundles = item.getBundles("THUMBNAIL"); + for (Bundle thumbnailBundle : thumbnailBundles) { + List thumbnailBundleBitstreams = thumbnailBundle.getBitstreams(); + for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) { + String thumbnailName = thumbnailBitstream.getName(); + + if (thumbnailName.toLowerCase().contains(".jpg.jpg")) { + List originalBundles = item.getBundles("ORIGINAL"); + for (Bundle originalBundle : originalBundles) { + List originalBundleBitstreams = originalBundle.getBitstreams(); + + for (Bitstream originalBitstream : originalBundleBitstreams) { + String originalName = originalBitstream.getName(); + + long originalBitstreamBytes = originalBitstream.getSize(); + + /* + - check if the original file name is the same as the thumbnail name minus the extra ".jpg" + - check if the thumbnail description indicates it was automatically generated + - check if the item has dc.type Infographic (JPG could be the "real" item!) + - check if the original bitstream is less than ~100KiB + - Note: in my tests there were 4022 items with ".jpg.jpg" thumbnails totaling 394549249 + bytes for an average of about 98KiB so ~100KiB seems like a good cut off + */ + if ( + originalName.equalsIgnoreCase(StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg")) + && ("Generated Thumbnail".equals(thumbnailBitstream.getDescription()) || "IM Thumbnail".equals(thumbnailBitstream.getDescription())) + && !itemHasInfographic + && originalBitstreamBytes < 100000 + ) { + System.out.println(item.getHandle() + ": replacing " + thumbnailName + " with " + originalName); + + //add the original bitstream to the THUMBNAIL bundle + bundleService.addBitstream(context, thumbnailBundle, originalBitstream); + //remove the original bitstream from the ORIGINAL bundle + originalBundle.removeBitstream(originalBitstream); + //remove the JpgJpg bitstream from the THUMBNAIL bundle + thumbnailBundle.removeBitstream(thumbnailBitstream); + } + } + } + } + } + } + } +}