diff --git a/src/main/java/io/github/ilri/cgspace/scripts/FixLowQualityThumbnails.java b/src/main/java/io/github/ilri/cgspace/scripts/FixLowQualityThumbnails.java new file mode 100644 index 0000000..f46af73 --- /dev/null +++ b/src/main/java/io/github/ilri/cgspace/scripts/FixLowQualityThumbnails.java @@ -0,0 +1,252 @@ +package io.github.ilri.cgspace.scripts; + +import org.apache.commons.lang.StringUtils; +import org.dspace.authorize.AuthorizeException; +import org.dspace.content.*; +import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.content.service.BundleService; +import org.dspace.content.service.ItemService; +import org.dspace.core.Constants; +import org.dspace.core.Context; +import org.dspace.handle.factory.HandleServiceFactory; +import org.dspace.handle.service.HandleService; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.Iterator; +import java.util.List; + +/** + * Fix low-quality thumbnails in a DSpace repository. + * + *

Search the DSpace repository for items containing bitstreams matching the following criteria: + * + *

+ * + *

The general idea is that we should always prefer thumbnails generated from PDFs by ImageMagick + * to manually uploaded JPEGs because ImageMagick Thumbnails can be regenerated with higher quality, + * resolution, etc. Furthermore, if there are JPEG bitstreams in the ORIGINAL bundle DSpace will + * automatically create ".jpg.jpg" thumbnails from them in the THUMBNAIL bundle so we should remove + * those as well! + * + * @author Andrea Schweer schweer@waikato.ac.nz for the LCoNZ Institutional Research Repositories + * @author Alan Orth for the International Livestock Research Institute + * @version 6.1 + * @since 6.1 + * @see FixJpgJpgThumbnails + */ +public class FixLowQualityThumbnails { + // note: static members belong to the class itself, not any one instance + public static ItemService itemService = ContentServiceFactory.getInstance().getItemService(); + public static HandleService handleService = + HandleServiceFactory.getInstance().getHandleService(); + public static BundleService bundleService = + ContentServiceFactory.getInstance().getBundleService(); + + public static void main(String[] args) { + String parentHandle = null; + if (args.length >= 1) { + parentHandle = args[0]; + } + + Context context = null; + try { + context = new Context(); + context.turnOffAuthorisationSystem(); + + if (StringUtils.isBlank(parentHandle)) { + process(context, itemService.findAll(context)); + } else { + DSpaceObject parent = handleService.resolveToObject(context, parentHandle); + if (parent != null) { + switch (parent.getType()) { + case Constants.COLLECTION: + process( + context, + itemService.findByCollection(context, (Collection) parent)); + break; + case Constants.COMMUNITY: + List collections = ((Community) parent).getCollections(); + for (Collection collection : collections) { + process( + context, + itemService.findAllByCollection(context, collection)); + } + break; + case Constants.SITE: + process(context, itemService.findAll(context)); + break; + case Constants.ITEM: + processItem(context, (Item) parent); + context.commit(); + break; + } + } + } + } catch (SQLException | AuthorizeException | IOException e) { + e.printStackTrace(System.err); + } finally { + if (context != null && context.isValid()) { + context.abort(); + } + } + } + + private static void process(Context context, Iterator items) + throws SQLException, IOException, AuthorizeException { + while (items.hasNext()) { + Item item = items.next(); + processItem(context, item); + itemService.update(context, item); + } + } + + private static void processItem(Context context, Item item) + throws SQLException, AuthorizeException, IOException { + // Set some state for the item before we iterate over the THUMBNAIL bundle + boolean itemHasImThumbnail = false; + + // Iterate over the THUMBNAIL bundle to first identify if this item has an "IM Thumbnail" + List thumbnailBundles = item.getBundles("THUMBNAIL"); + for (Bundle thumbnailBundle : thumbnailBundles) { + List thumbnailBundleBitstreams = thumbnailBundle.getBitstreams(); + for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) { + String thumbnailDescription = thumbnailBitstream.getDescription(); + + // Check if this item has a bitstream in the THUMBNAIL bundle with description "IM + // Thumbnail", but only if we haven't already seen one in another iteration for this + // bundle. + if (!itemHasImThumbnail && "IM Thumbnail".equals(thumbnailDescription)) { + itemHasImThumbnail = true; + } + } + + // If this item has an IM Thumbnail we can be reasonably sure that there is a PDF + // in the ORIGINAL bundle and we don't need any other thumbnails. + if (itemHasImThumbnail) { + // Iterate over the bitstreams in the THUMBNAIL bundle again. + for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) { + String thumbnailName = thumbnailBitstream.getName(); + String thumbnailDescription = thumbnailBitstream.getDescription(); + + // If this item has a "Generated Thumbnail" we can remove it, because those + // typically come from other JPEGs in the ORIGINAL bundle and we would prefer + // the IM Thumbnail generated from a PDF anyway. The DSpace-generated descri- + // ption will *always* be "Generated Thumbnail". + if ("Generated Thumbnail".equals(thumbnailDescription)) { + System.out.print("\u001b[33m"); + System.out.println("Deleting (" + item.getHandle() + "):"); + System.out.println("> Name: »" + thumbnailName + "«"); + System.out.println("> Description: »" + thumbnailDescription + "«"); + System.out.print("\u001b[0m"); + + // Remove the "Generated Thumbnail" bitstream from the THUMBNAIL bundle + thumbnailBundle.removeBitstream(thumbnailBitstream); + + // If this item has a bitstream with the word "thumbnail" in it then we can + // remove it because we already know this item has an IM Thumbnail and we + // prefer that one. + } else if (thumbnailDescription.toLowerCase().contains("thumbnail") + && !"IM Thumbnail".equals(thumbnailDescription)) { + System.out.print("\u001b[33m"); + System.out.println("Deleting (" + item.getHandle() + "):"); + System.out.println("> Name: »" + thumbnailName + "«"); + System.out.println("> Description: »" + thumbnailDescription + "«"); + System.out.print("\u001b[0m"); + + // Remove the "thumbnail" bitstream from the THUMBNAIL bundle + thumbnailBundle.removeBitstream(thumbnailBitstream); + + // Otherwise skip it because it might be something uploaded manually, like + // a thumbnail for a journal or a limited access item. + } else { + System.out.print("\u001b[34m"); + System.out.println("Skipping (" + item.getHandle() + "):"); + System.out.println("> Name: »" + thumbnailName + "«"); + System.out.println("> Description: »" + thumbnailDescription + "«"); + System.out.print("\u001b[0m"); + } + + // Print a blank line + System.out.println(); + } + } + } + + // Set some state before we iterate over the ORIGINAL bundle + boolean itemHasOriginalPdfBitstream = false; + boolean itemHasOriginalJpegBitstream = false; + + // Iterate over the ORIGINAL bundle to delete manually-uploaded JPEG + // bitstreams labeled "Thumbnail" whenever we have a PDF because they + // don't belong in the ORIGINAL bundle and DSpace will automatically + // create a better thumbnail from the PDF anyway. + List originalBundles = item.getBundles("ORIGINAL"); + for (Bundle originalBundle : originalBundles) { + List originalBundleBitstreams = originalBundle.getBitstreams(); + for (Bitstream originalBitstream : originalBundleBitstreams) { + String originalFormat = originalBitstream.getFormat(context).getMIMEType(); + + // Check if this item has a PDF bitstream in the ORIGINAL bundle, + // but only if we haven't already seen one in another iteration + // for this bundle. DSpace will return "format application/pdf" + // for the MIME type. + if (!itemHasOriginalPdfBitstream && originalFormat.contains("application/pdf")) { + itemHasOriginalPdfBitstream = true; + } + + // Check if this item has a JPEG bitstream in the ORIGINAL bundle, + // but only if we haven't already seen one in another iteration + // for this bundle. DSpace will return "format image/jpeg" for + // the MIME type. + if (!itemHasOriginalJpegBitstream && originalFormat.contains("image/jpeg")) { + itemHasOriginalJpegBitstream = true; + } + } + + // Check if we found a PDF *and* a JPEG in this item's ORIGINAL + // bundle. + if (itemHasOriginalPdfBitstream && itemHasOriginalJpegBitstream) { + // Yes! Now iterate over the bitstreams in the ORIGINAL bundle + // again to see if the JPEG is a manually uploaded "Thumbnail" + for (Bitstream originalBitstream : originalBundleBitstreams) { + String originalName = originalBitstream.getName(); + String originalDescription = originalBitstream.getDescription(); + String originalFormat = originalBitstream.getFormat(context).getMIMEType(); + + /* + - check if the bitstream is a JPEG based on its MIME Type + - check if the bitstream's name or description is "Thumbnail" + */ + if (originalFormat.toLowerCase().contains("image/jpeg") + && (originalName.toLowerCase().contains("thumbnail") + || originalDescription.toLowerCase().contains("thumbnail"))) { + System.out.print("\u001b[33m"); + System.out.println("Removing (" + item.getHandle() + "):"); + System.out.println("> Name: »" + originalName + "«"); + System.out.println("> Description: »" + originalDescription + "«"); + System.out.print("\u001b[0m"); + + // Remove the original bitstream from the ORIGINAL bundle + originalBundle.removeBitstream(originalBitstream); + + } else { + System.out.print("\u001b[34m"); + System.out.println("Skipping (" + item.getHandle() + "):"); + System.out.println("> Name: »" + originalName + "«"); + System.out.println("> Description: »" + originalDescription + "«"); + System.out.print("\u001b[0m"); + } + + // Print a blank line + System.out.println(); + } + } + } + } +}