diff --git a/src/main/java/io/github/ilri/cgspace/scripts/FixJpgJpgThumbnails.java b/src/main/java/io/github/ilri/cgspace/scripts/FixJpgJpgThumbnails.java index 0d09f45..8bc43f5 100644 --- a/src/main/java/io/github/ilri/cgspace/scripts/FixJpgJpgThumbnails.java +++ b/src/main/java/io/github/ilri/cgspace/scripts/FixJpgJpgThumbnails.java @@ -13,8 +13,8 @@ import java.sql.SQLException; /** * @author Andrea Schweer schweer@waikato.ac.nz for the LCoNZ Institutional Research Repositories * @author Alan Orth for the International Livestock Research Institute - * @version 5.1-SNAPSHOT - * @since 5.1-SNAPSHOT + * @version 5.3 + * @since 5.1 */ public class FixJpgJpgThumbnails { @@ -73,6 +73,16 @@ public class FixJpgJpgThumbnails { } private static void processItem(Item item) throws SQLException, AuthorizeException, IOException { + // Some bitstreams like Infographics are large JPGs and put in the ORIGINAL bundle on purpose so we shouldn't + // swap them. + Metadatum[] itemTypes = item.getMetadataByMetadataString("dc.type"); + Boolean itemHasInfographic = false; + for (Metadatum itemType: itemTypes) { + if (itemType.value.equals("Infographic")) { + itemHasInfographic = true; + } + } + Bundle[] thumbnailBundles = item.getBundles("THUMBNAIL"); for (Bundle thumbnailBundle : thumbnailBundles) { Bitstream[] thumbnailBundleBitstreams = thumbnailBundle.getBitstreams(); @@ -84,11 +94,25 @@ public class FixJpgJpgThumbnails { for (Bundle originalBundle : originalBundles) { Bitstream[] originalBundleBitstreams = originalBundle.getBitstreams(); - for(Bitstream originalBitstream : originalBundleBitstreams) { + for (Bitstream originalBitstream : originalBundleBitstreams) { String originalName = originalBitstream.getName(); - //check if the original file name is the same as the thumbnail name minus the extra ".jpg" - if (originalName.equalsIgnoreCase(StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg")) && ("Generated Thumbnail".equals(thumbnailBitstream.getDescription()) || "IM 
Thumbnail".equals(thumbnailBitstream.getDescription()))) { + Long originalBitstreamBytes = originalBitstream.getSize(); + + /* + - check if the original file name is the same as the thumbnail name minus the extra ".jpg" + - check if the thumbnail description indicates it was automatically generated + - check that the item does not have dc.type Infographic (JPG could be the "real" item!) + - check that the original bitstream is smaller than 100000 bytes (~98KiB) + - Note: in my tests there were 4022 items with ".jpg.jpg" thumbnails totaling 394549249 + bytes, an average of about 98098 bytes (~96KiB), so 100000 bytes seems like a good cutoff + */ + if ( + originalName.equalsIgnoreCase(StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg")) + && ("Generated Thumbnail".equals(thumbnailBitstream.getDescription()) || "IM Thumbnail".equals(thumbnailBitstream.getDescription())) + && !itemHasInfographic + && originalBitstreamBytes < 100000 + ) { System.out.println(item.getHandle() + ": replacing " + thumbnailName + " with " + originalName); //add the original bitstream to the THUMBNAIL bundle