Add initial version of generate.py

This commit is contained in:
Alan Orth 2024-02-25 16:17:01 +03:00
parent b75af10e1d
commit 2a33c8c5bd
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9
1 changed files with 179 additions and 0 deletions

179
generate.py Executable file
View File

@ -0,0 +1,179 @@
#!/usr/bin/env python3
#
# SPDX-License-Identifier: GPL-3.0-only
import argparse
import logging
import os.path
from random import sample
import pandas as pd
import pyvips
import requests
from colorama import Fore
# Create a local logger instance
logger = logging.getLogger(__name__)
def create_thumbnail(animal):
    """Create a labelled 600x600 thumbnail for one animal.

    Reads ``images/<id>.jpg`` and writes ``images/<id>_thumb.vips``. Nothing
    is written when the source image is missing (an error is logged) or when
    the thumbnail already exists (a debug message is logged).

    Args:
        animal: A mapping (for example one pandas row) providing the ``id``,
            ``common_name`` and ``vips_smartcrop`` fields.
    """
    # Local import: only this function needs it.
    from xml.sax.saxutils import escape

    thumbnail_size = 600

    image_path = f"images/{animal['id']}.jpg"
    thumbnail_path = f"images/{animal['id']}_thumb.vips"

    # Check if the file has been downloaded
    if not os.path.isfile(image_path):
        logger.error(
            Fore.YELLOW + f"> Missing image for {animal['common_name']}." + Fore.RESET
        )
        return

    # Check if we already have a thumbnail. The logger's level decides whether
    # this debug message is shown, so no command-line state is consulted here.
    if os.path.isfile(thumbnail_path):
        logger.debug(
            Fore.YELLOW
            + f"> Thumbnail for {animal['common_name']} already exists."
            + Fore.RESET
        )
        return

    logger.info(
        Fore.GREEN + f"> Creating thumbnail for {animal['common_name']}..." + Fore.RESET
    )

    vips_image = pyvips.Image.new_from_file(image_path)

    # Crop to a square using smartcrop to focus attention
    # See: https://stackoverflow.com/questions/47852390/making-a-huge-image-mosaic-with-pyvips
    vips_thumbnail = vips_image.thumbnail_image(
        thumbnail_size,
        height=thumbnail_size,
        linear=True,
        crop=animal["vips_smartcrop"],
    )

    # Create a temporary image with text using Pango markup, which can use
    # some HTML. This allows us to use consistent font sizes as opposed to
    # pyvips.Image.text's width and height which are maximums in pixels.
    # We also use RGBA to make the background transparent, which we need so
    # we can see the rectangle we will draw next.
    #
    # The name is escaped because characters such as "&" and "<" are markup
    # syntax and would otherwise make the markup invalid.
    #
    # See: https://docs.gtk.org/Pango/pango_markup.html
    label = escape(str(animal["common_name"]))
    text = pyvips.Image.text(
        f'<span foreground="white" size="48pt">{label}</span>',
        rgba=True,
    )

    # Draw a rectangle on top of our image with the same dimensions as our
    # text and position it relative to the bottom using the height. The RGB
    # values are ILRI red (#702D3E).
    label_top = thumbnail_size - text.height
    vips_thumbnail = vips_thumbnail.draw_rect(
        [112, 45, 62], 0, label_top, text.width, text.height, fill=True
    )

    vips_thumbnail = vips_thumbnail.composite(text, "over", x=0, y=label_top)

    # Write to VIPS format for intermediate thumbnails so we don't do JPEG
    # conversion twice.
    vips_thumbnail.vipssave(thumbnail_path, strip=True)
def download_image(animal):
    """Download one animal's image to ``images/<id>.jpg`` if it is not there yet.

    Failures (non-200 responses, network errors, timeouts) are logged and
    leave no ``.jpg`` behind, so the download is attempted again on the next
    run.

    Args:
        animal: A mapping (for example one pandas row) providing the ``id``,
            ``common_name`` and ``image`` (URL) fields.
    """
    image_url = animal["image"]
    image_path = f"images/{animal['id']}.jpg"

    # The logger's level decides whether this debug message is shown, so no
    # command-line state is consulted here.
    if os.path.isfile(image_path):
        logger.debug(
            Fore.YELLOW + f"> {animal['common_name']} already downloaded." + Fore.RESET
        )
        return

    logger.info(Fore.GREEN + f"> Downloading {animal['common_name']}..." + Fore.RESET)

    headers = {
        "User-Agent": "safari-bingo-cards-bot/0.1 (https://git.mjanja.ch/alanorth/safari-bingo-cards)"
    }

    os.makedirs(os.path.dirname(image_path), exist_ok=True)

    # Stream into a temporary name and rename only once the download is
    # complete. Otherwise an interrupted transfer would leave a truncated
    # .jpg that later runs mistake for a finished download.
    partial_path = f"{image_path}.part"

    try:
        # Without a timeout requests waits forever on a stalled server. The
        # context manager releases the streamed connection in every case.
        with requests.get(
            image_url, headers=headers, stream=True, timeout=30
        ) as response:
            if response.status_code != 200:
                logger.error(
                    Fore.RED
                    + f"> Download failed (HTTP {response.status_code}), I will try again next time."
                    + Fore.RESET
                )
                return

            with open(partial_path, "wb") as fd:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    fd.write(chunk)
    except requests.RequestException as error:
        if os.path.exists(partial_path):
            os.remove(partial_path)

        logger.error(
            Fore.RED
            + f"> Download failed ({error}), I will try again next time."
            + Fore.RESET
        )
        return

    os.replace(partial_path, image_path)
# Script entry point: pick a random set of animals from the CSV, make sure
# their images and thumbnails exist, then join the thumbnails into one JPEG.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Download images and generate thumbnails from files in a CSV."
    )
    parser.add_argument(
        "-a",
        "--across",
        help="Number of images across grid.",
        type=int,
        default=4,
    )
    parser.add_argument(
        "-d",
        "--debug",
        help="Print debug messages to standard error (stderr).",
        action="store_true",
    )
    parser.add_argument(
        "-i",
        "--csv-file",
        help="Path to input file (CSV).",
        required=True,
        type=argparse.FileType("r", encoding="UTF-8"),
    )
    parser.add_argument(
        "-o",
        "--output-file",
        help="Path to output file (JPEG).",
        required=True,
    )
    # Deliberately kept as a module-level name.
    args = parser.parse_args()

    if args.across < 1:
        parser.error("--across must be a positive integer.")

    # The default log level is WARNING, but we want to set it to DEBUG or INFO
    logger.setLevel(logging.DEBUG if args.debug else logging.INFO)

    # Set the global log format
    logging.basicConfig(format="[%(levelname)s] %(message)s")

    # Read the CSV, then close the handle that argparse opened for us
    animals_df = pd.read_csv(args.csv_file)
    args.csv_file.close()

    # We will build a square grid using the "across" parameter passed by the
    # user, so we need across * across animals. Sampling more rows than the
    # CSV contains would otherwise fail inside pandas with an opaque error.
    grid_size = args.across * args.across
    total_animals = animals_df.shape[0]

    if total_animals < grid_size:
        parser.error(
            f"the CSV only has {total_animals} animals, but a "
            f"{args.across}x{args.across} grid needs {grid_size}."
        )

    # Get a random list of animal IDs
    logger.info(
        f"Generating random sample of {grid_size} animals from {total_animals}..."
    )
    animal_ids = animals_df["id"].sample(grid_size)

    # Slice those random animals from the dataframe
    animals_df_random = animals_df[animals_df["id"].isin(animal_ids)]

    # Apparently iterating over dataframes is bad practice so I will use apply
    # instead. With axis=1 the function is called once per row. In pandas,
    # apply should be a bit faster than iterrows.
    animals_df_random.apply(download_image, axis=1)
    animals_df_random.apply(create_thumbnail, axis=1)

    # A failed download means there is no thumbnail; stop with a clear message
    # instead of a traceback from pyvips.
    thumbnail_paths = [f"images/{animal_id}_thumb.vips" for animal_id in animal_ids]
    missing_paths = [path for path in thumbnail_paths if not os.path.isfile(path)]

    if missing_paths:
        logger.error(
            Fore.RED
            + f"> Missing {len(missing_paths)} thumbnail(s), cannot build the grid."
            + Fore.RESET
        )
        raise SystemExit(1)

    thumbnails = [pyvips.Image.new_from_file(path) for path in thumbnail_paths]

    # Join all thumbnails together in an array of x across with a padding of 2
    # pixels between each square.
    joined = pyvips.Image.arrayjoin(thumbnails, across=args.across, shim=2)
    joined.jpegsave(args.output_file, optimize_coding=True, strip=True)

    logger.info(f"Wrote {args.output_file}")