Skip to content

Commit

Permalink
Speed up the initial build when the image cache is cold
Browse files Browse the repository at this point in the history
This takes the cold build from ~50s to ~7s.
  • Loading branch information
alexwlchan committed Apr 7, 2024
1 parent 827087e commit 7c0c1a1
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 28 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
.jekyll-metadata
.sass-cache
_site
*.pyc

.DS_Store
44 changes: 44 additions & 0 deletions src/_plugins/pillow/get_all_image_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env python3
"""
Get the information about every image it can find.
"""

import json
import pathlib

from get_image_info import get_info


def get_file_paths_under(root=".", *, suffix=""):
    """
    Generate the path of every matching file under ``root``.

    Paths are built by joining onto ``root`` as given, so they are only
    absolute if ``root`` itself is absolute.

    ``.DS_Store`` files and files with a ``.svg`` or ``.mov`` extension
    (in any letter case) are always skipped.

    :param root: Directory to search, as a string or ``pathlib.Path``.
    :param suffix: Only yield files whose lowercased name ends with this
        value.  The default (``""``) matches every file.
    :raises ValueError: if ``root`` exists but is not a directory.
    :raises FileNotFoundError: if ``root`` does not exist.

    This is a generator, so the exceptions are raised on first iteration.
    """
    # Imported here so the function does not depend on a module-level import.
    import os

    root = pathlib.Path(root)

    if root.exists() and not root.is_dir():
        raise ValueError(f"Cannot find files under file: {root!r}")

    if not root.is_dir():
        raise FileNotFoundError(root)

    # os.walk is used rather than Path.walk, which needs Python 3.12+.
    for dirpath, _, filenames in os.walk(root):
        for f in filenames:
            if f == ".DS_Store":
                continue

            lowered = f.lower()

            # Match extensions on the lowercased name so that "photo.SVG"
            # is excluded exactly like "photo.svg".
            if lowered.endswith((".svg", ".mov")):
                continue

            p = pathlib.Path(dirpath) / f

            if p.is_file() and lowered.endswith(suffix):
                yield p


if __name__ == "__main__":
    # Map every image path found under src/_images (as a string) to its
    # info, then emit the whole mapping as one JSON object on stdout.
    image_info = {
        str(image_path): get_info(image_path)
        for image_path in get_file_paths_under("src/_images")
    }

    print(json.dumps(image_info))
24 changes: 12 additions & 12 deletions src/_plugins/pillow/get_image_info.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,32 @@
#!/usr/bin/env python3
"""
Get the information about one or more images.
Get the information about an image.
This takes one argument, which is a JSON-formatted list of image paths.
It prints a JSON object whose keys are paths and values are image info.
This takes a path to an image, prints the output as JSON.
Example:
$ python3 convert_image.py '["cat.jpg", "dog.png"]'
{
"cat.jpg": {"width": 400, "height": 300, "format": "JPEG"},
"dog.png": {"width": 1600, "height": 900, "format": "PNG"}
}
$ python3 get_image_info.py cat.jpg
{"width": 400, "height": 300, "format": "JPEG"}
"""

import json
import os
import sys

from PIL import Image


def get_info(path):
    """
    Return basic metadata for the image at ``path``.

    The result is a JSON-serialisable dict with the image's ``width``,
    ``height`` and ``format`` as reported by Pillow, plus ``mtime``: the
    file's modification time truncated to whole seconds.
    """
    # Open the image as a context manager so its file handle is released
    # as soon as the attributes have been read.
    with Image.open(path) as im:
        info = {
            "width": im.width,
            "height": im.height,
            "format": im.format,
        }

    info["mtime"] = int(os.path.getmtime(path))
    return info


if __name__ == "__main__":
    # The single command-line argument is the path to one image; its info
    # is printed to stdout as one JSON object.
    print(json.dumps(get_info(sys.argv[1])))
49 changes: 39 additions & 10 deletions src/_plugins/pillow/get_image_info.rb
Original file line number Diff line number Diff line change
@@ -1,19 +1,48 @@
# Use Python to get some basic info about an image, including the
# dimensions and format.

require 'json'
require 'open3'

# Look up the info for several images with one call to the Python helper.
#
# `image_paths` is serialised to JSON and passed as the only argument to
# `get_image_info.py`; the script's stdout is parsed as JSON and returned.
# Raises a RuntimeError if the script exits unsuccessfully.
def get_image_info(image_paths)
  command = ['python3', 'src/_plugins/pillow/get_image_info.py', JSON.generate(image_paths)]
  output, exit_status = Open3.capture2(*command)
  return JSON.parse(output) if exit_status.success?

  raise "Unable to get info for images #{image_paths}"
end

# Fill `cache` with the info for every image the Python helper can find.
#
# Runs `get_all_image_info.py` once and stores each result under the
# "path--mtime" key that `get_single_image_info` looks up.  The '-1' key is
# a sentinel recording that the warm-up has been done.  If the script fails,
# nothing is stored and images fall back to being looked up one at a time.
#
# NOTE(review): the keys use each path exactly as the Python script prints
# it, so a warm entry is only hit when callers pass the same form of path --
# verify against callers.
def warm_image_info_cache(cache)
  puts 'Image info cache is empty, rebuilding...'

  stdout, status = Open3.capture2('python3', 'src/_plugins/pillow/get_all_image_info.py')
  return unless status.success?

  JSON.parse(stdout).each do |image_path, info|
    cache["#{image_path}--#{info['mtime']}"] = info
  end

  cache['-1'] = -1
end

# Return the info (as parsed from the Python helper's JSON output) for the
# image at `path`, using the 'ImageInfo' Jekyll cache where possible.
#
# Raises a RuntimeError if the image is not cached and the Python helper
# exits unsuccessfully.
def get_single_image_info(path)
  cache = Jekyll::Cache.new('ImageInfo')

  # How this works:
  #
  #   - When we have an empty cache, we get the info for every image
  #     we can find in `src/_images` and warm the cache.
  #
  #   - If the image isn't in the cache (e.g. it was just added) or the
  #     cache entry is invalid (the image has changed), we get the info
  #     for just that image.
  #
  # We want to minimise the calls to Python, because each one involves
  # a minor performance penalty we'd rather avoid.
  #
  # Jekyll will clear the cache every time `_config.yml` changes, or you
  # change your options (e.g. adding the `--profile` flag), so we warm
  # the cache to speed up that initial build.
  warm_image_info_cache(cache) unless cache.key?('-1')

  # The mtime is part of the key, so editing an image invalidates its entry.
  mtime = File.mtime(path).to_i

  cache.getset("#{path}--#{mtime}") do
    stdout, status = Open3.capture2('python3', 'src/_plugins/pillow/get_image_info.py', path)
    raise "Unable to get info for image #{path}" unless status.success?

    JSON.parse(stdout)
  end
end
6 changes: 0 additions & 6 deletions src/_tests/test_pillow.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,8 @@
require 'test/unit'

require_relative '../_plugins/pillow/convert_image'
require_relative '../_plugins/pillow/get_image_info'

class TestPillow < Test::Unit::TestCase
def test_get_image_info
output = get_image_info(['src/_tests/images/gradient.png'])
assert_equal(output, { 'src/_tests/images/gradient.png' => { 'width' => 250, 'height' => 250, 'format' => 'PNG' } })
end

def test_convert_image
Dir.mktmpdir do |d|
convert_image({
Expand Down

0 comments on commit 7c0c1a1

Please sign in to comment.