Skip to content

Commit

Permalink
Fingerprint 1389 (#1417)
Browse files Browse the repository at this point in the history
* Implemented signature, #1389 for sync and exportdb

* Fix for linux

* Fix for linux

* Add test for #1389
  • Loading branch information
RhetTbull authored Feb 27, 2024
1 parent 0a01952 commit d113f3c
Show file tree
Hide file tree
Showing 9 changed files with 259 additions and 191 deletions.
132 changes: 2 additions & 130 deletions osxphotos/cli/import_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@
import datetime
import fnmatch
import json
import logging
import os
import os.path
import pathlib
import sqlite3
import sys
import uuid
from contextlib import suppress
from textwrap import dedent
from typing import TYPE_CHECKING, Callable, List, Optional, Tuple, Union
Expand Down Expand Up @@ -42,13 +40,11 @@
from osxphotos.cli.param_types import FunctionCall, StrpDateTimePattern, TemplateString
from osxphotos.datetime_utils import (
datetime_has_tz,
datetime_naive_to_local,
datetime_remove_tz,
datetime_tz_to_utc,
datetime_utc_to_local,
)
from osxphotos.exiftool import ExifToolCaching, get_exiftool_path
from osxphotos.fingerprint import fingerprint
from osxphotos.exiftool import get_exiftool_path
from osxphotos.fingerprintquery import FingerprintQuery
from osxphotos.metadata_reader import (
MetaData,
Expand All @@ -57,6 +53,7 @@
metadata_from_sidecar,
)
from osxphotos.photoinfo import PhotoInfoNone
from osxphotos.photoinfo_file import PhotoInfoFromFile
from osxphotos.photosalbum import PhotosAlbumPhotoScript
from osxphotos.phototemplate import PhotoTemplate, RenderOptions
from osxphotos.sqlitekvstore import SQLiteKVStore
Expand Down Expand Up @@ -128,131 +125,6 @@ def get_sidecar_file(
return sidecar_file


class PhotoInfoFromFile:
"""Mock PhotoInfo class for a file to be imported
Returns None for most attributes but allows some templates like exiftool and created to work correctly
"""

def __init__(
self,
filepath: Union[str, pathlib.Path],
exiftool: str | None = None,
sidecar: str | None = None,
):
self._path = str(filepath)
self._exiftool_path = exiftool or EXIFTOOL_PATH
self._uuid = str(uuid.uuid1()).upper()
self._sidecar = sidecar
if sidecar:
self._metadata = metadata_from_sidecar(pathlib.Path(sidecar), exiftool)
elif self._exiftool_path:
self._metadata = metadata_from_file(
pathlib.Path(filepath), self._exiftool_path
)
else:
self._metadata = MetaData(
title="", description="", keywords=[], location=(None, None)
)

@property
def uuid(self):
return self._uuid

@property
def original_filename(self):
return pathlib.Path(self._path).name

@property
def filename(self):
return pathlib.Path(self._path).name

@property
def date(self):
"""Use file creation date and local time zone if not exiftool or sidecar"""
if self._metadata.date:
if dt := self._metadata.date:
return datetime_naive_to_local(dt)

ctime = os.path.getctime(self._path)
dt = datetime.datetime.fromtimestamp(ctime)
return datetime_naive_to_local(dt)

@property
def path(self):
"""Path to photo file"""
return self._path

@property
def keywords(self) -> list[str]:
"""list of keywords for picture"""
return self._metadata.keywords

@property
def persons(self) -> list[str]:
"""list of persons in picture"""
return self._metadata.persons

@property
def title(self) -> str | None:
"""name / title of picture"""
return self._metadata.title

@property
def description(self) -> str | None:
"""description of picture"""
return self._metadata.description

@property
def exiftool(self):
"""Returns a ExifToolCaching (read-only instance of ExifTool) object for the photo.
Requires that exiftool (https://exiftool.org/) be installed
If exiftool not installed, logs warning and returns None
If photo path is missing, returns None
"""
try:
# return the memoized instance if it exists
return self._exiftool
except AttributeError:
try:
exiftool_path = self._exiftool_path or get_exiftool_path()
if self._path is not None and os.path.isfile(self._path):
exiftool = ExifToolCaching(self._path, exiftool=exiftool_path)
else:
exiftool = None
except FileNotFoundError:
# get_exiftool_path raises FileNotFoundError if exiftool not found
exiftool = None
logging.warning(
"exiftool not in path; download and install from https://exiftool.org/"
)

self._exiftool = exiftool
return self._exiftool

def render_template(
self, template_str: str, options: Optional[RenderOptions] = None
):
"""Renders a template string for PhotoInfo instance using PhotoTemplate
Args:
template_str: a template string with fields to render
options: a RenderOptions instance
Returns:
([rendered_strings], [unmatched]): tuple of list of rendered strings and list of unmatched template values
"""
options = options or RenderOptions(caller="import")
template = PhotoTemplate(self, exiftool_path=self._exiftool_path)
return template.render(template_str, options)

def __getattr__(self, name):
"""Return None for any other non-private attribute"""
if not name.startswith("_"):
return None
raise AttributeError()


def import_photo(
filepath: pathlib.Path, dup_check: bool, verbose: Callable[..., None]
) -> Tuple[Optional[Photo], str | None]:
Expand Down
10 changes: 3 additions & 7 deletions osxphotos/cli/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import click

from osxphotos import PhotoInfo, PhotosDB, __version__
from osxphotos.photo_signature import photo_signature
from osxphotos.photoinfo import PhotoInfoNone
from osxphotos.photoquery import (
IncompatibleQueryOptions,
Expand Down Expand Up @@ -142,11 +143,6 @@ def open_metadata_db(db_path: str):
return metadata_db


def key_from_photo(photo: PhotoInfo) -> str:
    """Build the key used to correlate a photo between libraries.

    The key is the photo's fingerprint and its original filename, joined by
    a colon.
    """
    return "{}:{}".format(photo.fingerprint, photo.original_filename)


def get_photo_metadata(photos: list[PhotoInfo]) -> str:
"""Return JSON string of metadata for photos; if more than one photo, merge metadata"""
if len(photos) == 1:
Expand Down Expand Up @@ -216,7 +212,7 @@ def export_metadata_to_db(
# as there is no way to know which photo is the "correct" one
key_to_photos = {}
for photo in photos:
key = key_from_photo(photo)
key = photo_signature(photo)
if key in key_to_photos:
key_to_photos[key].append(photo)
else:
Expand Down Expand Up @@ -278,7 +274,7 @@ def import_metadata(
# build mapping of key to photo
key_to_photo = {}
for photo in photos:
key = key_from_photo(photo)
key = photo_signature(photo)
if key in key_to_photo:
key_to_photo[key].append(photo)
else:
Expand Down
52 changes: 9 additions & 43 deletions osxphotos/export_db_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
""" Utility functions for working with export_db """


from __future__ import annotations

import contextlib
Expand All @@ -19,8 +18,9 @@
from ._constants import OSXPHOTOS_EXPORT_DB, SQLITE_CHECK_SAME_THREAD
from ._version import __version__
from .configoptions import ConfigOptions
from .export_db import OSXPHOTOS_EXPORTDB_VERSION, ExportDB
from .export_db import ExportDB
from .fileutil import FileUtil
from .photo_signature import photo_signature
from .photosdb import PhotosDB
from .utils import hexdigest, noop

Expand Down Expand Up @@ -239,7 +239,7 @@ def export_db_touch_files(
"""
export_dir = pathlib.Path(export_dir)

# open and close exportdb to ensure it gets migrated
# open and close exportdb to ensure it gets migrated
exportdb = ExportDB(dbfile, export_dir)
if upgraded := exportdb.was_upgraded:
verbose_(
Expand Down Expand Up @@ -340,38 +340,21 @@ def export_db_migrate_photos_library(

verbose(f"Loading data from Photos library {photos_library}")
photosdb = PhotosDB(dbfile=photos_library, verbose=verbose)
photosdb_fingerprint = {}
photosdb_signature = {}
photosdb_cloud_guid = {}
photosdb_name_size = {}
photosdb_shared = {}
for photo in photosdb.photos():
photosdb_fingerprint[
f"{photo.original_filename}:{photo.fingerprint}"
] = photo.uuid
photosdb_signature[photo_signature(photo)] = photo.uuid
photosdb_cloud_guid[
f"{photo.original_filename}:{photo.cloud_guid}"
] = photo.uuid
photosdb_name_size[
f"{photo.original_filename}:{photo.original_filesize}"
] = photo.uuid
if photo.shared:
photosdb_shared[_shared_photo_key(photo)] = photo.uuid
verbose("Matching photos in export database to photos in Photos library")
matched = 0
notmatched = 0
for uuid, photoinfo in exportdb_uuids.items():
if photoinfo.get("shared"):
key = _shared_photo_key(photoinfo)
if key in photosdb_shared:
new_uuid = photosdb_shared[key]
verbose(
f"[green]Matched by shared info[/green]: [uuid]{uuid}[/] -> [uuid]{new_uuid}[/]"
)
_export_db_update_uuid_info(
conn, uuid, new_uuid, photoinfo, photosdb, dry_run
)
matched += 1
continue
if cloud_guid := photoinfo.get("cloud_guid", None):
key = f"{photoinfo['original_filename']}:{cloud_guid}"
if key in photosdb_cloud_guid:
Expand All @@ -384,12 +367,11 @@ def export_db_migrate_photos_library(
)
matched += 1
continue
if fingerprint := photoinfo.get("fingerprint", None):
key = f"{photoinfo['original_filename']}:{fingerprint}"
if key in photosdb_fingerprint:
new_uuid = photosdb_fingerprint[key]
if signature := photo_signature(photoinfo):
if signature in photosdb_signature:
new_uuid = photosdb_signature[signature]
verbose(
f"[green]Matched by fingerprint[/green]: [uuid]{uuid}[/] -> [uuid]{new_uuid}[/]"
f"[green]Matched by signature[/green]: [uuid]{uuid}[/] -> [uuid]{new_uuid}[/]"
)
_export_db_update_uuid_info(
conn, uuid, new_uuid, photoinfo, photosdb, dry_run
Expand Down Expand Up @@ -420,22 +402,6 @@ def export_db_migrate_photos_library(
return (matched, notmatched)


def _shared_photo_key(photo: PhotoInfo | dict[str, Any]) -> str:
    """Build a key for matching a shared photo between libraries.

    Args:
        photo: a PhotoInfo (converted with asdict()) or a dict of photo info

    Returns:
        the cloud owner hashed id, original height, original width, isphoto,
        ismovie and date values joined by colons; a datetime date is rendered
        in ISO format
    """
    info = photo
    if isinstance(photo, PhotoInfo):
        info = photo.asdict()

    when = info.get("date")
    if isinstance(when, datetime.datetime):
        when = when.isoformat()

    field_names = (
        "cloud_owner_hashed_id",
        "original_height",
        "original_width",
        "isphoto",
        "ismovie",
    )
    values = [info.get(field) for field in field_names]
    values.append(when)
    return ":".join(format(value) for value in values)


def _export_db_update_uuid_info(
conn: sqlite3.Connection,
uuid: str,
Expand Down
6 changes: 3 additions & 3 deletions osxphotos/fingerprintquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,14 @@ def photos_by_filename_size(
ZADDITIONALASSETATTRIBUTES.ZORIGINALFILENAME
FROM {asset_table}
JOIN ZADDITIONALASSETATTRIBUTES ON ZADDITIONALASSETATTRIBUTES.ZASSET = {asset_table}.Z_PK
WHERE ZADDITIONALASSETATTRIBUTES.ZORIGINALFILENAME = ?
AND ZADDITIONALASSETATTRIBUTES.ZORIGINALFILESIZE = ?
WHERE ZADDITIONALASSETATTRIBUTES.ZORIGINALFILESIZE = ?
AND LOWER(ZADDITIONALASSETATTRIBUTES.ZORIGINALFILENAME) = LOWER(?)
"""

if not in_trash:
sql += f"\nAND {asset_table}.ZTRASHEDSTATE = 0"

results = self.conn.execute(sql, (filename, size)).fetchall()
results = self.conn.execute(sql, (size, filename)).fetchall()
results = [
(row[0], photos_timestamp_to_datetime(row[1], row[2]), row[3])
for row in results
Expand Down
2 changes: 1 addition & 1 deletion osxphotos/iphoto.py
Original file line number Diff line number Diff line change
Expand Up @@ -1257,7 +1257,7 @@ def moment_info(self) -> iPhotoMomentInfo | None:

@cached_property
def fingerprint(self) -> str | None:
"""Returns fingerprint of original photo as a string; returns None if not available"""
"""Returns fingerprint of original photo as a string; returns None if not available. On linux, returns None."""
if not is_macos:
logger.warning("fingerprint only supported on macOS")
return None
Expand Down
52 changes: 52 additions & 0 deletions osxphotos/photo_signature.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""Compute unique signature for photos"""

from __future__ import annotations

import datetime
import os

from .photoinfo import PhotoInfo
from .photoinfo_file import PhotoInfoFromDict, PhotoInfoFromFile
from .platform import is_macos

if is_macos:
from .fingerprint import fingerprint


def photo_signature(
    photo: PhotoInfo | PhotoInfoFromFile | dict | str | os.PathLike,
    exiftool: str | None = None,
) -> str:
    """Compute photo signature for a PhotoInfo, a PhotoInfo dict, or file path

    Args:
        photo: a PhotoInfo, an existing PhotoInfoFromFile / PhotoInfoFromDict,
            a dict (wrapped in PhotoInfoFromDict), or a file path (wrapped in
            PhotoInfoFromFile)
        exiftool: optional path to exiftool; only used when photo is a file
            path and has to be wrapped in PhotoInfoFromFile

    Returns:
        For a shared photo, the value of _shared_photo_signature(). Otherwise
        the lower-cased original filename followed by a colon and, in order of
        preference: the photo's fingerprint, a fingerprint computed from the
        photo's path (macOS only), or the original file size.
    """
    if isinstance(photo, dict):
        photo = PhotoInfoFromDict(photo)
    elif not isinstance(photo, (PhotoInfo, PhotoInfoFromFile, PhotoInfoFromDict)):
        # Only wrap things that are not already photo-like objects; an
        # existing PhotoInfoFromFile / PhotoInfoFromDict must not be treated
        # as a file path and wrapped a second time.
        photo = PhotoInfoFromFile(photo, exiftool=exiftool)

    if photo.shared:
        return _shared_photo_signature(photo)

    # the filename is lower-cased so the signature does not depend on its case
    name = photo.original_filename.lower()

    if photo.fingerprint:
        return f"{name}:{photo.fingerprint}"

    if photo.path and is_macos:
        # `fingerprint` is only imported on macOS, hence the is_macos guard
        return f"{name}:{fingerprint(photo.path)}"

    return f"{name}:{photo.original_filesize}"


def _shared_photo_signature(
    photo: PhotoInfo | PhotoInfoFromFile | PhotoInfoFromDict,
) -> str:
    """Build a key for matching a shared photo between libraries.

    Returns:
        the photo's cloud_owner_hashed_id, original_height, original_width,
        isphoto, ismovie and date joined by colons; a datetime date is
        rendered in ISO format
    """
    when = photo.date
    if isinstance(when, datetime.datetime):
        when = when.isoformat()

    parts = (
        photo.cloud_owner_hashed_id,
        photo.original_height,
        photo.original_width,
        photo.isphoto,
        photo.ismovie,
        when,
    )
    return ":".join(format(part) for part in parts)
Loading

0 comments on commit d113f3c

Please sign in to comment.