Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OcrdFile.local_filename returns str, some typing #1182

Merged
merged 1 commit into from
Feb 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/ocrd/processor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import sys
import tarfile
import io
from ocrd.workspace import Workspace

from ocrd_utils import (
VERSION as OCRD_VERSION,
Expand Down Expand Up @@ -49,7 +50,7 @@ class Processor():

def __init__(
self,
workspace,
workspace : Workspace,
ocrd_tool=None,
parameter=None,
# TODO OCR-D/core#274
Expand Down Expand Up @@ -163,7 +164,7 @@ def verify(self):
"""
return True

def process(self):
def process(self) -> None:
"""
Process the :py:attr:`workspace`
from the given :py:attr:`input_file_grp`
Expand Down
2 changes: 1 addition & 1 deletion src/ocrd/processor/builtin/dummy_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class DummyProcessor(Processor):
Bare-bones processor that creates PAGE-XML and optionally copies files from the input group to the output group
"""

def process(self):
def process(self) -> None:
LOG = getLogger('ocrd.dummy')
assert_file_grp_cardinality(self.input_file_grp, 1)
assert_file_grp_cardinality(self.output_file_grp, 1)
Expand Down
21 changes: 17 additions & 4 deletions src/ocrd/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from re import sub
from tempfile import NamedTemporaryFile
from contextlib import contextmanager
from typing import Optional, Union

from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, cvtColor
from PIL import Image
Expand All @@ -13,6 +14,7 @@
import requests

from ocrd_models import OcrdMets, OcrdFile
from ocrd_models.ocrd_file import ClientSideOcrdFile
from ocrd_models.ocrd_page import parse, BorderType, to_xml
from ocrd_modelfactory import exif_from_filename, page_from_file
from ocrd_utils import (
Expand Down Expand Up @@ -71,7 +73,16 @@ class Workspace():
baseurl (string) : Base URL to prefix to relative URL.
"""

def __init__(self, resolver, directory, mets=None, mets_basename=DEFAULT_METS_BASENAME, automatic_backup=False, baseurl=None, mets_server_url=None):
def __init__(
self,
resolver,
directory,
mets : Optional[Union[OcrdMets, ClientSideOcrdMets]] = None,
mets_basename=DEFAULT_METS_BASENAME,
automatic_backup=False,
baseurl=None,
mets_server_url=None
):
self.resolver = resolver
self.directory = directory
self.mets_target = str(Path(directory, mets_basename))
Expand Down Expand Up @@ -328,14 +339,16 @@ def rename_file_group(self, old, new):
local_filename_replacements = {}
log.info("Moving files")
for mets_file in self.mets.find_files(fileGrp=old, local_only=True):
new_local_filename = old_local_filename = str(mets_file.local_filename)
new_local_filename = old_local_filename = mets_file.local_filename
assert new_local_filename
assert old_local_filename
# Directory part
new_local_filename = sub(r'^%s/' % old, r'%s/' % new, new_local_filename)
# File part
new_local_filename = sub(r'/%s' % old, r'/%s' % new, new_local_filename)
local_filename_replacements[str(mets_file.local_filename)] = new_local_filename
# move file from ``old`` to ``new``
mets_file.local_filename.rename(new_local_filename)
Path(old_local_filename).rename(new_local_filename)
# change the url of ``mets:file``
mets_file.local_filename = new_local_filename
# change the file ID and update structMap
Expand Down Expand Up @@ -375,7 +388,7 @@ def rename_file_group(self, old, new):

@deprecated_alias(pageId="page_id")
@deprecated_alias(ID="file_id")
def add_file(self, file_grp, content=None, **kwargs):
def add_file(self, file_grp, content=None, **kwargs) -> Union[OcrdFile, ClientSideOcrdFile]:
"""
Add a file to the :py:class:`ocrd_models.ocrd_mets.OcrdMets` of the workspace.

Expand Down
3 changes: 2 additions & 1 deletion src/ocrd_modelfactory/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""
from datetime import datetime
from pathlib import Path
from typing import Tuple, Union
from yaml import safe_load, safe_dump

from PIL import Image
Expand Down Expand Up @@ -78,7 +79,7 @@ def page_from_image(input_file, with_tree=False):
revmap = dict(((node, element) for element, node in mapping.items()))
return pcgts, etree, mapping, revmap

def page_from_file(input_file, with_tree=False):
def page_from_file(input_file, with_tree=False) -> Union[PcGtsType, Tuple[PcGtsType, ET.Element, dict, dict]]:
"""
Create :py:class:`~ocrd_models.ocrd_page.OcrdPage`
from an :py:class:`~ocrd_models.ocrd_file.OcrdFile` or a file path
Expand Down
73 changes: 43 additions & 30 deletions src/ocrd_models/ocrd_file.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
"""
API to ``mets:file``
"""
from os.path import splitext, basename
from pathlib import Path
from typing import Any, List, Optional, Union

from ocrd_utils import deprecation_warning

from .ocrd_xml_base import ET
from .constants import NAMESPACES as NS, TAG_METS_FLOCAT, TAG_METS_FILE
from .ocrd_xml_base import ET # type: ignore
from .constants import NAMESPACES as NS, TAG_METS_FLOCAT

class OcrdFile():
"""
Expand All @@ -22,9 +22,8 @@ def __init__(self, el, mimetype=None, pageId=None, local_filename=None, mets=Non
mets (OcrdMets): Containing :py:class:`ocrd_models.ocrd_mets.OcrdMets`.
mimetype (string): ``@MIMETYPE`` of this ``mets:file``
pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
local_filename (string): Local filename
url (string): original ``@xlink:href`` of this ``mets:file``
local_filename (Path): ``@xlink:href`` pointing to the locally cached version of the file in the workspace
local_filename (string): ``@xlink:href`` pointing to the locally cached version of the file in the workspace
ID (string): ``@ID`` of this ``mets:file``
loctype (string): DEPRECATED do not use
"""
Expand All @@ -39,7 +38,7 @@ def __init__(self, el, mimetype=None, pageId=None, local_filename=None, mets=Non
self.pageId = pageId

if local_filename:
self.local_filename = Path(local_filename)
self.local_filename = local_filename
if url:
self.url = url

Expand Down Expand Up @@ -70,38 +69,38 @@ def __eq__(self, other):
# self.fileGrp == other.fileGrp

@property
def basename(self):
def basename(self) -> str:
"""
Get the ``.name`` of the local file
"""
if not self.local_filename:
return
return self.local_filename.name
return ''
return Path(self.local_filename).name

@property
def extension(self):
def extension(self) -> str:
if not self.local_filename:
return
return ''.join(self.local_filename.suffixes)
return ''
return ''.join(Path(self.local_filename).suffixes)

@property
def basename_without_extension(self):
def basename_without_extension(self) -> str:
"""
Get the ``os.path.basename`` of the local file, if any, with extension removed.
"""
if not self.local_filename:
return
return self.local_filename.name[:-len(self.extension)]
return ''
return Path(self.local_filename).name[:-len(self.extension)]

@property
def ID(self):
def ID(self) -> str:
"""
Get the ``@ID`` of the ``mets:file``.
"""
return self._el.get('ID')

@ID.setter
def ID(self, ID):
def ID(self, ID : Optional[str]) -> None:
"""
Set the ``@ID`` of the ``mets:file`` to :py:attr:`ID`.
"""
Expand All @@ -116,7 +115,7 @@ def ID(self, ID):
self.pageId = pageId

@property
def pageId(self):
def pageId(self) -> str:
"""
Get the ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file`` (physical page manifestation).
"""
Expand All @@ -125,7 +124,7 @@ def pageId(self):
return self.mets.get_physical_page_for_file(self)

@pageId.setter
def pageId(self, pageId):
def pageId(self, pageId : Optional[str]) -> None:
"""
Set the ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file`` (physical page manifestation) to :py:attr:`pageId`.
"""
Expand All @@ -136,21 +135,21 @@ def pageId(self, pageId):
self.mets.set_physical_page_for_file(pageId, self)

@property
def loctypes(self):
def loctypes(self) -> List[str]:
"""
Get the ``@LOCTYPE``s of the ``mets:file``.
"""
return [x.get('LOCTYPE') for x in self._el.findall('mets:FLocat', NS)]

@property
def mimetype(self):
def mimetype(self) -> str:
"""
Get the ``@MIMETYPE`` of the ``mets:file``.
"""
return self._el.get('MIMETYPE')

@mimetype.setter
def mimetype(self, mimetype):
def mimetype(self, mimetype : Optional[str]) -> None:
"""
Set the ``@MIMETYPE`` of the ``mets:file`` to :py:attr:`mimetype`.
"""
Expand All @@ -159,7 +158,7 @@ def mimetype(self, mimetype):
self._el.set('MIMETYPE', mimetype)

@property
def fileGrp(self):
def fileGrp(self) -> str:
"""
The ``@USE`` of the containing ``mets:fileGrp``
"""
Expand All @@ -169,7 +168,7 @@ def fileGrp(self):
raise ValueError("OcrdFile not related to METS")

@property
def url(self):
def url(self) -> str:
"""
Get the remote/original URL ``@xlink:href`` of this ``mets:file``.
"""
Expand All @@ -179,7 +178,7 @@ def url(self):
return ''

@url.setter
def url(self, url):
def url(self, url : Optional[str]) -> None:
"""
Set the remote/original URL ``@xlink:href`` of this ``mets:file`` to :py:attr:`url`.
"""
Expand All @@ -194,16 +193,17 @@ def url(self, url):
el_FLocat.set("LOCTYPE", "URL")

@property
def local_filename(self):
def local_filename(self) -> Optional[str]:
"""
Get the local/cached ``@xlink:href`` of this ``mets:file``.
"""
el_FLocat = self._el.find('mets:FLocat[@LOCTYPE="OTHER"][@OTHERLOCTYPE="FILE"]', NS)
if el_FLocat is not None:
return Path(el_FLocat.get("{%s}href" % NS["xlink"]))
return el_FLocat.get("{%s}href" % NS["xlink"])
return None

@local_filename.setter
def local_filename(self, fname):
def local_filename(self, fname : Optional[Union[Path, str]]):
"""
Set the local/cached ``@xlink:href`` of this ``mets:file`` to :py:attr:`local_filename`.
"""
Expand All @@ -212,9 +212,11 @@ def local_filename(self, fname):
if el_FLocat is not None:
self._el.remove(el_FLocat)
return
else:
fname = str(fname)
if el_FLocat is None:
el_FLocat = ET.SubElement(self._el, TAG_METS_FLOCAT)
el_FLocat.set("{%s}href" % NS["xlink"], str(fname))
el_FLocat.set("{%s}href" % NS["xlink"], fname)
el_FLocat.set("LOCTYPE", "OTHER")
el_FLocat.set("OTHERLOCTYPE", "FILE")

Expand All @@ -226,7 +228,18 @@ class ClientSideOcrdFile:
this represents the response of the :py:class:`ocrd.mets_server.OcrdMetsServer`.
"""

def __init__(self, el, mimetype=None, pageId=None, loctype='OTHER', local_filename=None, mets=None, url=None, ID=None, fileGrp=None):
def __init__(
self,
el,
mimetype: str = '',
pageId: str = '',
loctype: str ='OTHER',
local_filename: Optional[str] = None,
mets : Any = None,
url: str = '',
ID: str = '',
fileGrp: str = ''
):
"""
Args:
el (): ignored
Expand Down
13 changes: 7 additions & 6 deletions src/ocrd_models/ocrd_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from datetime import datetime
import re
import typing
from lxml import etree as ET
from typing_extensions import Optional
from lxml import etree as ET # type: ignore
from copy import deepcopy
from warnings import warn

Expand Down Expand Up @@ -34,7 +35,7 @@
METS_XML_EMPTY,
)

from .ocrd_xml_base import OcrdXmlDocument, ET
from .ocrd_xml_base import OcrdXmlDocument
from .ocrd_file import OcrdFile
from .ocrd_agent import OcrdAgent

Expand Down Expand Up @@ -134,9 +135,9 @@ def _clear_caches(self):
Deallocates the caches
"""

self._file_cache = None
self._page_cache = None
self._fptr_cache = None
self._file_cache = {}
self._page_cache = {}
self._fptr_cache = {}

def refresh_caches(self):
if self._cache_flag:
Expand Down Expand Up @@ -243,7 +244,7 @@ def find_files(
pageId=None,
mimetype=None,
url=None,
local_filename=None,
local_filename : Optional[str] = None,
local_only=False,
include_fileGrp=None,
exclude_fileGrp=None,
Expand Down
2 changes: 1 addition & 1 deletion src/ocrd_models/ocrd_xml_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Base class for XML documents loaded from either content or filename.
"""
from os.path import exists
from lxml import etree as ET
from lxml import etree as ET # type: ignore

from .constants import NAMESPACES
from .utils import xmllint_format
Expand Down
Loading
Loading