Skip to content

Commit

Permalink
Merge pull request #268 from clamsproject/develop
Browse files Browse the repository at this point in the history
releasing 1.0.11
  • Loading branch information
keighrim authored Mar 31, 2024
2 parents 1693cbc + 187968c commit 1803922
Show file tree
Hide file tree
Showing 21 changed files with 539 additions and 60 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
include ./VERSION
include ./requirements.txt
include ./requirements.cv
include ./requirements.seq

9 changes: 5 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,12 @@ $(generatedcode): dist/$(sdistname)*.tar.gz
docs: latest := $(shell git tag | sort -r | head -n 1)
docs: VERSION $(generatedcode)
rm -rf docs
# sphinx-multiversion documentation docs -b html -a -vvv
sphinx-build documentation docs -b html -a
pip install --upgrade -r requirements.txt
pip install --upgrade -r requirements.old
sphinx-multiversion documentation docs -b html -a -vvv
touch docs/.nojekyll
# ln -sf $(latest) docs/latest
# echo "<!DOCTYPE html> <html> <head> <title>Redirect to latest version</title> <meta charset=\"utf-8\"> <meta http-equiv=\"refresh\" content=\"0; url=./latest/index.html\"> </head> </html>" > docs/index.html
ln -sf $(latest) docs/latest
echo "<!DOCTYPE html> <html> <head> <title>Redirect to latest version</title> <meta charset=\"utf-8\"> <meta http-equiv=\"refresh\" content=\"0; url=./latest/index.html\"> </head> </html>" > docs/index.html

doc: VERSION $(generatedcode) # for single version sphinx - only use when developing
rm -rf docs
Expand Down
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
## MultiMedia Interchange Format
[MMIF](https://mmif.clams.ai) is a JSON(-LD)-based data format designed for transferring annotation data between computational analysis applications in the [CLAMS project](https://clams.ai).


## mmif-python
`mmif-python` is a Python implementation of the MMIF data format.
`mmif-python` provides various helper classes and functions to handle MMIF JSON in Python,
including:

1. de-/serialization of MMIF internal data structures to/from JSON
2. validation of MMIF JSON
3. handling of CLAMS vocabulary types
4. navigation of MMIF objects via various "search" methods (e.g. `mmif.get_all_views_contain(vocab_type)`)

## For more ...
* [Version history and patch notes](https://github.com/clamsproject/mmif-python/blob/main/CHANGELOG.md)
* [MMIF Python API documentation](https://clamsproject.github.io/mmif-python)
* [MMIF JSON specification and schema](https://clamsproject.github.io/mmif)
8 changes: 8 additions & 0 deletions documentation/autodoc/mmif.utils.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,11 @@ mmif.utils.video_document_helper module
:members:
:undoc-members:
:show-inheritance:

mmif.utils.sequence_helper module
---------------------------------

.. automodule:: mmif.utils.sequence_helper
:members:
:undoc-members:
:show-inheritance:
2 changes: 1 addition & 1 deletion documentation/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def linkcode_resolve(domain, info):

# configuration for multiversion extension
# Whitelist pattern for tags (set to None to ignore all tags)
smv_tag_whitelist = r'^[0-9]+\.[0-9]+\.[0-9]+.*$'
smv_tag_whitelist = r'^[0-9]+\.[0-9]+\.[0-9]+.*$'
# Whitelist pattern for branches (set to None to ignore all branches)
smv_branch_whitelist = None
# Whitelist pattern for remotes (set to None to use local branches only)
Expand Down
1 change: 1 addition & 0 deletions documentation/target-versions.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"``mmif-python`` version","Target MMIF Specification"
1.0.10.dev1,"1.0.3"
1.0.10,"1.0.2"
1.0.9,"1.1.0"
1.0.8,"1.0.0"
Expand Down
18 changes: 9 additions & 9 deletions mmif/serialize/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@
from urllib.parse import urlparse

from mmif.vocabulary import ThingTypesBase, DocumentTypesBase
from .model import MmifObject, JSON_PRMTV_TYPES
from .model import MmifObject, PRMTV_TYPES
from .. import DocumentTypes, AnnotationTypes
import mmif_docloc_http

__all__ = ['Annotation', 'AnnotationProperties', 'Document', 'DocumentProperties', 'Text']

T = TypeVar('T')
LIST_PRMTV = typing.List[JSON_PRMTV_TYPES] # list of values (most cases for annotation props)
LIST_PRMTV = typing.List[PRMTV_TYPES] # list of values (most cases for annotation props)
LIST_LIST_PRMTV = typing.List[LIST_PRMTV] # list of list of values (e.g. for coordinates)
DICT_PRMTV = typing.Dict[str, JSON_PRMTV_TYPES] # dict of values (`text` prop of `TextDocument` and other complex props)
DICT_PRMTV = typing.Dict[str, PRMTV_TYPES] # dict of values (`text` prop of `TextDocument` and other complex props)
DICT_LIST_PRMTV = typing.Dict[str, LIST_PRMTV] # dict of list of values (even more complex props)


Expand Down Expand Up @@ -77,7 +77,7 @@ def _add_prop_aliases(self, key_to_add, val_to_add):
subtypes, and effectively deprecated `frameType` and `boxType`
in `TimeFrame` and `BoundingBox` respectively.
"""
prop_aliases = AnnotationTypes.prop_aliases.get(self._type.shortname, {})
prop_aliases = AnnotationTypes._prop_aliases.get(self._type.shortname, {})
for alias_reprep, alias_group in prop_aliases.items():
if key_to_add in alias_group:
for alias in alias_group:
Expand Down Expand Up @@ -129,10 +129,10 @@ def id(self, aid: str) -> None:

@staticmethod
def check_prop_value_is_simple_enough(
value: Union[JSON_PRMTV_TYPES, LIST_PRMTV, LIST_LIST_PRMTV, DICT_PRMTV, DICT_LIST_PRMTV]) -> bool:
value: Union[PRMTV_TYPES, LIST_PRMTV, LIST_LIST_PRMTV, DICT_PRMTV, DICT_LIST_PRMTV]) -> bool:

def json_primitives(x):
return isinstance(x, typing.get_args(JSON_PRMTV_TYPES))
return isinstance(x, typing.get_args(PRMTV_TYPES))

def json_primitives_list(x):
return isinstance(x, list) and all(map(json_primitives, x))
Expand All @@ -146,7 +146,7 @@ def json_primitives_list_of_list(x):
or (isinstance(value, dict) and all(map(lambda x: isinstance(x[0], str) and (json_primitives(x[1]) or json_primitives_list(x[1])), value.items())))

def add_property(self, name: str,
value: Union[JSON_PRMTV_TYPES, LIST_PRMTV, LIST_LIST_PRMTV, DICT_PRMTV, DICT_LIST_PRMTV]) -> None:
value: Union[PRMTV_TYPES, LIST_PRMTV, LIST_LIST_PRMTV, DICT_PRMTV, DICT_LIST_PRMTV]) -> None:
"""
Adds a property to the annotation's properties.
:param name: the name of the property
Expand All @@ -162,7 +162,7 @@ def add_property(self, name: str,
# f"(\"{name}\": \"{str(value)}\"")
self._add_prop_aliases(name, value)

def get(self, prop_name: str) -> Union['AnnotationProperties', JSON_PRMTV_TYPES, LIST_PRMTV, LIST_LIST_PRMTV, DICT_PRMTV, DICT_LIST_PRMTV]:
def get(self, prop_name: str) -> Union['AnnotationProperties', PRMTV_TYPES, LIST_PRMTV, LIST_LIST_PRMTV, DICT_PRMTV, DICT_LIST_PRMTV]:
"""
A special getter for Annotation properties. This is to allow for
directly accessing properties without having to go through the
Expand Down Expand Up @@ -242,7 +242,7 @@ def __init__(self, doc_obj: Optional[Union[bytes, str, dict]] = None) -> None:
super().__init__(doc_obj)

def add_property(self, name: str,
value: Union[JSON_PRMTV_TYPES, LIST_PRMTV]
value: Union[PRMTV_TYPES, LIST_PRMTV]
) -> None:
"""
Adds a property to the document's properties.
Expand Down
41 changes: 29 additions & 12 deletions mmif/serialize/mmif.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

import json
import math
import warnings
from collections import defaultdict
from datetime import datetime
Expand Down Expand Up @@ -123,12 +124,20 @@ def _deserialize(self, input_dict: dict) -> None:
# then, do the same to associated Document objects. Note that,
# in a view, it is guaranteed that all Annotation objects are not duplicates
if ann.at_type == AnnotationTypes.Annotation:
doc_id = ann.get_property('document')
try:
doc_id = ann.get_property('document')
for prop_key, prop_value in ann.properties.items():
self.get_document_by_id(doc_id)._props_ephemeral[prop_key] = prop_value
except KeyError:
warnings.warn(f"Annotation {ann.id} has a document ID {doc_id} that does not exist in the MMIF object. Skipping.", RuntimeWarning)
warnings.warn(f"Annotation {ann.id} (in view {view.id}) has a document ID {doc_id} that "
f"does not exist in the MMIF object. Skipping.", RuntimeWarning)
# lastly, add quick access to `start` and `end` values if the annotation is using `targets` property
if 'targets' in ann.properties:
if 'start' in ann.properties or 'end' in ann.properties:
raise ValueError(f"Annotation {ann.id} (in view {view.id}) has `targes` and `start`/`end/` "
f"properties at the same time. Annotation anchors are ambiguous.")
ann._props_ephemeral['start'] = self._get_linear_anchor_point(ann, start=True)
ann._props_ephemeral['end'] = self._get_linear_anchor_point(ann, start=False)

def generate_capital_annotations(self):
"""
Expand Down Expand Up @@ -427,7 +436,8 @@ def get_views_for_document(self, doc_id: str) -> List[View]:
next(annotations)
views.append(view)
except StopIteration:
# search failed by the full doc_id string, now try trimming the view_id from the string and re-do the search
# means search failed by the full doc_id string,
# now try trimming the view_id from the string and re-do the search
if Mmif.id_delimiter in doc_id:
vid, did = doc_id.split(Mmif.id_delimiter)
if view.id == vid:
Expand Down Expand Up @@ -479,29 +489,36 @@ def get_view_contains(self, at_types: Union[ThingTypesBase, str, List[Union[str,
return view
return None

def _get_linear_anchor_point(self, ann: Annotation, start: bool = True) -> Union[int, float]:
def _get_linear_anchor_point(self, ann: Annotation, targets_sorted=False, start: bool = True) -> Union[int, float]:
# TODO (krim @ 2/5/24): Update the return type once timeunits are unified to `ms` as integers (https://github.com/clamsproject/mmif/issues/192)
"""
Retrieves the anchor point of the annotation. Currently, this method only supports linear anchors,
namely time and text, hence does not work with spatial anchors (polygons or video-object).
:param ann: An Annotation object that has a linear anchor point. Namely, some subtypes of `Region` vocabulary type.
:param start: If True, returns the start anchor point. Otherwise, returns the end anchor point. N/A for `timePoint` anchors.
:param targets_sorted: If True, the method will assume that the targets are sorted in the order of the anchor points.
:return: the anchor point of the annotation. 1d for linear regions (time, text)
"""
props = ann.properties
if 'timePoint' in props:
return ann.get_property('timePoint')
elif 'targets' in props:

def get_target_ann(cur_ann, target_id):
if Mmif.id_delimiter not in target_id:
target_id = Mmif.id_delimiter.join((cur_ann.parent, target_id))
return self.__getitem__(target_id)

if not targets_sorted:
point = math.inf if start else -1
comp = min if start else max
for target_id in ann.get_property('targets'):
target = get_target_ann(ann, target_id)
point = comp(point, self._get_linear_anchor_point(target, start=start))
return point
target_id = ann.get_property('targets')[0 if start else -1]
# TODO (krim @ 2/5/24): not sure if this is the correct way to pick the "first" (or "last") target,
# since the targets list is not guaranteed to be sorted.
# However, due the recursive nature of this method, it is likely impossible
# to get all `start` (or `end`) values of the targets recursively and then pick the min (or max) value.
if Mmif.id_delimiter in target_id:
target = self.__getitem__(target_id)
else:
target = self.__getitem__(Mmif.id_delimiter.join((ann.parent, target_id)))
target = get_target_ann(ann, target_id)
return self._get_linear_anchor_point(target, start=start)
elif (start and 'start' in props) or (not start and 'end' in props):
return ann.get_property('start' if start else 'end')
Expand Down
4 changes: 2 additions & 2 deletions mmif/serialize/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@

T = TypeVar('T')
S = TypeVar('S')
JSON_PRMTV_TYPES: Type = Union[str, int, float, bool, None]
PRMTV_TYPES: Type = Union[str, int, float, bool, None]

__all__ = [
'MmifObject',
'MmifObjectEncoder',
'DataList',
'DataDict',
'JSON_PRMTV_TYPES'
'PRMTV_TYPES'
]


Expand Down
47 changes: 40 additions & 7 deletions mmif/serialize/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from datetime import datetime
from typing import Dict, Union, Optional, Generator, List, cast

from mmif.serialize.model import PRMTV_TYPES
from mmif.vocabulary import ThingTypesBase, ClamsTypesBase
from .annotation import Annotation, Document
from .model import MmifObject, DataList, DataDict
Expand Down Expand Up @@ -243,7 +244,8 @@ def __init__(self, viewmetadata_obj: Optional[Union[bytes, str, dict]] = None) -
self.timestamp: Optional[datetime] = None
self.app: str = ''
self.contains: ContainsDict = ContainsDict()
self.parameters: dict = {}
self.parameters: Dict[str, str] = {}
self.app_configuration: Dict[str, Union[PRMTV_TYPES, List[PRMTV_TYPES]]] = {}
self.error: Union[dict, ErrorDict] = {}
self.warnings: List[str] = []
self._required_attributes = ["app"]
Expand Down Expand Up @@ -282,16 +284,43 @@ def new_contain(self, at_type: Union[str, ThingTypesBase], **contains_metadata)
self.add_contain(new_contain, at_type)
return new_contain

def add_contain(self, contain: 'Contain', at_type: Union[str, ThingTypesBase]):
def add_contain(self, contain: 'Contain', at_type: Union[str, ThingTypesBase]) -> None:
self.contains[at_type] = contain

def add_parameters(self, **runtime_params):
self.parameters.update(dict(runtime_params))
def add_app_configuration(self, config_key: str, config_value: Union[PRMTV_TYPES, List[PRMTV_TYPES]]) -> None:
    """
    Record one app configuration entry on this view metadata.

    :param config_key: name of the configuration option
    :param config_value: value to store under ``config_key``; any value
                         already stored under that key is replaced
    """
    self.app_configuration.update({config_key: config_value})

def get_app_configuration(self, config_key: str) -> Union[PRMTV_TYPES, List[PRMTV_TYPES]]:
    """
    Get a configuration value from the app_configuration dictionary.

    :param config_key: name of the configuration option to look up
    :return: the value stored under ``config_key``
    :raises KeyError: if ``config_key`` is not present in ``app_configuration``;
                      the message includes the key and the output of ``self.serialize()``
    """
    try:
        return self.app_configuration[config_key]
    except KeyError:
        # The bare lookup KeyError only carries the key, which the new message
        # already includes, so suppress the implicit exception chain.
        raise KeyError(
            f"app is not configured for \"{config_key}\" key in the view: {self.serialize()}"
        ) from None

def add_parameter(self, param_key, param_value):
def add_parameters(self, **runtime_params: str):
    """
    Register several runtime parameters on the view metadata in one call.

    Every keyword argument is forwarded, one at a time and in the order
    given, to ``add_parameter``. Values are annotated as strings.

    :param runtime_params: parameter names mapped to their (string) values
    """
    for name in runtime_params:
        self.add_parameter(name, runtime_params[name])

def add_parameter(self, param_key: str, param_value: str):
    """
    Add a single runtime parameter to the view metadata. Note that parameter value must be a string.

    :param param_key: name of the runtime parameter
    :param param_value: value of the runtime parameter; must be a ``str``
    :raises AssertionError: if ``param_value`` is not a ``str``
    """
    # Raise explicitly instead of using an `assert` statement so the check is
    # not stripped when Python runs with `-O`. The exception type stays
    # AssertionError so existing callers that catch it keep working.
    if not isinstance(param_value, str):
        raise AssertionError(
            f"Parameter value must be a string, \"{param_value}\" ({type(param_value)}) is given for key \"{param_key}\".")
    self.parameters[param_key] = param_value

def get_parameter(self, param_key):
def get_parameter(self, param_key: str) -> str:
"""
Get a runtime parameter from the view metadata.
"""
try:
return self.parameters[param_key]
except KeyError:
Expand Down Expand Up @@ -384,7 +413,11 @@ def get(self, key: Union[str, ThingTypesBase], default=None):
return self._items.get(key, default)

def __contains__(self, item: Union[str, ThingTypesBase]):
    """
    Report whether ``item`` is one of the keys of the underlying ``_items`` mapping.

    :param item: a key to look for. A plain string is compared against the
                 ``str()`` form of every key; any other object is looked up
                 in ``_items`` directly.
    :return: True if a matching key exists, False otherwise
    """
    if isinstance(item, str):
        # Compare lazily and stop at the first match instead of building a
        # list of every stringified key first.
        return any(str(key) == item for key in self._items)
    return item in self._items

def pop(self, key):
self._items.pop(key)
Loading

0 comments on commit 1803922

Please sign in to comment.