Skip to content

Commit

Permalink
feat(schema): add support for extensions on primitive types
Browse files Browse the repository at this point in the history
i.e. add support for "sunder" fields like _status (sibling of status).

See http://hl7.org/fhir/R4/json.html#primitive for more details.

Example:
{
  "birthDate": "1970-03-30",
  "_birthDate": {
    "id": "314159",
    "extension": [ {
       "url": "http://example.org/fhir/StructureDefinition/text",
       "valueString": "Easter 1970"
    }]
  }
}

These fields will be in the resulting schema if they are present in the
input rows, else they will be left off.
  • Loading branch information
mikix committed Nov 1, 2024
1 parent 4f5906d commit 538d1a6
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 20 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]

steps:
- uses: actions/checkout@v4
Expand Down
2 changes: 1 addition & 1 deletion cumulus_fhir_support/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""FHIR support code for the Cumulus project"""

__version__ = "1.2.1"
__version__ = "1.3.0"

from .json import list_multiline_json_in_dir, read_multiline_json, read_multiline_json_from_dir
from .schemas import pyarrow_schema_from_rows
69 changes: 51 additions & 18 deletions cumulus_fhir_support/schemas.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
"""Detect FHIR resource schemas"""

from collections import namedtuple
from functools import partial
from typing import Any, Iterable, Optional

import pyarrow
from fhirclient.models import (
codeableconcept,
coding,
element,
extension,
fhirabstractbase,
fhirdate,
Expand Down Expand Up @@ -140,7 +140,8 @@ def _create_pyarrow_schema_for_resource(
"""
instance = fhirelementfactory.FHIRElementFactory.instantiate(resource_type, None)

# fhirclient doesn't include `resourceType` in the list of properties. So do that manually.
# fhirclient doesn't include `resourceType` in the list of properties, because it's only
# used in ndjson representations. But it's useful to have, so add it manually.
type_field = pyarrow.field("resourceType", pyarrow.string())

level = 0 if wide else 2
Expand All @@ -153,27 +154,27 @@ def _fhir_obj_to_pyarrow_fields(
base_obj: fhirabstractbase.FHIRAbstractBase, batch_shape: dict, *, level: int
) -> list[pyarrow.Field]:
"""Convert a FHIR instance to a PyArrow Field schema list"""
properties = map(FhirProperty._make, base_obj.elementProperties())
return list(
filter(
None,
map(
partial(
_fhir_to_pyarrow_property,
base_obj=base_obj,
batch_shape=batch_shape,
level=level,
),
properties,
),
)
)
fhir_properties = map(FhirProperty._make, base_obj.elementProperties())
pa_properties = []

for fhir_property in fhir_properties:
if pa_property := _fhir_to_pyarrow_property(
fhir_property,
base_obj=base_obj,
batch_shape=batch_shape,
level=level,
):
pa_properties.append(pa_property)
if pa_sunder := _sunder_to_pyarrow_property(fhir_property, batch_shape=batch_shape):
pa_properties.append(pa_sunder)

return pa_properties


def _fhir_to_pyarrow_property(
prop: FhirProperty,
*,
base_obj: fhirabstractbase.FHIRAbstractBase,
base_obj: Optional[fhirabstractbase.FHIRAbstractBase] = None,
batch_shape: dict = None,
level: int,
) -> Optional[pyarrow.Field]:
Expand Down Expand Up @@ -222,6 +223,38 @@ def _fhir_to_pyarrow_property(
return pyarrow.field(prop.json_name, pyarrow_type, nullable=True)


def _sunder_to_pyarrow_property(
    prop: FhirProperty,
    *,
    batch_shape: Optional[dict] = None,
) -> Optional[pyarrow.Field]:
    """
    Builds the PyArrow field for the "sunder" sibling of a FhirProperty, if one was seen.

    FHIR's JSON format gives primitive values no room for extension information, so that
    information lives in an adjacent field whose name has a single leading underscore
    (a "sunder" field). For example, "status" may be accompanied by "_status".

    See http://hl7.org/fhir/R4/json.html#primitive for more information.

    :param prop: the regular property whose sibling is being looked for
    :param batch_shape: the sibling is only considered if its JSON name is a key in here
    :returns: the sibling's field, or None if batch_shape is empty/missing or lacks the sibling
    """
    sunder_json_name = f"_{prop.json_name}"

    # Nothing to do unless the input actually carries the sunder version.
    sunder_seen = bool(batch_shape) and sunder_json_name in batch_shape
    if not sunder_seen:
        return None

    # Describe the sibling as a stand-in Element property that mirrors the original's
    # cardinality flags, then let the normal conversion decide what (if anything) to emit.
    stand_in = FhirProperty(
        name=f"_{prop.name}",
        json_name=sunder_json_name,
        pytype=element.Element,
        is_list=prop.is_list,
        of_many=prop.of_many,
        required=prop.required,
    )
    return _fhir_to_pyarrow_property(stand_in, level=LEVEL_INCLUSION, batch_shape=batch_shape)


def _basic_fhir_to_pyarrow_type(pytype: type) -> pyarrow.DataType:
"""Converts a basic python type to a PyArrow type"""
if pytype is int:
Expand Down
71 changes: 71 additions & 0 deletions tests/test_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,3 +232,74 @@ def test_unexpected_fhir_type(self, mock_instantiate):
mock_instantiate.return_value = mock_resource
with self.assertRaisesRegex(ValueError, "Unexpected type: <class 'object'>"):
support.pyarrow_schema_from_rows("AllergyIntolerance")

def test_primitive_field_extension(self):
    """Extensions on primitive fields ("sunder" siblings) appear in the schema when present"""
    # Background: http://hl7.org/fhir/R4/json.html#primitive
    row = {
        # Non-existent sunder field
        "_doesNotExist": {"id": "test-fake"},
        # Only an extension, without an ID
        "_status": {"extension": [{"valueCode": "test-status"}]},
        # Only an ID, without an extension (the bogus modifierExtension should be ignored)
        "_priority": {"id": "test-priority", "modifierExtension": "not-supported"},
        # Sibling of an array field
        "_instantiatesUri": [
            None,
            {"id": "test-array"},
            {"extension": [{"url": "test"}]},
        ],
        # Sibling nested a couple levels down
        "dispenseRequest": {
            "validityPeriod": {"_start": {"id": "test-start"}},
        },
    }
    schema = support.pyarrow_schema_from_rows("MedicationRequest", [row])

    # Expected shapes, named so the assertions below read at a glance.
    id_only_type = pyarrow.struct({"id": pyarrow.string()})
    status_type = pyarrow.struct(
        {
            "extension": pyarrow.list_(pyarrow.struct({"valueCode": pyarrow.string()})),
        }
    )
    instantiates_uri_type = pyarrow.list_(
        pyarrow.struct(
            {
                "extension": pyarrow.list_(pyarrow.struct({"url": pyarrow.string()})),
                "id": pyarrow.string(),
            }
        )
    )

    # Neither the made-up sibling nor the never-specified `_intent` should be in the schema.
    for absent_name in ("_doesNotExist", "_intent"):
        self.assertEqual(-1, schema.get_field_index(absent_name))

    self.assertEqual(status_type, schema.field("_status").type)
    self.assertEqual(id_only_type, schema.field("_priority").type)
    self.assertEqual(instantiates_uri_type, schema.field("_instantiatesUri").type)

    validity_period_type = schema.field("dispenseRequest").type.field("validityPeriod").type
    self.assertEqual(id_only_type, validity_period_type.field("_start").type)

0 comments on commit 538d1a6

Please sign in to comment.