From db04f0fe42e42e21fd5c0ead4c80066d663531c8 Mon Sep 17 00:00:00 2001 From: Michael Terry Date: Mon, 6 Jan 2025 10:03:05 -0300 Subject: [PATCH] docs: clarify that sort order is guaranteed --- README.md | 4 ++-- cumulus_fhir_support/json.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 33a8f06..91bc634 100644 --- a/README.md +++ b/README.md @@ -20,9 +20,9 @@ import cumulus_fhir_support cumulus_fhir_support.list_multiline_json_in_dir("/") # { -# "/random.jsonl": None, # "/con1.ndjson": "Condition", # "/pat1.jsonl": "Patient", +# "/random.jsonl": None, # } cumulus_fhir_support.list_multiline_json_in_dir("/", "Patient") @@ -77,10 +77,10 @@ import cumulus_fhir_support list(cumulus_fhir_support.read_multiline_json_from_dir("/")) # [ -# {"description": "not a fhir object"}, # {"resourceType": "Condition", "id": "con1", "onsetDateTime": "2011-11-24"}, # {"resourceType": "Patient", "id": "pat1", "birthDate": "2020-10-16"}, # {"resourceType": "Patient", "id": "pat2", "birthDate": "2013-04-18"}, +# {"description": "not a fhir object"}, # ] list(cumulus_fhir_support.read_multiline_json_from_dir("/", "Condition")) diff --git a/cumulus_fhir_support/json.py b/cumulus_fhir_support/json.py index 6eadbe1..d93a182 100644 --- a/cumulus_fhir_support/json.py +++ b/cumulus_fhir_support/json.py @@ -67,7 +67,7 @@ def list_multiline_json_in_dir( - Will return an empty dict if the path does not exist. - Passing None as the resource filter (the default) will return all multi-line JSON found. - Returned filenames will be full paths. - - The order of filenames will be consistent across calls. + - The order of returned filenames will be consistent across calls (Python sort order). - This function will notice both JSON Lines (.jsonl) and NDJSON (.ndjson) files. 
Examples: @@ -105,7 +105,7 @@ def list_multiline_json_in_dir( # Now grab filenames for all target resource types results = {} - for child in sorted(children): # sort for reproducibility + for child in sorted(children): # sorted as an API promise results.update(_get_resource_type(child, resource, fsspec_fs=fsspec_fs)) return results @@ -229,7 +229,8 @@ def read_multiline_json_from_dir( - Will return an empty result if the path does not exist or is not readable. - Passing None as the resource filter (the default) will return all multi-line JSON found. - The lines of JSON are not required to be dictionaries. - The order of results will be consistent across calls. + - The order of results will be consistent across calls (filenames are Python-sorted first, + then rows are returned from each file in order, top to bottom). - This function will notice both JSON Lines (.jsonl) and NDJSON (.ndjson) files. :param path: the folder to scan