From ed8b5da2fb9b3196fc05c4d9e49364200695e215 Mon Sep 17 00:00:00 2001 From: aleixpuig <94959119+aleixpuigb@users.noreply.github.com> Date: Mon, 25 Nov 2024 15:28:26 +0000 Subject: [PATCH 1/4] Create script --- src/scripts/2D_FTU_images.py | 105 +++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 src/scripts/2D_FTU_images.py diff --git a/src/scripts/2D_FTU_images.py b/src/scripts/2D_FTU_images.py new file mode 100644 index 000000000..dcbab195b --- /dev/null +++ b/src/scripts/2D_FTU_images.py @@ -0,0 +1,105 @@ +import csv +import sys + +print("Python version:", sys.version) +print("Python executable:", sys.executable) + +try: + import requests + print("Requests imported successfully!") +except ModuleNotFoundError as e: + print("Error:", e) + +# Define SPARQL endpoint and query (from https://github.com/hubmapconsortium/ccf-grlc/blob/main/hra/ftu-parts.rq) +SPARQL_ENDPOINT = "https://lod.humanatlas.io/sparql" # Update if necessary +SPARQL_QUERY = """ +PREFIX dcat: +PREFIX prov: +PREFIX schema: +PREFIX rdf: +PREFIX rdfs: +PREFIX ccf: +PREFIX UBERON: +PREFIX obo: +PREFIX HRA: +PREFIX LOD: + +SELECT DISTINCT ?ftu_digital_object ?ftu_digital_object_doi ?image_url ?organ_iri ?ftu_iri ?ftu_part_iri +WHERE { + ?ftu_illustration a ccf:FtuIllustration ; + a ?ftu_iri ; + ccf:ccf_located_in ?organ_id ; + ccf:illustration_node [ a ?ftu_part_iri ] ; + ccf:image_file [ + ccf:file_format ?format ; + ccf:file_url ?image_url + ] . + + HRA: prov:hadMember ?versioned_ftu . + + GRAPH LOD: { + ?versioned_ftu prov:wasDerivedFrom [ + ccf:doi ?ftu_digital_object_doi + ] . 
+ } + + BIND(IRI(REPLACE(?organ_id, 'UBERON:', STR(UBERON:))) as ?organ_iri) + BIND(IRI(REPLACE(STR(?ftu_illustration), "#primary", "")) as ?ftu_digital_object) + + FILTER(?format = "image/png") # or "image/svg+xml" + FILTER(STRSTARTS(STR(?ftu_iri), STR(obo:))) + FILTER(STRSTARTS(STR(?ftu_part_iri), STR(obo:))) + FILTER(STRSTARTS(STR(?versioned_ftu), STR(?ftu_digital_object))) +} +""" + +# Function to run SPARQL query and fetch results +def fetch_sparql_results(endpoint, query): + sparql = SPARQLWrapper(endpoint) + sparql.setQuery(query) + sparql.setReturnFormat(JSON) + results = sparql.query().convert() + return results["results"]["bindings"] + +# Function to generate ROBOT template CSV +def generate_robot_template(data, output_file): + with open(output_file, mode='w', newline='', encoding='utf-8') as file: + writer = csv.writer(file) + # Write header for ROBOT template + writer.writerow([ + "ID", "Label", "Definition", "Comment", "Annotation:source", + "Annotation:image_url", "Related To" + ]) + # Process each row in data + for row in data: + ftu_iri = row["ftu_iri"]["value"] + ftu_part_iri = row["ftu_part_iri"]["value"] + organ_iri = row["organ_iri"]["value"] + image_url = row["image_url"]["value"] + doi = row["ftu_digital_object_doi"]["value"] + ftu_digital_object = row["ftu_digital_object"]["value"] + + # Add to ROBOT template + writer.writerow([ + ftu_iri, # ID + ftu_part_iri.split("/")[-1], # Label (last part of IRI) + f"FTU illustration for {organ_iri}", # Definition + f"Associated digital object: {ftu_digital_object}", # Comment + doi, # Annotation:source + image_url, # Annotation:image_url + organ_iri # Related To + ]) + +# Main execution +def main(): + print("Fetching SPARQL results...") + data = fetch_sparql_results(SPARQL_ENDPOINT, SPARQL_QUERY) + print(f"Fetched {len(data)} records.") + + output_file = "robot_template.csv" + print(f"Generating ROBOT template: {output_file}") + generate_robot_template(data, output_file) + print(f"Template saved to 
{output_file}") + +if __name__ == "__main__": + main() From 044d9968deac87c82a933778308a47e6c0bdcb71 Mon Sep 17 00:00:00 2001 From: aleixpuig <94959119+aleixpuigb@users.noreply.github.com> Date: Tue, 26 Nov 2024 11:38:30 +0000 Subject: [PATCH 2/4] Retrieve only latest version of FTU digital objects --- src/scripts/2D_FTU_images.py | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/src/scripts/2D_FTU_images.py b/src/scripts/2D_FTU_images.py index dcbab195b..6074d6087 100644 --- a/src/scripts/2D_FTU_images.py +++ b/src/scripts/2D_FTU_images.py @@ -1,14 +1,5 @@ import csv -import sys - -print("Python version:", sys.version) -print("Python executable:", sys.executable) - -try: - import requests - print("Requests imported successfully!") -except ModuleNotFoundError as e: - print("Error:", e) +from SPARQLWrapper import SPARQLWrapper, JSON # Define SPARQL endpoint and query (from https://github.com/hubmapconsortium/ccf-grlc/blob/main/hra/ftu-parts.rq) SPARQL_ENDPOINT = "https://lod.humanatlas.io/sparql" # Update if necessary @@ -25,10 +16,11 @@ PREFIX LOD: SELECT DISTINCT ?ftu_digital_object ?ftu_digital_object_doi ?image_url ?organ_iri ?ftu_iri ?ftu_part_iri +FROM HRA: WHERE { ?ftu_illustration a ccf:FtuIllustration ; - a ?ftu_iri ; - ccf:ccf_located_in ?organ_id ; + a ?ftu_iri ; + ccf:ccf_located_in ?organ_id ; ccf:illustration_node [ a ?ftu_part_iri ] ; ccf:image_file [ ccf:file_format ?format ; @@ -67,8 +59,8 @@ def generate_robot_template(data, output_file): writer = csv.writer(file) # Write header for ROBOT template writer.writerow([ - "ID", "Label", "Definition", "Comment", "Annotation:source", - "Annotation:image_url", "Related To" + "FTU_IRI", "FTU_part_IRI", "Organ_IRI", "FTU_digital_object", "DOI", + "Image_URL", ]) # Process each row in data for row in data: @@ -82,12 +74,11 @@ def generate_robot_template(data, output_file): # Add to ROBOT template writer.writerow([ ftu_iri, # ID - ftu_part_iri.split("/")[-1], # 
Label (last part of IRI) - f"FTU illustration for {organ_iri}", # Definition - f"Associated digital object: {ftu_digital_object}", # Comment + ftu_part_iri, # Label (last part of IRI) + organ_iri, # Definition + ftu_digital_object, # Comment doi, # Annotation:source image_url, # Annotation:image_url - organ_iri # Related To ]) # Main execution @@ -102,4 +93,4 @@ def main(): print(f"Template saved to {output_file}") if __name__ == "__main__": - main() + main() \ No newline at end of file From b738c67612eb42abbf7fd4f0fff294d4716e4f08 Mon Sep 17 00:00:00 2001 From: aleixpuig <94959119+aleixpuigb@users.noreply.github.com> Date: Tue, 26 Nov 2024 15:39:14 +0000 Subject: [PATCH 3/4] Use GRLC endpoint --- src/scripts/2D_FTU_images.py | 133 +++++++++++++---------------------- 1 file changed, 47 insertions(+), 86 deletions(-) diff --git a/src/scripts/2D_FTU_images.py b/src/scripts/2D_FTU_images.py index 6074d6087..cb389436c 100644 --- a/src/scripts/2D_FTU_images.py +++ b/src/scripts/2D_FTU_images.py @@ -1,96 +1,57 @@ import csv -from SPARQLWrapper import SPARQLWrapper, JSON - -# Define SPARQL endpoint and query (from https://github.com/hubmapconsortium/ccf-grlc/blob/main/hra/ftu-parts.rq) -SPARQL_ENDPOINT = "https://lod.humanatlas.io/sparql" # Update if necessary -SPARQL_QUERY = """ -PREFIX dcat: -PREFIX prov: -PREFIX schema: -PREFIX rdf: -PREFIX rdfs: -PREFIX ccf: -PREFIX UBERON: -PREFIX obo: -PREFIX HRA: -PREFIX LOD: - -SELECT DISTINCT ?ftu_digital_object ?ftu_digital_object_doi ?image_url ?organ_iri ?ftu_iri ?ftu_part_iri -FROM HRA: -WHERE { - ?ftu_illustration a ccf:FtuIllustration ; - a ?ftu_iri ; - ccf:ccf_located_in ?organ_id ; - ccf:illustration_node [ a ?ftu_part_iri ] ; - ccf:image_file [ - ccf:file_format ?format ; - ccf:file_url ?image_url - ] . - - HRA: prov:hadMember ?versioned_ftu . - - GRAPH LOD: { - ?versioned_ftu prov:wasDerivedFrom [ - ccf:doi ?ftu_digital_object_doi - ] . 
- } - - BIND(IRI(REPLACE(?organ_id, 'UBERON:', STR(UBERON:))) as ?organ_iri) - BIND(IRI(REPLACE(STR(?ftu_illustration), "#primary", "")) as ?ftu_digital_object) - - FILTER(?format = "image/png") # or "image/svg+xml" - FILTER(STRSTARTS(STR(?ftu_iri), STR(obo:))) - FILTER(STRSTARTS(STR(?ftu_part_iri), STR(obo:))) - FILTER(STRSTARTS(STR(?versioned_ftu), STR(?ftu_digital_object))) -} -""" - -# Function to run SPARQL query and fetch results -def fetch_sparql_results(endpoint, query): - sparql = SPARQLWrapper(endpoint) - sparql.setQuery(query) - sparql.setReturnFormat(JSON) - results = sparql.query().convert() - return results["results"]["bindings"] - -# Function to generate ROBOT template CSV -def generate_robot_template(data, output_file): +import requests + +# Define the API URL +API_URL = "https://grlc.io/api-git/hubmapconsortium/ccf-grlc/subdir/hra/ftu-parts" + +# Function to fetch CSV data from the API +def fetch_api_data(url): + headers = {"Accept": "text/csv"} # Request CSV format + response = requests.get(url, headers=headers) + if response.status_code == 200: + # Return the CSV data as text + return response.text + else: + print(f"Failed to fetch data from the API. 
Status code: {response.status_code}") + return None + +# Function to parse CSV data into a list of rows +def parse_csv_data(csv_data): + rows = [] + # Use csv.reader to parse the CSV content + for row in csv.reader(csv_data.splitlines()): + rows.append(row) + return rows + +# Function to generate ROBOT template CSV from API data +def generate_robot_template(data, header, output_file): with open(output_file, mode='w', newline='', encoding='utf-8') as file: writer = csv.writer(file) - # Write header for ROBOT template - writer.writerow([ - "FTU_IRI", "FTU_part_IRI", "Organ_IRI", "FTU_digital_object", "DOI", - "Image_URL", - ]) - # Process each row in data + # Write the header directly from the API response to the ROBOT template + writer.writerow(header) + + # Process each row in the API data for row in data: - ftu_iri = row["ftu_iri"]["value"] - ftu_part_iri = row["ftu_part_iri"]["value"] - organ_iri = row["organ_iri"]["value"] - image_url = row["image_url"]["value"] - doi = row["ftu_digital_object_doi"]["value"] - ftu_digital_object = row["ftu_digital_object"]["value"] - - # Add to ROBOT template - writer.writerow([ - ftu_iri, # ID - ftu_part_iri, # Label (last part of IRI) - organ_iri, # Definition - ftu_digital_object, # Comment - doi, # Annotation:source - image_url, # Annotation:image_url - ]) + writer.writerow(row) # Write the row without modification # Main execution def main(): - print("Fetching SPARQL results...") - data = fetch_sparql_results(SPARQL_ENDPOINT, SPARQL_QUERY) - print(f"Fetched {len(data)} records.") - - output_file = "robot_template.csv" - print(f"Generating ROBOT template: {output_file}") - generate_robot_template(data, output_file) - print(f"Template saved to {output_file}") + print("Fetching data from the API...") + csv_data = fetch_api_data(API_URL) + if csv_data: + # Parse the CSV data into rows + rows = parse_csv_data(csv_data) + print(f"Fetched {len(rows)} rows from the API.") + + # The first row is the header, so we use it directly + 
header = rows[0] + # All the remaining rows are the data + data = rows[1:] + + output_file = "robot_template.csv" + print(f"Generating ROBOT template: {output_file}") + generate_robot_template(data, header, output_file) + print(f"Template saved to {output_file}") if __name__ == "__main__": main() \ No newline at end of file From fd4a748fe19f71c2cad72d52f38f37da08887b2c Mon Sep 17 00:00:00 2001 From: aleixpuig <94959119+aleixpuigb@users.noreply.github.com> Date: Tue, 26 Nov 2024 15:47:58 +0000 Subject: [PATCH 4/4] Simplify code to store CSV template directly from API --- src/scripts/2D_FTU_images.py | 50 +++++++----------------------------- 1 file changed, 9 insertions(+), 41 deletions(-) diff --git a/src/scripts/2D_FTU_images.py b/src/scripts/2D_FTU_images.py index cb389436c..3f67e43d0 100644 --- a/src/scripts/2D_FTU_images.py +++ b/src/scripts/2D_FTU_images.py @@ -1,57 +1,25 @@ -import csv import requests # Define the API URL API_URL = "https://grlc.io/api-git/hubmapconsortium/ccf-grlc/subdir/hra/ftu-parts" -# Function to fetch CSV data from the API -def fetch_api_data(url): +# Function to fetch CSV data from the API and save it directly to a file +def fetch_and_save_csv(url, output_file): headers = {"Accept": "text/csv"} # Request CSV format response = requests.get(url, headers=headers) if response.status_code == 200: - # Return the CSV data as text - return response.text + # Write the CSV data directly to the output file + with open(output_file, mode='w', newline='', encoding='utf-8') as file: + file.write(response.text) + print(f"CSV data saved to {output_file}") else: print(f"Failed to fetch data from the API. 
Status code: {response.status_code}") - return None - -# Function to parse CSV data into a list of rows -def parse_csv_data(csv_data): - rows = [] - # Use csv.reader to parse the CSV content - for row in csv.reader(csv_data.splitlines()): - rows.append(row) - return rows - -# Function to generate ROBOT template CSV from API data -def generate_robot_template(data, header, output_file): - with open(output_file, mode='w', newline='', encoding='utf-8') as file: - writer = csv.writer(file) - # Write the header directly from the API response to the ROBOT template - writer.writerow(header) - - # Process each row in the API data - for row in data: - writer.writerow(row) # Write the row without modification # Main execution def main(): - print("Fetching data from the API...") - csv_data = fetch_api_data(API_URL) - if csv_data: - # Parse the CSV data into rows - rows = parse_csv_data(csv_data) - print(f"Fetched {len(rows)} rows from the API.") - - # The first row is the header, so we use it directly - header = rows[0] - # All the remaining rows are the data - data = rows[1:] - - output_file = "robot_template.csv" - print(f"Generating ROBOT template: {output_file}") - generate_robot_template(data, header, output_file) - print(f"Template saved to {output_file}") + output_file = "robot_template.csv" + print(f"Fetching and saving CSV template to {output_file}...") + fetch_and_save_csv(API_URL, output_file) if __name__ == "__main__": main() \ No newline at end of file