diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py index 6a57a47261..110b1372b6 100644 --- a/learning_resources/etl/loaders_test.py +++ b/learning_resources/etl/loaders_test.py @@ -60,6 +60,7 @@ LearningResourcePlatformFactory, LearningResourceRunFactory, LearningResourceTopicFactory, + LearningResourceTopicMappingFactory, PodcastEpisodeFactory, PodcastFactory, ProgramFactory, @@ -718,6 +719,49 @@ def test_load_topics(mocker, parent_factory, topics_exist): assert parent.learning_resource.topics.count() == 0 +@pytest.mark.parametrize( + ("raw_topics", "expected_topics"), + [ + (["Technology:AI/Machine Learning", "Management"], ["Management"]), + ( + ["Technology:AI/Machine Learning", "Business:Management"], + [], + ), + (["Machine Learning", "Management"], ["Machine Learning", "Management"]), + (["AI", "Machine Learning"], ["AI", "Machine Learning"]), + ( + ["AI", "Machine Learning", "Technology:AI/Machine Learning"], + ["AI", "Machine Learning"], + ), + ], +) +def test_load_mixed_topics_data(raw_topics, expected_topics): + """Test that topics are correctly parsed from data containing valid & invalid topics""" + resource = LearningResourceFactory.create(is_course=True, topics=[]) + offeror = LearningResourceOfferorFactory.create(is_xpro=True) + LearningResourceTopicMappingFactory.create( + offeror=offeror, + topic=LearningResourceTopicFactory.create(name="AI"), + topic_name="AI/Machine Learning", + ) + LearningResourceTopicMappingFactory.create( + offeror=offeror, + topic=LearningResourceTopicFactory.create(name="Machine Learning"), + topic_name="AI/Machine Learning", + ) + LearningResourceTopicMappingFactory.create( + offeror=offeror, + topic=LearningResourceTopicFactory.create(name="Management"), + topic_name="Management", + ) + + load_topics(resource, [{"name": topic} for topic in raw_topics]) + + assert sorted([topic.name for topic in resource.topics.all()]) == sorted( + expected_topics + ) + + @pytest.mark.parametrize("instructor_exists", [True, False]) def test_load_instructors(instructor_exists): """Test that load_instructors creates and/or assigns instructors to the course run""" diff --git a/learning_resources/etl/xpro.py b/learning_resources/etl/xpro.py index c6f3f4b225..6fdc136be1 100644 --- a/learning_resources/etl/xpro.py +++ b/learning_resources/etl/xpro.py @@ -20,7 +20,6 @@ from learning_resources.etl.utils import ( generate_course_numbers_json, transform_delivery, - transform_topics, ) from main.utils import clean_data @@ -52,27 +51,6 @@ def _parse_datetime(value): return parse(value).replace(tzinfo=UTC) if value else None -def parse_topics(resource_data: dict) -> list[dict]: - """ - Get a list containing {"name": } dict objects - Args: - resource_data: course or program data - Returns: - list of dict: list containing topic dicts with a name attribute - """ - extracted_topics = resource_data["topics"] - if not extracted_topics: - return [] - return transform_topics( - [ - {"name": topic["name"].split(":")[-1].strip()} - for topic in extracted_topics - if topic - ], - OfferedBy.xpro.name, - ) - - def extract_programs(): """Loads the xPro catalog data""" # noqa: D401 if settings.XPRO_CATALOG_API_URL: @@ -147,7 +125,7 @@ def _transform_learning_resource_course(course): "published": any( course_run.get("current_price", None) for course_run in course["courseruns"] ), - "topics": parse_topics(course), + "topics": course["topics"], "runs": [ _transform_run(course_run, course) for course_run in course["courseruns"] ], @@ -197,7 +175,7 @@ def transform_programs(programs): program["current_price"] ), # a program is only considered published if it has a product/price "url": program["url"], - "topics": parse_topics(program), + "topics": program["topics"], "platform": XPRO_PLATFORM_TRANSFORM.get(program["platform"], None), "resource_type": LearningResourceType.program.name, "delivery": transform_delivery(program.get("format")), diff --git a/learning_resources/etl/xpro_test.py b/learning_resources/etl/xpro_test.py index 7acb9e8226..c561ce550f 100644 --- a/learning_resources/etl/xpro_test.py +++ b/learning_resources/etl/xpro_test.py @@ -20,12 +20,7 @@ from learning_resources.etl.utils import ( transform_delivery, ) -from learning_resources.etl.xpro import _parse_datetime, parse_topics -from learning_resources.factories import ( - LearningResourceOfferorFactory, - LearningResourceTopicFactory, - LearningResourceTopicMappingFactory, -) +from learning_resources.etl.xpro import _parse_datetime from learning_resources.test_utils import set_up_topics from main.test_utils import any_instance_of @@ -109,7 +104,7 @@ def test_xpro_transform_programs(mock_xpro_programs_data): "published": bool(program_data["current_price"]), "url": program_data["url"], "availability": Availability.dated.name, - "topics": parse_topics(program_data), + "topics": program_data["topics"], "platform": PlatformType.xpro.name, "resource_type": LearningResourceType.program.name, "delivery": transform_delivery(program_data.get("format")), @@ -156,7 +151,7 @@ def test_xpro_transform_programs(mock_xpro_programs_data): for course_run in course_data["courseruns"] ), "availability": Availability.dated.name, - "topics": parse_topics(course_data), + "topics": course_data["topics"], "resource_type": LearningResourceType.course.name, "continuing_ed_credits": course_data.get("credits"), "pace": [Pace.self_paced.name], @@ -233,7 +228,7 @@ def test_xpro_transform_courses(mock_xpro_courses_data): for course_run in course_data["courseruns"] ), "availability": Availability.dated.name, - "topics": parse_topics(course_data), + "topics": course_data["topics"], "resource_type": LearningResourceType.course.name, "runs": [ { @@ -324,31 +319,3 @@ def test_program_run_start_date_value( assert transformed_programs[0]["runs"][0]["start_date"] == _parse_datetime( expected_dt ) - - -def test_parse_topics_data(): - """Test that topics are correctly parsed from the xpro data""" - offeror = LearningResourceOfferorFactory.create(is_xpro=True) - LearningResourceTopicMappingFactory.create( - offeror=offeror, - topic=LearningResourceTopicFactory.create(name="AI"), - topic_name="AI/Machine Learning", - ) - LearningResourceTopicMappingFactory.create( - offeror=offeror, - topic=LearningResourceTopicFactory.create(name="Machine Learning"), - topic_name="AI/Machine Learning", - ) - LearningResourceTopicMappingFactory.create( - offeror=offeror, - topic=LearningResourceTopicFactory.create(name="Management"), - topic_name="Management", - ) - course_data = { - "topics": [{"name": "AI/Machine Learning"}, {"name": "Management"}], - } - assert sorted(parse_topics(course_data), key=lambda topic: topic["name"]) == [ - {"name": "AI"}, - {"name": "Machine Learning"}, - {"name": "Management"}, - ] diff --git a/test_json/xpro_courses.json b/test_json/xpro_courses.json index 1cf1fc6266..5c15082e15 100644 --- a/test_json/xpro_courses.json +++ b/test_json/xpro_courses.json @@ -9,7 +9,10 @@ "courseruns": [], "next_run_id": null, "platform": "xPRO", - "topics": [{ "name": "Business:Leadership & Organizations" }], + "topics": [ + { "name": "Organizations & Leadership" }, + { "name": "Business:Leadership & Organizations" } + ], "format": "Online", "availability": "dated", "credits": "1.25" @@ -38,7 +41,7 @@ } ], "next_run_id": 49, - "topics": [{ "name": "Business:Leadership & Organizations" }], + "topics": [{ "name": "Organizations & Leadership" }], "format": "In person", "availability": "dated", "credits": "2.25"