diff --git a/umich_catalog_indexing/lib/common/subjects/field.rb b/umich_catalog_indexing/lib/common/subjects/field.rb index f70b6f95..9ead8221 100644 --- a/umich_catalog_indexing/lib/common/subjects/field.rb +++ b/umich_catalog_indexing/lib/common/subjects/field.rb @@ -41,15 +41,15 @@ def to_remediated match = _matching_deprecated_field sfields = @field.subfields.filter_map.with_index do |sf, index| - unless match["normalized"]["450"][sf.code] + unless match["normalized"]["4xx"][sf.code] &.include?(normalized_sfs[index]["value"]) MARC::Subfield.new(sf.code, sf.value) end end remediated_field = MARC::DataField.new(@field.tag, @field.indicator1, "7", *sfields) - match["given"]["150"].keys.each do |code| - match["given"]["150"][code].each do |value| + match["given"]["1xx"].keys.each do |code| + match["given"]["1xx"][code].each do |value| remediated_field.append(MARC::Subfield.new(code, value)) end end @@ -63,9 +63,9 @@ def to_remediated # generated because there were already remediated fields def to_deprecated match = _matching_remediated_field - match["given"]["450"].map do |f| + match["given"]["4xx"].map do |f| sfields = @field.subfields.filter_map.with_index do |sf, index| - unless match["normalized"]["150"][sf.code] + unless match["normalized"]["1xx"][sf.code] &.include?(normalized_sfs[index]["value"]) MARC::Subfield.new(sf.code, sf.value) end @@ -85,11 +85,11 @@ def to_deprecated def _matching_deprecated_field @_matching_deprecated_field ||= begin @mapping.each_with_index do |this_to_that, index| - # Find the matching index of the 450 array where all of the + # Find the matching index of the 4xx array where all of the # deprecated subfields are found in the bib record subject field - dep_match_index = this_to_that["450"].index.with_index do |deprecated_subfields, dep_index| + dep_match_index = this_to_that["4xx"].index.with_index do |deprecated_subfields, dep_index| deprecated_subfields.keys.all? do |code| - @normalized_mapping[index]["450"][dep_index][code].all? do |dep_sf_value| + @normalized_mapping[index]["4xx"][dep_index][code].all? do |dep_sf_value| _sf_in_field?(code: code, sf_value: dep_sf_value) end end @@ -98,12 +98,12 @@ def _matching_deprecated_field unless dep_match_index.nil? return { "given" => { - "150" => this_to_that["150"], - "450" => @mapping[index]["450"][dep_match_index] + "1xx" => this_to_that["1xx"], + "4xx" => @mapping[index]["4xx"][dep_match_index] }, "normalized" => { - "150" => @normalized_mapping[index]["150"], - "450" => @normalized_mapping[index]["450"][dep_match_index] + "1xx" => @normalized_mapping[index]["1xx"], + "4xx" => @normalized_mapping[index]["4xx"][dep_match_index] } } end @@ -119,8 +119,8 @@ def _matching_remediated_field @_matching_remediated_field ||= begin # find the index in mapping where the field from the bib record contains all of the values index = @mapping.index.with_index do |this_to_that, i| - this_to_that["150"].keys.all? do |code| - @normalized_mapping[i]["150"][code].all? do |dep_sf_value| + this_to_that["1xx"].keys.all? do |code| + @normalized_mapping[i]["1xx"][code].all? do |dep_sf_value| _sf_in_field?(code: code, sf_value: dep_sf_value) end end diff --git a/umich_catalog_indexing/lib/common/subjects/remediation_map.rb b/umich_catalog_indexing/lib/common/subjects/remediation_map.rb index 1278dd47..9dc0c550 100644 --- a/umich_catalog_indexing/lib/common/subjects/remediation_map.rb +++ b/umich_catalog_indexing/lib/common/subjects/remediation_map.rb @@ -14,24 +14,24 @@ def initialize(mapping = JSON.parse(File.read(File.join(S.translation_map_dir, " def _create_normalized_mapping @mapping.map do |heading| new_heading = {} - heading["150"].keys.each do |code| - new_heading[code] = heading["150"][code].map do |value| + heading["1xx"].keys.each do |code| + new_heading[code] = heading["1xx"][code].map do |value| normalize_sf(value) end end - dep_headings = heading["450"].map.with_index do |_, index| + dep_headings = heading["4xx"].map.with_index do |_, index| dep_heading = {} - heading["450"][index].keys.each do |code| - dep_heading[code] = heading["450"][index][code].map do |value| + heading["4xx"][index].keys.each do |code| + dep_heading[code] = heading["4xx"][index][code].map do |value| normalize_sf(value) end end dep_heading end { - "150" => new_heading, - "450" => dep_headings + "1xx" => new_heading, + "4xx" => dep_headings } end end diff --git a/umich_catalog_indexing/lib/jobs/translation_map_generator/subject_heading_remediation.rb b/umich_catalog_indexing/lib/jobs/translation_map_generator/subject_heading_remediation.rb index eb0a134f..b1f765a0 100644 --- a/umich_catalog_indexing/lib/jobs/translation_map_generator/subject_heading_remediation.rb +++ b/umich_catalog_indexing/lib/jobs/translation_map_generator/subject_heading_remediation.rb @@ -51,6 +51,8 @@ def to_a end class Authority + AUTHORIZED_TERM_FIELDS = ["100", "110", "111", "130", "150", "151", "155"] + VARIANT_TERM_FIELDS = ["400", "410", "411", "430", "450", "451", "455"] SUBFIELDS = ["a", "v", "x", "y", "z"] # @param authority_record_id [String] authority record id # @return [Job::TranslationMapGenerator::SubjecHeadingRemediation::Set::Authority] an Authority object @@ -70,7 +72,7 @@ def initialize(data) # code. def remediated_term out_hash = Hash.new { |h, key| h[key] = [] } - @record.fields("150").first.subfields.each do |sf| + AUTHORIZED_TERM_FIELDS.filter_map { |f| @record.fields(f)&.first }.first.subfields.each do |sf| out_hash[sf.code].push(sf.value) if SUBFIELDS.include?(sf.code) end out_hash @@ -80,7 +82,7 @@ def remediated_term # deprecated terms. The keys of the hash are the subfield code, the # value is an array of terms for the code. def deprecated_terms - @record.fields("450").map do |field| + VARIANT_TERM_FIELDS.filter_map { |f| @record.fields(f) unless @record.fields.empty? }.flatten.map do |field| out_hash = Hash.new { |h, key| h[key] = [] } field.subfields.each do |sf| out_hash[sf.code].push(sf.value) if SUBFIELDS.include?(sf.code) diff --git a/umich_catalog_indexing/spec/common/subject/field_spec.rb b/umich_catalog_indexing/spec/common/subject/field_spec.rb index 11559b7e..5bcb0fdf 100644 --- a/umich_catalog_indexing/spec/common/subject/field_spec.rb +++ b/umich_catalog_indexing/spec/common/subject/field_spec.rb @@ -4,14 +4,14 @@ @field = remediated_field @mapping = [ { - "150" => { + "1xx" => { "a" => ["A"], "x" => ["X1", "X2"], "v" => ["V1", "V2"], "y" => ["Y1", "Y2"], "z" => ["Z1", "Z2"] }, - "450" => [ + "4xx" => [ { "a" => ["deprecated A"], "x" => ["deprecated X1", "deprecated X2"], @@ -71,13 +71,13 @@ def _normalize_sf(str) expect(subject.remediable?).to eq(true) end it "is false when any subfield doesn't match deprecated field" do - @mapping[0]["450"][0]["v"][1] = "something other deprecated v" + @mapping[0]["4xx"][0]["v"][1] = "something other deprecated v" expect(subject.remediable?).to eq(false) end it "is true when the second mapping entity has the matching deprecated field" do @mapping.insert(0, { - "150" => {"a" => ["blah"]}, - "450" => [{"a" => ["whatever"]}] + "1xx" => {"a" => ["blah"]}, + "4xx" => [{"a" => ["whatever"]}] }) @field = deprecated_field expect(subject.remediable?).to eq(true) @@ -111,7 +111,7 @@ def _normalize_sf(str) expect(subject.already_remediated?).to eq(true) end it "returns false when it is missing a subfield" do - @mapping[0]["150"]["v"][1] = "something other than v" + @mapping[0]["1xx"]["v"][1] = "something other than v" expect(subject.already_remediated?).to eq(false) end it "returns true when the matching field has an extra field" do @@ -120,8 +120,8 @@ def _normalize_sf(str) end it "is true when the second mapping entity has the matching remediated field" do @mapping.insert(0, { - "150" => {"a" => ["blah"]}, - "450" => [{"a" => ["whatever"]}] + "1xx" => {"a" => ["blah"]}, + "4xx" => [{"a" => ["whatever"]}] }) expect(subject.already_remediated?).to eq(true) end diff --git a/umich_catalog_indexing/spec/fixtures/subjects/geo_authority_record.json b/umich_catalog_indexing/spec/fixtures/subjects/geo_authority_record.json new file mode 100644 index 00000000..c061ab8f --- /dev/null +++ b/umich_catalog_indexing/spec/fixtures/subjects/geo_authority_record.json @@ -0,0 +1,23 @@ +{ + "mms_id": 98188860307506380, + "record_format": "marc21_authority", + "title": "Mexico, Gulf of, Watershed", + "created_by": "Ex Libris", + "created_date": "2025-02-25Z", + "last_modified_by": "rednaal", + "last_modified_date": "2025-02-25Z", + "originating_system": "LIBRARY_OF_CONGRESS", + "originating_system_id": "98188860307506381", + "cataloging_level": { + "value": "00", + "desc": "Default Level" + }, + "vocabulary": { + "value": "MIUSH", + "desc": "miush" + }, + "anies": [ + "00729nz a2200169n 450020171103133952.0170906|| anannbabn |a ana 98188860307506381sh2017004336(DLC)sh2017004336DLCengDLCMiUMexico, Gulf of, WatershedTest Test TestAmerica, Gulf of, WatershedgWatershedsMexicogWatershedsUnited StatesWork cat.: 2017386748: El proceso de producción cafetalero en la región vertiente del Golfo de México, septiembre 2016:t.p. ([cataloger's translation] The process of coffee production in the watershed of the Gulf of Mexico)U.S. Fish & Wildlife Service WWW site, viewed Sept. 6, 2017(Gulf of Mexico Watershed)Mexico, Gulf of, Watershedsla-lab created local heading based on DEIA Catalog Working Group resolution to keep the term preferred by library community 2025-02-25(LIBRARY_OF_CONGRESS)988457839400041" + ], + "link": "https://api-na.hosted.exlibrisgroup.com/almaws/v1/bibs/authorities/98188860307506381" +} diff --git a/umich_catalog_indexing/spec/fixtures/translation_maps/umich/subject_heading_remediation.json b/umich_catalog_indexing/spec/fixtures/translation_maps/umich/subject_heading_remediation.json index 5b5c9bee..017e72fa 100644 --- a/umich_catalog_indexing/spec/fixtures/translation_maps/umich/subject_heading_remediation.json +++ b/umich_catalog_indexing/spec/fixtures/translation_maps/umich/subject_heading_remediation.json @@ -1,11 +1,11 @@ [ { - "150": { + "1xx": { "a": [ "Immigrant detention centers" ] }, - "450": [ + "4xx": [ { "a": [ "Alien detention centers" @@ -39,12 +39,12 @@ ] }, { - "150": { + "1xx": { "a": [ "Undocumented immigrants" ] }, - "450": [ + "4xx": [ { "a": [ "Aliens" @@ -89,12 +89,12 @@ ] }, { - "150": { + "1xx": { "a": [ "Children of undocumented immigrants" ] }, - "450": [ + "4xx": [ { "a": [ "Children of undocumented foreign nationals" @@ -128,7 +128,7 @@ ] }, { - "150": { + "1xx": { "a": [ "Children of undocumented immigrants" ], @@ -136,7 +136,7 @@ "Education" ] }, - "450": [ + "4xx": [ { "a": [ "Children of undocumented foreign nationals" @@ -156,7 +156,7 @@ ] }, { - "150": { + "1xx": { "a": [ "Children of undocumented immigrants" ], @@ -165,7 +165,7 @@ "Law and legislation" ] }, - "450": [ + "4xx": [ { "a": [ "Children of undocumented foreign nationals" @@ -187,7 +187,7 @@ ] }, { - "150": { + "1xx": { "a": [ "Undocumented immigrants" ], @@ -198,7 +198,7 @@ "United States" ] }, - "450": [ + "4xx": [ { "a": [ "Illegal aliens" @@ -221,7 +221,7 @@ ] }, { - "150": { + "1xx": { "a": [ "Undocumented immigrants" ], @@ -229,7 +229,7 @@ "United States" ] }, - "450": [ + "4xx": [ { "a": [ "Undocumented foreign nationals" @@ -249,7 +249,7 @@ ] }, { - "150": { + "1xx": { "a": [ "Undocumented immigrants" ], @@ -257,7 +257,7 @@ "Fiction" ] }, - "450": [ + "4xx": [ { "a": [ "Undocumented foreign nationals" @@ -277,12 +277,12 @@ ] }, { - "150": { + "1xx": { "a": [ "Women undocumented immigrants" ] }, - "450": [ + "4xx": [ { "a": [ "Women undocumented foreign nationals" @@ -306,12 +306,12 @@ ] }, { - "150": { + "1xx": { "a": [ "Human smuggling" ] }, - "450": [ + "4xx": [ { "a": [ "Immigrant smuggling" @@ -330,12 +330,12 @@ ] }, { - "150": { + "1xx": { "a": [ "Undocumented immigrants in literature" ] }, - "450": [ + "4xx": [ { "a": [ "Undocumented foreign nationals in literature" @@ -349,12 +349,12 @@ ] }, { - "150": { + "1xx": { "a": [ "Undocumented immigrant children" ] }, - "450": [ + "4xx": [ { "a": [ "Undocumented foreign national children" diff --git a/umich_catalog_indexing/spec/jobs/translation_map_generator/subject_heading_remediation_spec.rb b/umich_catalog_indexing/spec/jobs/translation_map_generator/subject_heading_remediation_spec.rb index a5b818b6..4366feaa 100644 --- a/umich_catalog_indexing/spec/jobs/translation_map_generator/subject_heading_remediation_spec.rb +++ b/umich_catalog_indexing/spec/jobs/translation_map_generator/subject_heading_remediation_spec.rb @@ -5,6 +5,10 @@ def remediated_term {"a" => ["Undocumented immigrants"]} end +def geo_remediated_term + {"a" => ["Mexico, Gulf of, Watershed"]} +end + def deprecated_terms [ { @@ -32,6 +36,17 @@ def deprecated_terms } ] end + +def geo_deprecated_terms + [ + { + "a" => ["America, Gulf of, Watershed"] + }, + { + "a" => ["Test Test Test"] + } + ] +end describe Jobs::TranslationMapGenerator::SubjectHeadingRemediation::Set do before(:each) do @data = fixture("subjects/authority_set.json") @@ -131,6 +146,7 @@ def deprecated_terms end end describe Jobs::TranslationMapGenerator::SubjectHeadingRemediation::Authority do + let(:geo_authority_record) { JSON.parse(fixture("subjects/geo_authority_record.json")) } before(:each) do @data = JSON.parse(fixture("subjects/authority_record.json")) end @@ -149,14 +165,22 @@ def deprecated_terms end end context "#remediated_term" do - it "returns the remediated term" do + it "returns the remediated term in the 150" do expect(subject.remediated_term).to eq(remediated_term) end + it "returns the remediated term in the 151" do + @data = geo_authority_record + expect(subject.remediated_term).to eq(geo_remediated_term) + end end context "#deprecated_terms" do it "returns the deprecated terms from the 450 field" do expect(subject.deprecated_terms).to contain_exactly(*deprecated_terms) end + it "returns the deprecated terms from the 451 field" do + @data = geo_authority_record + expect(subject.deprecated_terms).to contain_exactly(*geo_deprecated_terms) + end end context "#to_h" do it "returns the expected deprecated_to_remediated hash with downcased terms" do