NYUP 739 schema changes #2

Open · wants to merge 4 commits into base: main
1 change: 1 addition & 0 deletions .gitignore
@@ -1 +1,2 @@
 # src/solr/open-square-metadata
+src/dlts-epub-metadata
3 changes: 2 additions & 1 deletion README.md
@@ -28,5 +28,6 @@ gh repo clone NYULibraries/dlts-epub-metadata
 2) Run the ingest script
+
 ```bash
-cd scripts/ingest-documents
+cd src/scripts/ingest-documents
 deno run --allow-net --allow-read index.ts
 ```
73 changes: 63 additions & 10 deletions src/scripts/ingest-document/index.ts
@@ -10,9 +10,9 @@ const solrUrl = `${solrHost}/${solrCollection}/update/json?commit=true`
 // Metadata collection
 const metadataCollection = 'nyupress'
 // Define the directory containing the JSON files
-const dir = Deno.cwd() + '/../../dlts-epub-metadata/' + metadataCollection
+const dir = Deno.cwd() + '/../../dlts-epub-metadata/' + metadataCollection;
 
-// Read the directory
+// Read the directory and loop through each file
 for await (const dirEntry of Deno.readDir(dir)) {
   try {
     const isbn = dirEntry.name
@@ -21,14 +21,71 @@ for await (const dirEntry of Deno.readDir(dir)) {
     // Parse the JSON string into an object
     const doc = JSON.parse(jsonStr)
 
-    delete doc.isDownloadable
-    delete doc.nyu_press_website_buy_the_book_url
-    delete doc.permanent_url
-    delete doc.rootUrl
+    const authors: string[] = doc.author.split(', ')
+    console.log(authors)
+
+    // create the full author object, unordered
+    const oa = [];
+    for (let i = 0; i < authors.length; i++) {
+      oa.push({
+        "contributors.bio": "",
+        "contributors.name": authors[i],
+        "contributors.nameSort": authors[i],
+        "contributors.order": i + 1,
+        "contributors.role": "author",
+      })
+    }
+    const final = JSON.stringify(oa)
+
+    const flatReviews = JSON.stringify([
+      {
+        "reviews.review": "",
+        "reviews.reviewer": "",
+      }
+    ])
+
+    // add the properties missing in the schema from Supadu
+    doc.id = doc.identifier
+    doc.contributors = final;
+    doc.collection_code = 'oa-books'
+    doc.handle = doc.identifier
+    doc.publicationPlace = doc.coverage
+    doc.dateBook = doc.date
+    doc.descriptionHtml = doc.description_html
+    doc.pages = doc.format
+    doc.openSquareId = doc.identifier
+    doc.licenseAbbreviation = doc.license_abbreviation
+    doc.licenseIcon = doc.license_icon
+    doc.licenseLink = doc.license_link
+    doc.subjects = doc.subject
+    doc.titleSort = doc.title_sort
+    doc.pressUrl = doc.nyu_press_website_buy_the_book_url
+    doc.reviews = flatReviews
+    // required, but sometimes empty
+    doc.series = doc.series_names || "";
+
+    // remove the properties that are not in the schema
+    delete doc.author
+    delete doc.author_sort
+    delete doc.coverHref
+    delete doc.coverage
+    delete doc.date
+    delete doc.description_html
+    delete doc.format
+    delete doc.identifier
+    delete doc.isDownloadable
+    delete doc.license_abbreviation
+    delete doc.license_icon
+    delete doc.license_link
+    delete doc.nyu_press_website_buy_the_book_url
+    delete doc.packageUrl
+    delete doc.permanent_url
+    delete doc.rights
+    delete doc.rootUrl
+    delete doc.subject
+    delete doc.series_names
+    delete doc.thumbHref
+    delete doc.title_sort
 
     // Define the headers for the POST request
     const headers = new Headers()
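
For reference, a self-contained sketch of the contributor flattening the hunk above performs; the `flattenAuthors` helper name and the sample input are illustrative only and do not appear in the PR:

```typescript
// Sketch: turn a comma-separated author string into the flat
// "contributors.*" objects built in the loop above, serialized
// to a single JSON string for the Solr field.
function flattenAuthors(author: string): string {
  const authors = author.split(', ')
  const contributors = authors.map((name, i) => ({
    'contributors.bio': '',
    'contributors.name': name,
    'contributors.nameSort': name,
    'contributors.order': i + 1,
    'contributors.role': 'author',
  }))
  return JSON.stringify(contributors)
}

// Hypothetical input, shown only to illustrate the output shape
console.log(flattenAuthors('Jane Doe, John Smith'))
// [{"contributors.bio":"","contributors.name":"Jane Doe",...},
//  {"contributors.bio":"","contributors.name":"John Smith",...}]
```

Serializing the array into a single string field keeps the Solr document flat, at the cost of consumers having to JSON.parse the field on read.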
@@ -63,8 +120,4 @@
     console.log(err)
     console.log('-'.repeat(80))
   }
-
 }
-
-
-
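
The POST itself is collapsed in this view. Below is a minimal sketch of what sending one transformed document to the update endpoint defined at the top of the script can look like; the host, collection name, and document fields are placeholders, not values taken from the hidden code:

```typescript
// Sketch: send one transformed document to Solr's JSON update handler.
// solrHost and solrCollection are placeholders; adjust for your instance.
const solrHost = 'http://localhost:8983/solr'
const solrCollection = 'open-square-books'
const solrUrl = `${solrHost}/${solrCollection}/update/json?commit=true`

const headers = new Headers()
headers.set('Content-Type', 'application/json')

// Solr's JSON update endpoint accepts an array of documents to add
const response = await fetch(solrUrl, {
  method: 'POST',
  headers,
  body: JSON.stringify([{ id: '9780000000000', collection_code: 'oa-books' }]),
})
console.log(response.status, await response.text())
```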
2 changes: 1 addition & 1 deletion src/solr/open-square-metadata