Finish mapping of npz files

fortran-lang · Feb 18, 2024 · 8708ff9 · 8708ff9
1 parent b3fdb18
commit 8708ff9
Show file tree

Hide file tree

Showing 8 changed files with 385 additions and 330 deletions.
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
@@ -1,6 +1,7 @@
 macro(ADD_EXAMPLE name)
     add_executable(example_${name} example_${name}.f90)
     target_link_libraries(example_${name} "${PROJECT_NAME}")
+    target_link_libraries(example_${name} ${PROJECT_NAME} ${MINIZIP_LIBRARY})
     add_test(NAME ${name}
              COMMAND $<TARGET_FILE:example_${name}> ${CMAKE_CURRENT_BINARY_DIR}
              WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -17,11 +17,8 @@ set(fppFiles
     stdlib_hash_64bit_spookyv2.fypp 
     stdlib_io.fypp
     stdlib_io_np.fypp
-    stdlib_io_npy_load.fypp
-    stdlib_io_npz_load.fypp
-    stdlib_io_npy_save.fypp
-    stdlib_io_npz_load.fypp
-    stdlib_io_npz_save.fypp
+    stdlib_io_np_load.fypp
+    stdlib_io_np_save.fypp
     stdlib_kinds.fypp
     stdlib_linalg.fypp
     stdlib_linalg_diag.fypp

diff --git a/src/stdlib_io_np.fypp b/src/stdlib_io_np.fypp
@@ -143,4 +143,19 @@ module stdlib_io_np
             character(len=:), allocatable, intent(out), optional :: iomsg
         end
     end interface
+
+    interface allocate_array
+        #:for k1, t1 in KINDS_TYPES
+            #:for rank in RANKS
+                module subroutine allocate_array_${t1[0]}$${k1}$_${rank}$ (array, vshape, stat)
+                    !> Instance of the array to be allocated.
+                    ${t1}$, allocatable, intent(out) :: array${ranksuffix(rank)}$
+                    !> Dimensions to allocate for.
+                    integer, intent(in) :: vshape(:)
+                    !> Status of allocate.
+                    integer, intent(out) :: stat
+                end
+            #:endfor
+        #:endfor
+    end interface
 end
diff --git a/src/stdlib_io_npy_load.fypp → src/stdlib_io_np_load.fypp b/src/stdlib_io_npy_load.fypp → src/stdlib_io_np_load.fypp
@@ -5,9 +5,12 @@
 #:set KINDS_TYPES = REAL_KINDS_TYPES + INT_KINDS_TYPES + CMPLX_KINDS_TYPES
 
 !> Implementation of loading npy files into multidimensional arrays
-submodule (stdlib_io_np) stdlib_io_npy_load
-    use stdlib_error, only : error_stop
-    use stdlib_strings, only : to_string, starts_with
+submodule(stdlib_io_np) stdlib_io_np_load
+    use stdlib_error, only: error_stop
+    use stdlib_strings, only: to_string, starts_with
+    use stdlib_string_type, only: string_type
+    use stdlib_io_zip, only: unzip, zip_prefix, zip_suffix, t_unzipped_bundle, t_unzipped_file
+    use stdlib_array
     implicit none
 
 contains
@@ -33,28 +36,12 @@ contains
 
         open(newunit=io, file=filename, form="unformatted", access="stream", iostat=stat)
         catch: block
-            character(len=:), allocatable :: this_type
             integer, allocatable :: vshape(:)
 
-            call get_descriptor(io, filename, this_type, vshape, stat, msg)
+            call verify_npy_file(io, filename, vtype, vshape, rank, stat, msg)
             if (stat /= 0) exit catch
 
-            if (this_type /= vtype) then
-                stat = 1
-                msg = "File '"//filename//"' contains data of type '"//this_type//"', "//&
-                    & "but expected '"//vtype//"'"
-                exit catch
-            end if
-
-            if (size(vshape) /= rank) then
-                stat = 1
-                msg = "File '"//filename//"' contains data of rank "//&
-                    & to_string(size(vshape))//", but expected "//&
-                    & to_string(rank)
-                exit catch
-            end if
-
-            call allocator(array, vshape, stat)
+            call allocate_array(array, vshape, stat)
             if (stat /= 0) then
                 msg = "Failed to allocate array of type '"//vtype//"' "//&
                     & "with total size of "//to_string(product(vshape))
@@ -76,30 +63,210 @@ contains
         end if
 
         if (present(iomsg).and.allocated(msg)) call move_alloc(msg, iomsg)
-    contains
+    end
+  #:endfor
+#:endfor
 
-    !> Wrapped intrinsic allocate to create an allocation from a shape array
-    subroutine allocator(array, vshape, stat)
-        !> Instance of the array to be allocated
-        ${t1}$, allocatable, intent(out) :: array${ranksuffix(rank)}$
-        !> Dimensions to allocate for
-        integer, intent(in) :: vshape(:)
-        !> Status of allocate
+    !> Verify header, type and rank of the npy file.
+    subroutine verify_npy_file(io, filename, vtype, vshape, rank, stat, msg)
+        !> Access unit to the npy file.
+        integer, intent(in) :: io
+        !> Name of the npy file to load from.
+        character(len=*), intent(in) :: filename
+        !> Type of the data stored, retrieved from field `descr`.
+        character(len=*), intent(in) :: vtype
+        !> Shape of the stored data, retrieved from field `shape`.
+        integer, allocatable, intent(out) :: vshape(:)
+        !> Expected rank of the data.
+        integer, intent(in) :: rank
+        !> Status of operation.
         integer, intent(out) :: stat
+        !> Associated error message in case of non-zero status.
+        character(len=:), allocatable, intent(out) :: msg
+
+        character(len=:), allocatable :: this_type
+
+        call get_descriptor(io, filename, this_type, vshape, stat, msg)
+        if (stat /= 0) return
+
+        if (this_type /= vtype) then
+            stat = 1
+            msg = "File '"//filename//"' contains data of type '"//this_type//"', "//&
+                & "but expected '"//vtype//"'"
+            return
+        end if
+
+        if (size(vshape) /= rank) then
+            stat = 1
+            msg = "File '"//filename//"' contains data of rank "//&
+                & to_string(size(vshape))//", but expected "//&
+                & to_string(rank)
+            return
+        end if
+    end
+
+    #:for k1, t1 in KINDS_TYPES
+        #:for rank in RANKS
+            module subroutine allocate_array_${t1[0]}$${k1}$_${rank}$(array, vshape, stat)
+                ${t1}$, allocatable, intent(out) :: array${ranksuffix(rank)}$
+                integer, intent(in) :: vshape(:)
+                integer, intent(out) :: stat
+
+                allocate(array( &
+                    #:for i in range(rank-1)
+                    &    vshape(${i+1}$), &
+                    #:endfor
+                    &    vshape(${rank}$)), &
+                    &    stat=stat)
+            end
+        #:endfor
+    #:endfor
+
+    !> Version: experimental
+    !>
+    !> Load multidimensional arrays from a compressed or uncompressed npz file.
+    !> ([Specification](../page/specs/stdlib_io.html#load_npz))
+    module subroutine load_npz_to_bundle(filename, array_bundle, iostat, iomsg)
+        character(len=*), intent(in) :: filename
+        type(t_array_bundle), intent(out) :: array_bundle
+        integer, intent(out), optional :: iostat
+        character(len=:), allocatable, intent(out), optional :: iomsg
 
-        allocate(array( &
-            #:for i in range(rank-1)
-            &    vshape(${i+1}$), &
+        type(t_unzipped_bundle) :: unzipped_bundle
+        integer :: stat
+        character(len=:), allocatable :: msg
+
+        call unzip(filename, unzipped_bundle, stat, msg)
+        if (stat == 0) then
+            call load_raw_to_bundle(unzipped_bundle, array_bundle, stat, msg)
+        else
+            call identify_problem(filename, stat, msg)
+        end if
+
+        if (present(iostat)) then
+            iostat = stat
+        else if (stat /= 0) then
+            if (allocated(msg)) then
+                call error_stop("Failed to read arrays from file '"//filename//"'"//nl//msg)
+            else
+                call error_stop("Failed to read arrays from file '"//filename//"'")
+            end if
+        end if
+
+        if (present(iomsg) .and. allocated(msg)) call move_alloc(msg, iomsg)
+    end
+
+    module subroutine load_raw_to_bundle(unzipped_bundle, array_bundle, stat, msg)
+        type(t_unzipped_bundle), intent(in) :: unzipped_bundle
+        type(t_array_bundle), intent(out) :: array_bundle
+        integer, intent(out) :: stat
+        character(len=:), allocatable, intent(out) :: msg
+
+        integer :: i, io
+
+        allocate (array_bundle%files(size(unzipped_bundle%files)))
+        do i = 1, size(unzipped_bundle%files)
+            array_bundle%files(i)%name = unzipped_bundle%files(i)%name
+            open (newunit=io, status='scratch', form='unformatted', access='stream', iostat=stat)
+            if (stat /= 0) return
+            write (io) unzipped_bundle%files(i)%data
+            call load_string_to_array(io, unzipped_bundle%files(i), array_bundle%files(i), stat, msg)
+            close (io, status='delete', iostat=stat)
+            if (stat /= 0) return
+        end do
+    end
+
+    module subroutine load_string_to_array(io, unzipped_file, array, stat, msg)
+        integer, intent(in) :: io
+        type(t_unzipped_file), intent(in) :: unzipped_file
+        class(t_array), intent(inout) :: array
+        integer, intent(out) :: stat
+        character(len=:), allocatable, intent(out) :: msg
+
+        #:for k1, t1 in KINDS_TYPES
+            #:for rank in RANKS
+                ${t1}$, allocatable :: array_${t1[0]}$${k1}$_${rank}$${ranksuffix(rank)}$
             #:endfor
-            &    vshape(${rank}$)), &
-            &    stat=stat)
+        #:endfor
 
-    end subroutine allocator
+        integer, allocatable :: vshape(:)
 
-    end subroutine load_npy_${t1[0]}$${k1}$_${rank}$
-  #:endfor
-#:endfor
+        select type (arr => array)
+            #:for k1, t1 in KINDS_TYPES
+                #:for rank in RANKS
+                    type is (t_array_${t1[0]}$${k1}$_${rank}$)
+                    call verify_npy_file(io, unzipped_file%name, type_${t1[0]}$${k1}$, vshape, ${rank}$, stat, msg)
+                    if (stat /= 0) return
+                    call allocate_array(array_${t1[0]}$${k1}$_${rank}$, vshape, stat)
+                    if (stat /= 0) then
+                        msg = "Failed to allocate array of type '"//type_${t1[0]}$${k1}$//"' "//&
+                            & "with total size of "//to_string(product(vshape))
+                        return
+                    end if
+                    read (io, iostat=stat) array_${t1[0]}$${k1}$_${rank}$${ranksuffix(rank)}$
+                    arr%values = array_${t1[0]}$${k1}$_${rank}$${ranksuffix(rank)}$
+                #:endfor
+            #:endfor
+        class default
+            stat = 1; msg = 'Unsupported array type.'; return
+        end select
+    end
+
+    !> Open file and try to identify the problem.
+    module subroutine identify_problem(filename, stat, msg)
+        character(len=*), intent(in) :: filename
+        integer, intent(inout) :: stat
+        character(len=:), allocatable, intent(inout) :: msg
 
+        logical :: exists
+        integer :: io_unit, prev_stat
+        character(len=:), allocatable :: prev_msg
+
+        ! Keep track of the previous status and message in case no reason can be found.
+        prev_stat = stat
+        if (allocated(msg)) call move_alloc(msg, prev_msg)
+
+        inquire (file=filename, exist=exists)
+        if (.not. exists) then
+            stat = 1; msg = 'File does not exist: '//filename//'.'; return
+        end if
+        open (newunit=io_unit, file=filename, form='unformatted', access='stream', &
+            & status='old', action='read', iostat=stat, iomsg=msg)
+        if (stat /= 0) return
+
+        call verify_header(io_unit, stat, msg)
+        if (stat /= 0) return
+
+        ! Restore previous status and message if no reason could be found.
+        stat = prev_stat; msg = 'Failed to unzip file: '//filename//nl//prev_msg
+    end
+
+    module subroutine verify_header(io_unit, stat, msg)
+        integer, intent(in) :: io_unit
+        integer, intent(out) :: stat
+        character(len=:), allocatable, intent(out) :: msg
+
+        integer :: file_size
+        character(len=len(zip_prefix)) :: header
+
+        inquire (io_unit, size=file_size)
+        if (file_size < len(zip_suffix)) then
+            stat = 1; msg = 'File is too small to be an npz file.'; return
+        end if
+
+        read (io_unit, iostat=stat) header
+        if (stat /= 0) then
+            msg = 'Failed to read header from file'; return
+        end if
+
+        if (header == zip_suffix) then
+            stat = 1; msg = 'Empty npz file.'; return
+        end if
+
+        if (header /= zip_prefix) then
+            stat = 1; msg = 'Not an npz file.'; return
+        end if
+    end
 
     !> Read the npy header from a binary file and retrieve the descriptor string.
     subroutine get_descriptor(io, filename, vtype, vshape, stat, msg)
@@ -168,7 +335,7 @@ contains
         if (.not.fortran_order) then
             vshape = [(vshape(i), i = size(vshape), 1, -1)]
         end if
-    end subroutine get_descriptor
+    end
 
 
     !> Parse the first eight bytes of the npy header to verify the data
@@ -214,7 +381,7 @@ contains
                 & "'"//to_string(major)//"."//to_string(minor)//"'"
             return
         end if
-    end subroutine parse_header
+    end
 
     !> Parse the descriptor in the npy header. This routine implements a minimal
     !> non-recursive parser for serialized Python dictionaries.
@@ -367,7 +534,7 @@ contains
             & "1 | " // input // nl // &
             & "  |" // repeat(" ", first) // repeat("^", last - first + 1) // nl // &
             & "  |"
-    end function make_message
+    end
 
     !> Parse a tuple of integers into an array of integers
     subroutine parse_tuple(input, pos, tuple, stat, msg)
@@ -427,7 +594,7 @@ contains
                 return
             end select
         end do
-    end subroutine parse_tuple
+    end
 
     !> Get the next allowed token
     subroutine next_token(input, pos, token, allowed_token, stat, msg)
@@ -459,7 +626,7 @@ contains
                 exit
             end if
         end do
-    end subroutine next_token
+    end
 
     !> Tokenize input string
     subroutine get_token(input, pos, token)
@@ -531,8 +698,8 @@ contains
             token = token_type(pos, pos, invalid)
         end select
 
-    end subroutine get_token
+    end
 
-    end subroutine parse_descriptor
+    end
 
-end submodule stdlib_io_npy_load
+end