Skip to content

Commit

Permalink
Finish mapping of npz files
Browse files Browse the repository at this point in the history
  • Loading branch information
minhqdao committed Feb 18, 2024
1 parent b3fdb18 commit 8708ff9
Show file tree
Hide file tree
Showing 8 changed files with 385 additions and 330 deletions.
1 change: 1 addition & 0 deletions example/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
macro(ADD_EXAMPLE name)
add_executable(example_${name} example_${name}.f90)
target_link_libraries(example_${name} "${PROJECT_NAME}")
target_link_libraries(example_${name} ${PROJECT_NAME} ${MINIZIP_LIBRARY})
add_test(NAME ${name}
COMMAND $<TARGET_FILE:example_${name}> ${CMAKE_CURRENT_BINARY_DIR}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
Expand Down
7 changes: 2 additions & 5 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,8 @@ set(fppFiles
stdlib_hash_64bit_spookyv2.fypp
stdlib_io.fypp
stdlib_io_np.fypp
stdlib_io_npy_load.fypp
stdlib_io_npz_load.fypp
stdlib_io_npy_save.fypp
stdlib_io_npz_load.fypp
stdlib_io_npz_save.fypp
stdlib_io_np_load.fypp
stdlib_io_np_save.fypp
stdlib_kinds.fypp
stdlib_linalg.fypp
stdlib_linalg_diag.fypp
Expand Down
15 changes: 15 additions & 0 deletions src/stdlib_io_np.fypp
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,19 @@ module stdlib_io_np
character(len=:), allocatable, intent(out), optional :: iomsg
end
end interface

interface allocate_array
#:for k1, t1 in KINDS_TYPES
#:for rank in RANKS
module subroutine allocate_array_${t1[0]}$${k1}$_${rank}$ (array, vshape, stat)
!> Instance of the array to be allocated.
${t1}$, allocatable, intent(out) :: array${ranksuffix(rank)}$
!> Dimensions to allocate for.
integer, intent(in) :: vshape(:)
!> Status of allocate.
integer, intent(out) :: stat
end
#:endfor
#:endfor
end interface
end
259 changes: 213 additions & 46 deletions src/stdlib_io_npy_load.fypp → src/stdlib_io_np_load.fypp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
#:set KINDS_TYPES = REAL_KINDS_TYPES + INT_KINDS_TYPES + CMPLX_KINDS_TYPES

!> Implementation of loading npy files into multidimensional arrays
submodule (stdlib_io_np) stdlib_io_npy_load
use stdlib_error, only : error_stop
use stdlib_strings, only : to_string, starts_with
submodule(stdlib_io_np) stdlib_io_np_load
use stdlib_error, only: error_stop
use stdlib_strings, only: to_string, starts_with
use stdlib_string_type, only: string_type
use stdlib_io_zip, only: unzip, zip_prefix, zip_suffix, t_unzipped_bundle, t_unzipped_file
use stdlib_array
implicit none

contains
Expand All @@ -33,28 +36,12 @@ contains

open(newunit=io, file=filename, form="unformatted", access="stream", iostat=stat)
catch: block
character(len=:), allocatable :: this_type
integer, allocatable :: vshape(:)

call get_descriptor(io, filename, this_type, vshape, stat, msg)
call verify_npy_file(io, filename, vtype, vshape, rank, stat, msg)
if (stat /= 0) exit catch

if (this_type /= vtype) then
stat = 1
msg = "File '"//filename//"' contains data of type '"//this_type//"', "//&
& "but expected '"//vtype//"'"
exit catch
end if

if (size(vshape) /= rank) then
stat = 1
msg = "File '"//filename//"' contains data of rank "//&
& to_string(size(vshape))//", but expected "//&
& to_string(rank)
exit catch
end if

call allocator(array, vshape, stat)
call allocate_array(array, vshape, stat)
if (stat /= 0) then
msg = "Failed to allocate array of type '"//vtype//"' "//&
& "with total size of "//to_string(product(vshape))
Expand All @@ -76,30 +63,210 @@ contains
end if

if (present(iomsg).and.allocated(msg)) call move_alloc(msg, iomsg)
contains
end
#:endfor
#:endfor

!> Wrapped intrinsic allocate to create an allocation from a shape array
subroutine allocator(array, vshape, stat)
!> Instance of the array to be allocated
${t1}$, allocatable, intent(out) :: array${ranksuffix(rank)}$
!> Dimensions to allocate for
integer, intent(in) :: vshape(:)
!> Status of allocate
!> Verify header, type and rank of the npy file.
subroutine verify_npy_file(io, filename, vtype, vshape, rank, stat, msg)
!> Access unit to the npy file.
integer, intent(in) :: io
!> Name of the npy file to load from.
character(len=*), intent(in) :: filename
!> Type of the data stored, retrieved from field `descr`.
character(len=*), intent(in) :: vtype
!> Shape of the stored data, retrieved from field `shape`.
integer, allocatable, intent(out) :: vshape(:)
!> Expected rank of the data.
integer, intent(in) :: rank
!> Status of operation.
integer, intent(out) :: stat
!> Associated error message in case of non-zero status.
character(len=:), allocatable, intent(out) :: msg

character(len=:), allocatable :: this_type

call get_descriptor(io, filename, this_type, vshape, stat, msg)
if (stat /= 0) return

if (this_type /= vtype) then
stat = 1
msg = "File '"//filename//"' contains data of type '"//this_type//"', "//&
& "but expected '"//vtype//"'"
return
end if

if (size(vshape) /= rank) then
stat = 1
msg = "File '"//filename//"' contains data of rank "//&
& to_string(size(vshape))//", but expected "//&
& to_string(rank)
return
end if
end

#:for k1, t1 in KINDS_TYPES
#:for rank in RANKS
module subroutine allocate_array_${t1[0]}$${k1}$_${rank}$(array, vshape, stat)
${t1}$, allocatable, intent(out) :: array${ranksuffix(rank)}$
integer, intent(in) :: vshape(:)
integer, intent(out) :: stat

allocate(array( &
#:for i in range(rank-1)
& vshape(${i+1}$), &
#:endfor
& vshape(${rank}$)), &
& stat=stat)
end
#:endfor
#:endfor

!> Version: experimental
!>
!> Load multidimensional arrays from a compressed or uncompressed npz file.
!> ([Specification](../page/specs/stdlib_io.html#load_npz))
module subroutine load_npz_to_bundle(filename, array_bundle, iostat, iomsg)
character(len=*), intent(in) :: filename
type(t_array_bundle), intent(out) :: array_bundle
integer, intent(out), optional :: iostat
character(len=:), allocatable, intent(out), optional :: iomsg

allocate(array( &
#:for i in range(rank-1)
& vshape(${i+1}$), &
type(t_unzipped_bundle) :: unzipped_bundle
integer :: stat
character(len=:), allocatable :: msg

call unzip(filename, unzipped_bundle, stat, msg)
if (stat == 0) then
call load_raw_to_bundle(unzipped_bundle, array_bundle, stat, msg)
else
call identify_problem(filename, stat, msg)
end if

if (present(iostat)) then
iostat = stat
else if (stat /= 0) then
if (allocated(msg)) then
call error_stop("Failed to read arrays from file '"//filename//"'"//nl//msg)
else
call error_stop("Failed to read arrays from file '"//filename//"'")
end if
end if

if (present(iomsg) .and. allocated(msg)) call move_alloc(msg, iomsg)
end

module subroutine load_raw_to_bundle(unzipped_bundle, array_bundle, stat, msg)
type(t_unzipped_bundle), intent(in) :: unzipped_bundle
type(t_array_bundle), intent(out) :: array_bundle
integer, intent(out) :: stat
character(len=:), allocatable, intent(out) :: msg

integer :: i, io

allocate (array_bundle%files(size(unzipped_bundle%files)))
do i = 1, size(unzipped_bundle%files)
array_bundle%files(i)%name = unzipped_bundle%files(i)%name
open (newunit=io, status='scratch', form='unformatted', access='stream', iostat=stat)
if (stat /= 0) return
write (io) unzipped_bundle%files(i)%data
call load_string_to_array(io, unzipped_bundle%files(i), array_bundle%files(i), stat, msg)
close (io, status='delete', iostat=stat)
if (stat /= 0) return
end do
end

module subroutine load_string_to_array(io, unzipped_file, array, stat, msg)
integer, intent(in) :: io
type(t_unzipped_file), intent(in) :: unzipped_file
class(t_array), intent(inout) :: array
integer, intent(out) :: stat
character(len=:), allocatable, intent(out) :: msg

#:for k1, t1 in KINDS_TYPES
#:for rank in RANKS
${t1}$, allocatable :: array_${t1[0]}$${k1}$_${rank}$${ranksuffix(rank)}$
#:endfor
& vshape(${rank}$)), &
& stat=stat)
#:endfor

end subroutine allocator
integer, allocatable :: vshape(:)

end subroutine load_npy_${t1[0]}$${k1}$_${rank}$
#:endfor
#:endfor
select type (arr => array)
#:for k1, t1 in KINDS_TYPES
#:for rank in RANKS
type is (t_array_${t1[0]}$${k1}$_${rank}$)
call verify_npy_file(io, unzipped_file%name, type_${t1[0]}$${k1}$, vshape, ${rank}$, stat, msg)
if (stat /= 0) return
call allocate_array(array_${t1[0]}$${k1}$_${rank}$, vshape, stat)
if (stat /= 0) then
msg = "Failed to allocate array of type '"//type_${t1[0]}$${k1}$//"' "//&
& "with total size of "//to_string(product(vshape))
return
end if
read (io, iostat=stat) array_${t1[0]}$${k1}$_${rank}$${ranksuffix(rank)}$
arr%values = array_${t1[0]}$${k1}$_${rank}$${ranksuffix(rank)}$
#:endfor
#:endfor
class default
stat = 1; msg = 'Unsupported array type.'; return
end select
end

!> Open file and try to identify the problem.
module subroutine identify_problem(filename, stat, msg)
character(len=*), intent(in) :: filename
integer, intent(inout) :: stat
character(len=:), allocatable, intent(inout) :: msg

logical :: exists
integer :: io_unit, prev_stat
character(len=:), allocatable :: prev_msg

! Keep track of the previous status and message in case no reason can be found.
prev_stat = stat
if (allocated(msg)) call move_alloc(msg, prev_msg)

inquire (file=filename, exist=exists)
if (.not. exists) then
stat = 1; msg = 'File does not exist: '//filename//'.'; return
end if
open (newunit=io_unit, file=filename, form='unformatted', access='stream', &
& status='old', action='read', iostat=stat, iomsg=msg)
if (stat /= 0) return

call verify_header(io_unit, stat, msg)
if (stat /= 0) return

! Restore previous status and message if no reason could be found.
stat = prev_stat; msg = 'Failed to unzip file: '//filename//nl//prev_msg
end

module subroutine verify_header(io_unit, stat, msg)
integer, intent(in) :: io_unit
integer, intent(out) :: stat
character(len=:), allocatable, intent(out) :: msg

integer :: file_size
character(len=len(zip_prefix)) :: header

inquire (io_unit, size=file_size)
if (file_size < len(zip_suffix)) then
stat = 1; msg = 'File is too small to be an npz file.'; return
end if

read (io_unit, iostat=stat) header
if (stat /= 0) then
msg = 'Failed to read header from file'; return
end if

if (header == zip_suffix) then
stat = 1; msg = 'Empty npz file.'; return
end if

if (header /= zip_prefix) then
stat = 1; msg = 'Not an npz file.'; return
end if
end

!> Read the npy header from a binary file and retrieve the descriptor string.
subroutine get_descriptor(io, filename, vtype, vshape, stat, msg)
Expand Down Expand Up @@ -168,7 +335,7 @@ contains
if (.not.fortran_order) then
vshape = [(vshape(i), i = size(vshape), 1, -1)]
end if
end subroutine get_descriptor
end


!> Parse the first eight bytes of the npy header to verify the data
Expand Down Expand Up @@ -214,7 +381,7 @@ contains
& "'"//to_string(major)//"."//to_string(minor)//"'"
return
end if
end subroutine parse_header
end

!> Parse the descriptor in the npy header. This routine implements a minimal
!> non-recursive parser for serialized Python dictionaries.
Expand Down Expand Up @@ -367,7 +534,7 @@ contains
& "1 | " // input // nl // &
& " |" // repeat(" ", first) // repeat("^", last - first + 1) // nl // &
& " |"
end function make_message
end

!> Parse a tuple of integers into an array of integers
subroutine parse_tuple(input, pos, tuple, stat, msg)
Expand Down Expand Up @@ -427,7 +594,7 @@ contains
return
end select
end do
end subroutine parse_tuple
end

!> Get the next allowed token
subroutine next_token(input, pos, token, allowed_token, stat, msg)
Expand Down Expand Up @@ -459,7 +626,7 @@ contains
exit
end if
end do
end subroutine next_token
end

!> Tokenize input string
subroutine get_token(input, pos, token)
Expand Down Expand Up @@ -531,8 +698,8 @@ contains
token = token_type(pos, pos, invalid)
end select

end subroutine get_token
end

end subroutine parse_descriptor
end

end submodule stdlib_io_npy_load
end
Loading

0 comments on commit 8708ff9

Please sign in to comment.