Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

initial work on supporting discontiguous data #78

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/EDF.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ using BitIntegers, Dates, Printf

include("types.jl")
include("read.jl")
include("discontiguous.jl")
include("write.jl")

end # module
87 changes: 87 additions & 0 deletions src/discontiguous.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""
    EDF.read_discontiguous!(file::EDF.File)

Read all of `file`'s signals into memory, expanding recording gaps for a
discontiguous (EDF+D) file, and return `file`.

Throws an `ArgumentError` if `file`'s header declares the recording contiguous.

See also: [`EDF.read_discontiguous`](@ref)
"""
function read_discontiguous!(file::File)
    # this reader only makes sense for EDF+D files; contiguous files should
    # go through the ordinary `EDF.read!` path
    file.header.is_contiguous && throw(ArgumentError("file is contiguous; use `EDF.read!` instead"))
    isopen(file.io) && !eof(file.io) && read_discontiguous_signals!(file)
    return file
end

"""
EDF.read_discontiguous(io::IO)

Return `EDF.read_discontiguous(EDF.File(io))`.

See also: [`EDF.File`](@ref), [`EDF.read_discontiguous`](@ref)
"""
read_discontiguous(io::IO) = read_discontiguous!(File(io))

"""
EDF.read_discontiguous(path)

Return `open(EDF.read_discontiguous, path)`.
"""
read_discontiguous(path) = open(read_discontiguous, path)

"""
    read_discontiguous_signals!(file::File)

Read the signals of a discontiguous (EDF+D) `file` into memory, then expand
each `Signal`'s sample vector so it spans the full recording timeline,
zero-filling records that were moved out of recording gaps.

Record start times are taken from the file's `AnnotationsSignal`: each EDF+
data record carries a timekeeping annotation whose onset is that record's
start time in seconds.
"""
function read_discontiguous_signals!(file::File)
    # XXX need to have this be slightly more constrained
    time_idx = findfirst(x -> isa(x, EDF.AnnotationsSignal), file.signals)
    time_anns = file.signals[time_idx]
    # annotations carry their own timestamps, so only `Signal`s need expanding
    signals = filter(x -> isa(x, Signal), file.signals)

    # XXX first we read, then we resize in memory, and finally copy around;
    # not the most efficient route, but c'est la vie

    # 1. read the records as they are packed on disk
    EDF.read!(file)

    # the onset of the final record gives the total recording span in seconds
    final_record_start = only(last(time_anns.records)).onset_in_seconds
    final_record_start = Int(final_record_start)
    for signal in signals
        # 2. resize: grow each sample vector to cover the full timeline.
        # NOTE(review): `resize!` leaves the new trailing elements
        # uninitialized; they are only zeroed where the shifting below reaches
        # them — confirm the whole gap region ends up zero-filled.
        sr = sampling_rate(file, signal)
        total_length = final_record_start * sr + signal.header.samples_per_record
        resize!(signal.samples, total_length)
    end

    spr = file.header.seconds_per_record
    # count is used for sanity checking that we found the correct number of
    # discontinuities (each signal should contribute the same number)
    count = 0

    # XXX Note that this logic will probably fail for truncated final records.
    for signal in signals
        sr = sampling_rate(file, signal)
        rec_n_samples = Int(sr * spr)
        samples = signal.samples
        prev_start = only(first(time_anns.records)).onset_in_seconds
        for (tal_idx, tal) in enumerate(@view(time_anns.records[2:end]))
            start = only(tal).onset_in_seconds
            # a record whose onset is not exactly one record-duration after
            # its predecessor marks a discontinuity
            if start - spr != prev_start
                # 3. copy around: shift this record's samples toward the end
                # of the resized vector, zeroing the space they vacate
                start_idx = tal_idx * rec_n_samples + 1
                # -1 because 1-based indexing
                end_idx = start_idx + rec_n_samples - 1
                slice = view(samples, start_idx:end_idx)
                post_slice = view(samples, end_idx:lastindex(samples))
                # NOTE(review): `slice` and `post_slice` overlap at `end_idx`,
                # and `copyto!` does not guarantee memmove semantics for
                # overlapping regions — confirm whether `post_slice` should
                # start at `end_idx + 1` instead.
                @debug "shifting discontiguous record" start_idx end_idx
                copyto!(post_slice, slice)
                fill!(slice, 0)
                count += 1
            end
            prev_start = start
        end
    end

    # every signal must report the same number of discontinuities, so the
    # total must divide evenly by the signal count (`0 % n == 0`, so fully
    # contiguous data also passes). The previous form,
    # `!iszero(count) || count % length(signals) == 0 || error(...)`,
    # short-circuited on any nonzero count, so the error could never fire.
    count % length(signals) == 0 ||
        error("Found an unexpected number of discontinuities")

    return file
end

"""
    sampling_rate(file::File, signal::Signal)

Return `signal`'s sampling rate as an `Int`, computed from its samples per
record and `file`'s record duration in seconds. Throws an `InexactError` if
the rate is not a whole number.
"""
function sampling_rate(file::File, signal::Signal)
    record_seconds = file.header.seconds_per_record
    return Int(signal.header.samples_per_record / record_seconds)
end
7 changes: 7 additions & 0 deletions test/data/generate_discontinuous.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Regenerates the test/data/test_merged.edf fixture: the same recording
# written twice with shifted start times, then merged into one EDF file.
# Uses the Luna EDF toolkit shipped as a JLL binary artifact.
using Luna_jll

# `luna()` invokes the callback with the path to the bundled `luna` executable.
luna() do exec
# rewrite test.edf as plain ("vanilla") EDF
run(pipeline(`$exec test.edf -s WRITE edf-tag=vanilla force-edf`))
# write a second copy with its header start time moved to 10:19:44, producing
# test-vanilla-2.edf (NOTE(review): presumably chosen so the two copies are
# separated by the gap runtests.jl expects — confirm against runtests.jl)
run(pipeline(`$exec test-vanilla.edf starttime=10.19.44 -s 'SET-HEADERS start-time=10.19.44 & WRITE edf-tag=2'`))
# merge the two non-overlapping recordings into a single file
return run(pipeline(`$exec --merge test-vanilla.edf test-vanilla-2.edf edf=test_merged.edf `))
end
3,735 changes: 3,735 additions & 0 deletions test/data/test_merged.edf

Large diffs are not rendered by default.

32 changes: 29 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -257,10 +257,10 @@ end

@testset "BDF+ Files" begin
# This is a `BDF+` file containing only trigger information.
# It is similar to an `EDF Annotations` file except that
# It is similar to an `EDF Annotations` file except that
# The `ANNOTATIONS_SIGNAL_LABEL` is `BDF Annotations`.
# The test data has 1081 trigger events, and
# has 180 trials in total, and
# The test data has 1081 trigger events, and
# has 180 trials in total, and
# The annotation `255` signifies the offset of a trial.
# More information, contact: [email protected]
evt = EDF.read(joinpath(DATADIR, "evt.bdf"))
Expand All @@ -269,3 +269,29 @@ end
annotations = [event[end].annotations[1] for event in events]
@test count(==("255"), annotations) == 180
end

@testset "EDF+D Files" begin
    # smoke-test that the single-segment source fixture still reads
    EDF.read(joinpath(DATADIR, "test.edf"))
    # the merged fixture read naively (records packed back-to-back) versus
    # read with gap expansion
    edf = EDF.read(joinpath(DATADIR, "test_merged.edf"))
    edfd = EDF.read_discontiguous(joinpath(DATADIR, "test_merged.edf"))

    @test edf.header == edfd.header
    sc = edf.signals[1]
    sd = edfd.signals[1]

    @test sc.header == sd.header
    # doubled signal with 12 hours between start times, but we need to take
    # into account that the 12 hours includes the duration of the original
    # signal; the expanded samples should look like [signal; zero gap; signal]
    beginning = @view sd.samples[1:length(sc.samples)]
    ending = @view sd.samples[(end - length(sc.samples) + 1):end]
    gap = @view sd.samples[(length(sc.samples) + 1):(end - length(sc.samples))]
    @test sc.samples == beginning
    @test sc.samples == ending
    @test all(==(0), gap)
end
Loading