Merge pull request #328 from MIT-LCP/huge-skip

tompollard · web-flow · commit 0d4160326417 · 2022-03-03T12:18:06.000-05:00
Handle sample numbers > 2**31 in annotation files
diff --git a/sample-data/huge.qrs b/sample-data/huge.qrs
diff --git a/tests/test_annotation.py b/tests/test_annotation.py
@@ -1,10 +1,13 @@
+import os
 import re
+import unittest
 
 import numpy as np
 
 import wfdb
 
-class test_annotation():
+
+class TestAnnotation(unittest.TestCase):
     """
     Testing read and write of WFDB annotations, including Physionet
     streaming.
@@ -183,3 +186,34 @@ def test_3(self):
         assert (comp == [True] * 6)
         assert annotation.__eq__(pn_annotation)
         assert annotation.__eq__(write_annotation)
+
+    def test_4(self):
+        """
+        Read and write annotations with large time skips
+
+        Annotation file created by:
+            echo "xxxxxxxxx 10000000000 N 0 0 0" | wrann -r huge -a qrs
+        """
+        annotation = wfdb.rdann('sample-data/huge', 'qrs')
+        self.assertEqual(annotation.sample[0], 10000000000)
+        annotation.wrann()
+
+        annotation1 = wfdb.rdann('sample-data/huge', 'qrs')
+        annotation2 = wfdb.rdann('huge', 'qrs')
+        self.assertEqual(annotation1, annotation2)
+
+    @classmethod
+    def tearDownClass(cls):
+        writefiles = [
+            '100.atr',
+            '1003.atr',
+            '12726.anI',
+            'huge.qrs',
+        ]
+        for file in writefiles:
+            if os.path.isfile(file):
+                os.remove(file)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_record.py b/tests/test_record.py
@@ -550,9 +550,9 @@ def test_header_with_non_utf8(self):
     @classmethod
     def tearDownClass(cls):
         "Clean up written files"
-        writefiles = ['03700181.dat','03700181.hea','100.atr','100.dat',
-                      '100.hea','1003.atr','100_3chan.dat','100_3chan.hea',
-                      '12726.anI','a103l.hea','a103l.mat','s0010_re.dat',
+        writefiles = ['03700181.dat','03700181.hea','100.dat',
+                      '100.hea','100_3chan.dat','100_3chan.hea',
+                      'a103l.hea','a103l.mat','s0010_re.dat',
                       's0010_re.hea','s0010_re.xyz','test01_00s.dat',
                       'test01_00s.hea','test01_00s_skewframe.hea',
                       'n8_evoked_raw_95_F1_R9.dat', 'n8_evoked_raw_95_F1_R9.hea']
diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py
@@ -466,8 +466,6 @@ def check_field(self, field):
                 raise ValueError("The 'sample' field must only contain non-negative integers")
             if min(sampdiffs) < 0 :
                 raise ValueError("The 'sample' field must contain monotonically increasing sample numbers")
-            if max(sampdiffs) > 2147483648:
-                raise ValueError('WFDB annotation files cannot store sample differences greater than 2**31')
 
         elif field == 'label_store':
             if min(item) < 1 or max(item) > 49:
@@ -1370,19 +1368,30 @@ def field2bytes(field, value):
         # sample difference
         sd = value[0]
 
-        # Add SKIP element if value is too large for single byte
-        if sd>1023:
-            # 8 bytes in total:
-            # - [0, 59>>2] indicates SKIP
-            # - Next 4 gives sample difference
-            # - Final 2 give 0 and sym
-            data_bytes = [0, 236, (sd&16711680)>>16, (sd&4278190080)>>24, sd&255, (sd&65280)>>8, 0, 4*typecode]
-        # Just need samp and sym
-        else:
-            # - First byte stores low 8 bits of samp
-            # - Second byte stores high 2 bits of samp
-            #   and sym
-            data_bytes = [sd & 255, ((sd & 768) >> 8) + 4*typecode]
+        data_bytes = []
+
+        # Add SKIP element(s) if the sample difference is too large to
+        # be stored in the annotation type word.
+        #
+        # Each SKIP element consists of three words (6 bytes):
+        #  - Bytes 0-1 contain the SKIP indicator (59 << 10)
+        #  - Bytes 2-3 contain the high 16 bits of the sample difference
+        #  - Bytes 4-5 contain the low 16 bits of the sample difference
+        # If the total difference exceeds 2**31 - 1, multiple skips must
+        # be used.
+        while sd > 1023:
+            n = min(sd, 0x7fffffff)
+            data_bytes += [0, 59 << 2,
+                           (n >> 16) & 255,
+                           (n >> 24) & 255,
+                           (n >> 0) & 255,
+                           (n >> 8) & 255]
+            sd -= n
+
+        # Annotation type itself is stored as a single word:
+        #  - bits 0 to 9 store the sample difference (0 to 1023)
+        #  - bits 10 to 15 store the type code
+        data_bytes += [sd & 255, ((sd & 768) >> 8) + 4 * typecode]
 
     elif field == 'num':
         # First byte stores num
@@ -1653,8 +1662,11 @@ def rdann(record_name, extension, sampfrom=0, sampto=None, shift_samps=False,
                                              subtype, chan, num, aux_note)
 
     # Convert lists to numpy arrays dtype='int'
-    (sample, label_store, subtype,
-     chan, num) = lists_to_int_arrays(sample, label_store, subtype, chan, num)
+    (label_store, subtype,
+     chan, num) = lists_to_int_arrays(label_store, subtype, chan, num)
+
+    # Convert sample numbers to a numpy array of 'int64'
+    sample = np.array(sample, dtype='int64')
 
     # Try to get fs from the header file if it is not contained in the
     # annotation file
@@ -1748,8 +1760,8 @@ def load_byte_pairs(record_name, extension, pn_dir):
 
     Returns
     -------
-    filebytes : str
-        The input filestream converted to bytes.
+    filebytes : ndarray
+        The input filestream converted to an Nx2 array of unsigned bytes.
 
     """
     # local file
@@ -1769,8 +1781,8 @@ def proc_ann_bytes(filebytes, sampto):
 
     Parameters
     ----------
-    filebytes : str
-        The input filestream converted to bytes.
+    filebytes : ndarray
+        The input filestream converted to an Nx2 array of unsigned bytes.
     sampto : int
         The maximum sample number for annotations to be returned.
     
@@ -1852,8 +1864,8 @@ def proc_core_fields(filebytes, bpi):
 
     Parameters
     ----------
-    filebytes : str
-        The input filestream converted to bytes.
+    filebytes : ndarray
+        The input filestream converted to an Nx2 array of unsigned bytes.
     bpi : int
         The index to start the conversion.
 
@@ -1869,31 +1881,28 @@ def proc_core_fields(filebytes, bpi):
         The index to start the conversion.
 
     """
-    label_store = filebytes[bpi, 1] >> 2
+    sample_diff = 0
 
     # The current byte pair will contain either the actual d_sample + annotation store value,
     # or 0 + SKIP.
-
-    # Not a skip - it is the actual sample number + annotation type store value
-    if label_store != 59:
-        sample_diff = filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3)
-        bpi = bpi + 1
-    # Skip. Note: Could there be another skip after the first?
-    else:
+    while filebytes[bpi, 1] >> 2 == 59:
         # 4 bytes storing dt
-        sample_diff = 65536 * filebytes[bpi + 1,0] + 16777216 * filebytes[bpi + 1,1] \
-             + filebytes[bpi + 2,0] + 256 * filebytes[bpi + 2,1]
+        skip_diff = ((int(filebytes[bpi + 1, 0]) << 16)
+                     + (int(filebytes[bpi + 1, 1]) << 24)
+                     + (int(filebytes[bpi + 2, 0]) << 0)
+                     + (int(filebytes[bpi + 2, 1]) << 8))
 
         # Data type is long integer (stored in two's complement). Range -2**31 to 2**31 - 1
-        if sample_diff > 2147483647:
-            sample_diff = sample_diff - 4294967296
+        if skip_diff > 2147483647:
+            skip_diff = skip_diff - 4294967296
 
-        # After the 4 bytes, the next pair's samp is also added
-        sample_diff = sample_diff + filebytes[bpi + 3, 0] + 256 * (filebytes[bpi + 3, 1] & 3)
+        sample_diff += skip_diff
+        bpi = bpi + 3
 
-        # The label is stored after the 4 bytes. Samples here should be 0.
-        label_store = filebytes[bpi + 3, 1] >> 2
-        bpi = bpi + 4
+    # Not a skip - it is the actual sample number + annotation type store value
+    label_store = filebytes[bpi, 1] >> 2
+    sample_diff += int(filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3))
+    bpi = bpi + 1
 
     return sample_diff, label_store, bpi