Skip to content

Commit 0d41603

Browse files
authored
Merge pull request #328 from MIT-LCP/huge-skip
Handle sample numbers > 2**31 in annotation files
2 parents 7d30c20 + bfa0a37 commit 0d41603

File tree

4 files changed

+87
-44
lines changed

4 files changed

+87
-44
lines changed

sample-data/huge.qrs

34 Bytes
Binary file not shown.

tests/test_annotation.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1+
import os
12
import re
3+
import unittest
24

35
import numpy as np
46

57
import wfdb
68

7-
class test_annotation():
9+
10+
class TestAnnotation(unittest.TestCase):
811
"""
912
Testing read and write of WFDB annotations, including Physionet
1013
streaming.
@@ -183,3 +186,34 @@ def test_3(self):
183186
assert (comp == [True] * 6)
184187
assert annotation.__eq__(pn_annotation)
185188
assert annotation.__eq__(write_annotation)
189+
190+
def test_4(self):
191+
"""
192+
Read and write annotations with large time skips
193+
194+
Annotation file created by:
195+
echo "xxxxxxxxx 10000000000 N 0 0 0" | wrann -r huge -a qrs
196+
"""
197+
annotation = wfdb.rdann('sample-data/huge', 'qrs')
198+
self.assertEqual(annotation.sample[0], 10000000000)
199+
annotation.wrann()
200+
201+
annotation1 = wfdb.rdann('sample-data/huge', 'qrs')
202+
annotation2 = wfdb.rdann('huge', 'qrs')
203+
self.assertEqual(annotation1, annotation2)
204+
205+
@classmethod
206+
def tearDownClass(cls):
207+
writefiles = [
208+
'100.atr',
209+
'1003.atr',
210+
'12726.anI',
211+
'huge.qrs',
212+
]
213+
for file in writefiles:
214+
if os.path.isfile(file):
215+
os.remove(file)
216+
217+
218+
if __name__ == '__main__':
219+
unittest.main()

tests/test_record.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -550,9 +550,9 @@ def test_header_with_non_utf8(self):
550550
@classmethod
551551
def tearDownClass(cls):
552552
"Clean up written files"
553-
writefiles = ['03700181.dat','03700181.hea','100.atr','100.dat',
554-
'100.hea','1003.atr','100_3chan.dat','100_3chan.hea',
555-
'12726.anI','a103l.hea','a103l.mat','s0010_re.dat',
553+
writefiles = ['03700181.dat','03700181.hea','100.dat',
554+
'100.hea','100_3chan.dat','100_3chan.hea',
555+
'a103l.hea','a103l.mat','s0010_re.dat',
556556
's0010_re.hea','s0010_re.xyz','test01_00s.dat',
557557
'test01_00s.hea','test01_00s_skewframe.hea',
558558
'n8_evoked_raw_95_F1_R9.dat', 'n8_evoked_raw_95_F1_R9.hea']

wfdb/io/annotation.py

Lines changed: 49 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -466,8 +466,6 @@ def check_field(self, field):
466466
raise ValueError("The 'sample' field must only contain non-negative integers")
467467
if min(sampdiffs) < 0 :
468468
raise ValueError("The 'sample' field must contain monotonically increasing sample numbers")
469-
if max(sampdiffs) > 2147483648:
470-
raise ValueError('WFDB annotation files cannot store sample differences greater than 2**31')
471469

472470
elif field == 'label_store':
473471
if min(item) < 1 or max(item) > 49:
@@ -1370,19 +1368,30 @@ def field2bytes(field, value):
13701368
# sample difference
13711369
sd = value[0]
13721370

1373-
# Add SKIP element if value is too large for single byte
1374-
if sd>1023:
1375-
# 8 bytes in total:
1376-
# - [0, 59>>2] indicates SKIP
1377-
# - Next 4 gives sample difference
1378-
# - Final 2 give 0 and sym
1379-
data_bytes = [0, 236, (sd&16711680)>>16, (sd&4278190080)>>24, sd&255, (sd&65280)>>8, 0, 4*typecode]
1380-
# Just need samp and sym
1381-
else:
1382-
# - First byte stores low 8 bits of samp
1383-
# - Second byte stores high 2 bits of samp
1384-
# and sym
1385-
data_bytes = [sd & 255, ((sd & 768) >> 8) + 4*typecode]
1371+
data_bytes = []
1372+
1373+
# Add SKIP element(s) if the sample difference is too large to
1374+
# be stored in the annotation type word.
1375+
#
1376+
# Each SKIP element consists of three words (6 bytes):
1377+
# - Bytes 0-1 contain the SKIP indicator (59 << 10)
1378+
# - Bytes 2-3 contain the high 16 bits of the sample difference
1379+
# - Bytes 4-5 contain the low 16 bits of the sample difference
1380+
# If the total difference exceeds 2**31 - 1, multiple skips must
1381+
# be used.
1382+
while sd > 1023:
1383+
n = min(sd, 0x7fffffff)
1384+
data_bytes += [0, 59 << 2,
1385+
(n >> 16) & 255,
1386+
(n >> 24) & 255,
1387+
(n >> 0) & 255,
1388+
(n >> 8) & 255]
1389+
sd -= n
1390+
1391+
# Annotation type itself is stored as a single word:
1392+
# - bits 0 to 9 store the sample difference (0 to 1023)
1393+
# - bits 10 to 15 store the type code
1394+
data_bytes += [sd & 255, ((sd & 768) >> 8) + 4 * typecode]
13861395

13871396
elif field == 'num':
13881397
# First byte stores num
@@ -1653,8 +1662,11 @@ def rdann(record_name, extension, sampfrom=0, sampto=None, shift_samps=False,
16531662
subtype, chan, num, aux_note)
16541663

16551664
# Convert lists to numpy arrays dtype='int'
1656-
(sample, label_store, subtype,
1657-
chan, num) = lists_to_int_arrays(sample, label_store, subtype, chan, num)
1665+
(label_store, subtype,
1666+
chan, num) = lists_to_int_arrays(label_store, subtype, chan, num)
1667+
1668+
# Convert sample numbers to a numpy array of 'int64'
1669+
sample = np.array(sample, dtype='int64')
16581670

16591671
# Try to get fs from the header file if it is not contained in the
16601672
# annotation file
@@ -1748,8 +1760,8 @@ def load_byte_pairs(record_name, extension, pn_dir):
17481760
17491761
Returns
17501762
-------
1751-
filebytes : str
1752-
The input filestream converted to bytes.
1763+
filebytes : ndarray
1764+
The input filestream converted to an Nx2 array of unsigned bytes.
17531765
17541766
"""
17551767
# local file
@@ -1769,8 +1781,8 @@ def proc_ann_bytes(filebytes, sampto):
17691781
17701782
Parameters
17711783
----------
1772-
filebytes : str
1773-
The input filestream converted to bytes.
1784+
filebytes : ndarray
1785+
The input filestream converted to an Nx2 array of unsigned bytes.
17741786
sampto : int
17751787
The maximum sample number for annotations to be returned.
17761788
@@ -1852,8 +1864,8 @@ def proc_core_fields(filebytes, bpi):
18521864
18531865
Parameters
18541866
----------
1855-
filebytes : str
1856-
The input filestream converted to bytes.
1867+
filebytes : ndarray
1868+
The input filestream converted to an Nx2 array of unsigned bytes.
18571869
bpi : int
18581870
The index to start the conversion.
18591871
@@ -1869,31 +1881,28 @@ def proc_core_fields(filebytes, bpi):
18691881
The index to start the conversion.
18701882
18711883
"""
1872-
label_store = filebytes[bpi, 1] >> 2
1884+
sample_diff = 0
18731885

18741886
# The current byte pair will contain either the actual d_sample + annotation store value,
18751887
# or 0 + SKIP.
1876-
1877-
# Not a skip - it is the actual sample number + annotation type store value
1878-
if label_store != 59:
1879-
sample_diff = filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3)
1880-
bpi = bpi + 1
1881-
# Skip. Note: Could there be another skip after the first?
1882-
else:
1888+
while filebytes[bpi, 1] >> 2 == 59:
18831889
# 4 bytes storing the skip's sample difference: high 16-bit word first, then low 16-bit word, each word little-endian
1884-
sample_diff = 65536 * filebytes[bpi + 1,0] + 16777216 * filebytes[bpi + 1,1] \
1885-
+ filebytes[bpi + 2,0] + 256 * filebytes[bpi + 2,1]
1890+
skip_diff = ((int(filebytes[bpi + 1, 0]) << 16)
1891+
+ (int(filebytes[bpi + 1, 1]) << 24)
1892+
+ (int(filebytes[bpi + 2, 0]) << 0)
1893+
+ (int(filebytes[bpi + 2, 1]) << 8))
18861894

18871895
# The skip value is a signed 32-bit integer (stored in two's complement). Range -2**31 to 2**31 - 1
1888-
if sample_diff > 2147483647:
1889-
sample_diff = sample_diff - 4294967296
1896+
if skip_diff > 2147483647:
1897+
skip_diff = skip_diff - 4294967296
18901898

1891-
# After the 4 bytes, the next pair's samp is also added
1892-
sample_diff = sample_diff + filebytes[bpi + 3, 0] + 256 * (filebytes[bpi + 3, 1] & 3)
1899+
sample_diff += skip_diff
1900+
bpi = bpi + 3
18931901

1894-
# The label is stored after the 4 bytes. Samples here should be 0.
1895-
label_store = filebytes[bpi + 3, 1] >> 2
1896-
bpi = bpi + 4
1902+
# Not a skip - it is the actual sample number + annotation type store value
1903+
label_store = filebytes[bpi, 1] >> 2
1904+
sample_diff += int(filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3))
1905+
bpi = bpi + 1
18971906

18981907
return sample_diff, label_store, bpi
18991908

0 commit comments

Comments
 (0)