Skip to content

Commit c2a1104

Browse files
committed
Add the ability to pass sig and hea streams to rdrecord and rdheader, in case the file is not read from disk.
1 parent 34b989e commit c2a1104

File tree

2 files changed

+178
-141
lines changed

2 files changed

+178
-141
lines changed

wfdb/io/_signal.py

Lines changed: 152 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,6 +1066,7 @@ def _rd_segment(
10661066
ignore_skew,
10671067
no_file=False,
10681068
sig_data=None,
1069+
sig_stream=None,
10691070
return_res=64,
10701071
):
10711072
"""
@@ -1211,6 +1212,7 @@ def _rd_segment(
12111212
sampto=sampto,
12121213
no_file=no_file,
12131214
sig_data=sig_data,
1215+
sig_stream=sig_stream,
12141216
)
12151217

12161218
# Copy over the wanted signals
@@ -1235,6 +1237,7 @@ def _rd_dat_signals(
12351237
sampto,
12361238
no_file=False,
12371239
sig_data=None,
1240+
sig_stream=None,
12381241
):
12391242
"""
12401243
Read all signals from a WFDB dat file.
@@ -1324,20 +1327,31 @@ def _rd_dat_signals(
13241327
if no_file:
13251328
data_to_read = sig_data
13261329
elif fmt in COMPRESSED_FMTS:
1327-
data_to_read = _rd_compressed_file(
1328-
file_name=file_name,
1329-
dir_name=dir_name,
1330-
pn_dir=pn_dir,
1331-
fmt=fmt,
1332-
sample_offset=byte_offset,
1333-
n_sig=n_sig,
1334-
samps_per_frame=samps_per_frame,
1335-
start_frame=sampfrom,
1336-
end_frame=sampto,
1337-
)
1330+
if sig_stream is not None:
1331+
data_to_read = _rd_compressed_stream(
1332+
fp=sig_stream,
1333+
fmt=fmt,
1334+
sample_offset=byte_offset,
1335+
n_sig=n_sig,
1336+
samps_per_frame=samps_per_frame,
1337+
start_frame=sampfrom,
1338+
end_frame=sampto,
1339+
)
1340+
else:
1341+
data_to_read = _rd_compressed_file(
1342+
file_name=file_name,
1343+
dir_name=dir_name,
1344+
pn_dir=pn_dir,
1345+
fmt=fmt,
1346+
sample_offset=byte_offset,
1347+
n_sig=n_sig,
1348+
samps_per_frame=samps_per_frame,
1349+
start_frame=sampfrom,
1350+
end_frame=sampto,
1351+
)
13381352
else:
13391353
data_to_read = _rd_dat_file(
1340-
file_name, dir_name, pn_dir, fmt, start_byte, n_read_samples
1354+
file_name, dir_name, pn_dir, fmt, start_byte, n_read_samples, sig_stream
13411355
)
13421356

13431357
if extra_flat_samples:
@@ -1577,7 +1591,7 @@ def _required_byte_num(mode, fmt, n_samp):
15771591
return int(n_bytes)
15781592

15791593

1580-
def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
1594+
def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp, sig_stream):
15811595
"""
15821596
Read data from a dat file, either local or remote, into a 1d numpy
15831597
array.
@@ -1635,8 +1649,14 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
16351649
element_count = n_samp
16361650
byte_count = n_samp * BYTES_PER_SAMPLE[fmt]
16371651

1652+
# Memory Stream
1653+
if sig_stream is not None:
1654+
sig_stream.seek(start_byte)
1655+
sig_data = np.frombuffer(
1656+
sig_stream.read(), dtype=np.dtype(DATA_LOAD_TYPES[fmt]), count=element_count
1657+
)
16381658
# Local dat file
1639-
if pn_dir is None:
1659+
elif pn_dir is None:
16401660
with open(os.path.join(dir_name, file_name), "rb") as fp:
16411661
fp.seek(start_byte)
16421662
sig_data = np.fromfile(
@@ -1651,7 +1671,6 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
16511671

16521672
return sig_data
16531673

1654-
16551674
def _blocks_to_samples(sig_data, n_samp, fmt):
16561675
"""
16571676
Convert uint8 blocks into signal samples for unaligned dat formats.
@@ -1770,6 +1789,123 @@ def _blocks_to_samples(sig_data, n_samp, fmt):
17701789
return sig
17711790

17721791

1792+
def _rd_compressed_stream(
1793+
fp,
1794+
fmt,
1795+
sample_offset,
1796+
n_sig,
1797+
samps_per_frame,
1798+
start_frame,
1799+
end_frame,
1800+
):
1801+
signature = fp.read(4)
1802+
if signature != b"fLaC":
1803+
raise ValueError(f"{fp.name} is not a FLAC file")
1804+
fp.seek(0)
1805+
1806+
with soundfile.SoundFile(fp) as sf:
1807+
# Determine the actual resolution of the FLAC stream and the
1808+
# data type will use when reading it. Note that soundfile
1809+
# doesn't support int8.
1810+
if sf.subtype == "PCM_S8":
1811+
format_bits = 8
1812+
read_dtype = "int16"
1813+
elif sf.subtype == "PCM_16":
1814+
format_bits = 16
1815+
read_dtype = "int16"
1816+
elif sf.subtype == "PCM_24":
1817+
format_bits = 24
1818+
read_dtype = "int32"
1819+
else:
1820+
raise ValueError(f"unknown subtype in {fp.name} ({sf.subtype})")
1821+
1822+
max_bits = int(fmt) - 500
1823+
if format_bits > max_bits:
1824+
raise ValueError(
1825+
f"wrong resolution in {fp.name} "
1826+
f"({format_bits}, expected <= {max_bits})"
1827+
)
1828+
1829+
if sf.channels != n_sig:
1830+
raise ValueError(
1831+
f"wrong number of channels in {fp.name} "
1832+
f"({sf.channels}, expected {n_sig})"
1833+
)
1834+
1835+
# Read the samples.
1836+
start_samp = start_frame * samps_per_frame[0]
1837+
end_samp = end_frame * samps_per_frame[0]
1838+
sf.seek(start_samp + sample_offset)
1839+
1840+
# We could do this:
1841+
# sig_data = sf.read(end_samp - start_samp, dtype=read_dtype)
1842+
# However, sf.read fails for huge blocks (over 2**24 total
1843+
# samples) due to a bug in libsndfile:
1844+
# https://github.com/libsndfile/libsndfile/issues/431
1845+
# So read the data in chunks instead.
1846+
n_samp = end_samp - start_samp
1847+
sig_data = np.empty((n_samp, n_sig), dtype=read_dtype)
1848+
CHUNK_SIZE = 1024 * 1024
1849+
for chunk_start in range(0, n_samp, CHUNK_SIZE):
1850+
chunk_end = chunk_start + CHUNK_SIZE
1851+
chunk_data = sf.read(out=sig_data[chunk_start:chunk_end])
1852+
samples_read = chunk_data.shape[0]
1853+
if samples_read != CHUNK_SIZE:
1854+
sig_data = sig_data[: chunk_start + samples_read]
1855+
break
1856+
1857+
# If we read an 8-bit stream as int16 or a 24-bit stream as
1858+
# int32, soundfile shifts each sample left by 8 bits. We
1859+
# want to undo this shift (and, in the case of 8-bit data,
1860+
# convert to an int8 array.)
1861+
if format_bits == 8:
1862+
# np.right_shift(sig_data, 8, dtype='int8') doesn't work.
1863+
# This seems wrong, but the numpy documentation is unclear.
1864+
sig_data2 = np.empty(sig_data.shape, dtype="int8")
1865+
sig_data = np.right_shift(sig_data, 8, out=sig_data2)
1866+
elif format_bits == 24:
1867+
# Shift 32-bit array in-place.
1868+
np.right_shift(sig_data, 8, out=sig_data)
1869+
1870+
# Suppose we have 3 channels and 2 samples per frame. The array
1871+
# returned by sf.read looks like this:
1872+
#
1873+
# channel 0 channel 1 channel 2
1874+
# time 0 [0,0] [0,1] [0,2]
1875+
# time 1 [1,0] [1,1] [1,2]
1876+
# time 2 [2,0] [2,1] [2,2]
1877+
# time 3 [3,0] [3,1] [3,2]
1878+
#
1879+
# We reshape this first into the following:
1880+
#
1881+
# channel 0 channel 1 channel 2
1882+
# time 0 [0,0,0] [0,0,1] [0,0,2]
1883+
# time 1 [0,1,0] [0,1,1] [0,1,2]
1884+
# time 2 [1,0,0] [1,0,1] [1,0,2]
1885+
# time 3 [1,1,0] [1,1,1] [1,1,2]
1886+
#
1887+
# Then we transpose axes 1 and 2:
1888+
#
1889+
# channel 0 channel 1 channel 2
1890+
# time 0 [0,0,0] [0,1,0] [0,2,0]
1891+
# time 1 [0,0,1] [0,1,1] [0,2,1]
1892+
# time 2 [1,0,0] [1,1,0] [1,2,0]
1893+
# time 3 [1,0,1] [1,1,1] [1,2,1]
1894+
#
1895+
# Then when we reshape the array to 1D, the result is in dat file
1896+
# order:
1897+
#
1898+
# channel 0 channel 1 channel 2
1899+
# time 0 [0] [2] [4]
1900+
# time 1 [1] [3] [5]
1901+
# time 2 [6] [8] [10]
1902+
# time 3 [7] [9] [11]
1903+
1904+
sig_data = sig_data.reshape(-1, samps_per_frame[0], n_sig)
1905+
sig_data = sig_data.transpose(0, 2, 1)
1906+
return sig_data.reshape(-1)
1907+
1908+
17731909
def _rd_compressed_file(
17741910
file_name,
17751911
dir_name,
@@ -1834,112 +1970,7 @@ def _rd_compressed_file(
18341970
file_name = os.path.join(dir_name, file_name)
18351971

18361972
with _coreio._open_file(pn_dir, file_name, "rb") as fp:
1837-
signature = fp.read(4)
1838-
if signature != b"fLaC":
1839-
raise ValueError(f"{fp.name} is not a FLAC file")
1840-
fp.seek(0)
1841-
1842-
with soundfile.SoundFile(fp) as sf:
1843-
# Determine the actual resolution of the FLAC stream and the
1844-
# data type will use when reading it. Note that soundfile
1845-
# doesn't support int8.
1846-
if sf.subtype == "PCM_S8":
1847-
format_bits = 8
1848-
read_dtype = "int16"
1849-
elif sf.subtype == "PCM_16":
1850-
format_bits = 16
1851-
read_dtype = "int16"
1852-
elif sf.subtype == "PCM_24":
1853-
format_bits = 24
1854-
read_dtype = "int32"
1855-
else:
1856-
raise ValueError(f"unknown subtype in {fp.name} ({sf.subtype})")
1857-
1858-
max_bits = int(fmt) - 500
1859-
if format_bits > max_bits:
1860-
raise ValueError(
1861-
f"wrong resolution in {fp.name} "
1862-
f"({format_bits}, expected <= {max_bits})"
1863-
)
1864-
1865-
if sf.channels != n_sig:
1866-
raise ValueError(
1867-
f"wrong number of channels in {fp.name} "
1868-
f"({sf.channels}, expected {n_sig})"
1869-
)
1870-
1871-
# Read the samples.
1872-
start_samp = start_frame * samps_per_frame[0]
1873-
end_samp = end_frame * samps_per_frame[0]
1874-
sf.seek(start_samp + sample_offset)
1875-
1876-
# We could do this:
1877-
# sig_data = sf.read(end_samp - start_samp, dtype=read_dtype)
1878-
# However, sf.read fails for huge blocks (over 2**24 total
1879-
# samples) due to a bug in libsndfile:
1880-
# https://github.com/libsndfile/libsndfile/issues/431
1881-
# So read the data in chunks instead.
1882-
n_samp = end_samp - start_samp
1883-
sig_data = np.empty((n_samp, n_sig), dtype=read_dtype)
1884-
CHUNK_SIZE = 1024 * 1024
1885-
for chunk_start in range(0, n_samp, CHUNK_SIZE):
1886-
chunk_end = chunk_start + CHUNK_SIZE
1887-
chunk_data = sf.read(out=sig_data[chunk_start:chunk_end])
1888-
samples_read = chunk_data.shape[0]
1889-
if samples_read != CHUNK_SIZE:
1890-
sig_data = sig_data[: chunk_start + samples_read]
1891-
break
1892-
1893-
# If we read an 8-bit stream as int16 or a 24-bit stream as
1894-
# int32, soundfile shifts each sample left by 8 bits. We
1895-
# want to undo this shift (and, in the case of 8-bit data,
1896-
# convert to an int8 array.)
1897-
if format_bits == 8:
1898-
# np.right_shift(sig_data, 8, dtype='int8') doesn't work.
1899-
# This seems wrong, but the numpy documentation is unclear.
1900-
sig_data2 = np.empty(sig_data.shape, dtype="int8")
1901-
sig_data = np.right_shift(sig_data, 8, out=sig_data2)
1902-
elif format_bits == 24:
1903-
# Shift 32-bit array in-place.
1904-
np.right_shift(sig_data, 8, out=sig_data)
1905-
1906-
# Suppose we have 3 channels and 2 samples per frame. The array
1907-
# returned by sf.read looks like this:
1908-
#
1909-
# channel 0 channel 1 channel 2
1910-
# time 0 [0,0] [0,1] [0,2]
1911-
# time 1 [1,0] [1,1] [1,2]
1912-
# time 2 [2,0] [2,1] [2,2]
1913-
# time 3 [3,0] [3,1] [3,2]
1914-
#
1915-
# We reshape this first into the following:
1916-
#
1917-
# channel 0 channel 1 channel 2
1918-
# time 0 [0,0,0] [0,0,1] [0,0,2]
1919-
# time 1 [0,1,0] [0,1,1] [0,1,2]
1920-
# time 2 [1,0,0] [1,0,1] [1,0,2]
1921-
# time 3 [1,1,0] [1,1,1] [1,1,2]
1922-
#
1923-
# Then we transpose axes 1 and 2:
1924-
#
1925-
# channel 0 channel 1 channel 2
1926-
# time 0 [0,0,0] [0,1,0] [0,2,0]
1927-
# time 1 [0,0,1] [0,1,1] [0,2,1]
1928-
# time 2 [1,0,0] [1,1,0] [1,2,0]
1929-
# time 3 [1,0,1] [1,1,1] [1,2,1]
1930-
#
1931-
# Then when we reshape the array to 1D, the result is in dat file
1932-
# order:
1933-
#
1934-
# channel 0 channel 1 channel 2
1935-
# time 0 [0] [2] [4]
1936-
# time 1 [1] [3] [5]
1937-
# time 2 [6] [8] [10]
1938-
# time 3 [7] [9] [11]
1939-
1940-
sig_data = sig_data.reshape(-1, samps_per_frame[0], n_sig)
1941-
sig_data = sig_data.transpose(0, 2, 1)
1942-
return sig_data.reshape(-1)
1973+
return _rd_compressed_stream(fp, fmt, sample_offset, n_sig, samps_per_frame, start_frame, end_frame)
19431974

19441975

19451976
def _skew_sig(

0 commit comments

Comments
 (0)