From 3641b80eb57ea92fc12e506b96f1b0f7e8ac0a92 Mon Sep 17 00:00:00 2001 From: Ammar Askar Date: Fri, 13 Jul 2018 20:18:24 -0400 Subject: [PATCH 1/6] bpo-33361: Fix bug with seeking in StreamRecoders --- Lib/codecs.py | 6 +++++ Lib/test/test_codecs.py | 23 +++++++++++++++++++ .../2018-07-13-20-17-17.bpo-33361.dx2NVn.rst | 2 ++ 3 files changed, 31 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2018-07-13-20-17-17.bpo-33361.dx2NVn.rst diff --git a/Lib/codecs.py b/Lib/codecs.py index a70ed20f2bc794..322c1221847179 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -847,6 +847,12 @@ def reset(self): self.reader.reset() self.writer.reset() + def seek(self, offset, whence=0): + # Seeks must be propogated to both the readers and writers + # as they might need to clear their internal buffers + self.reader.seek(offset, whence) + self.writer.seek(offset, whence) + def __getattr__(self, name, getattr=getattr): diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index a59a5e21358e7b..6c9a52635dba4d 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1244,6 +1244,29 @@ def test_recoding(self): self.assertTrue(f.closed) + def test_seeking_read(self): + b = io.BytesIO('line1\nline2\nline3\n'.encode('utf-16-le')) + s = codecs.EncodedFile(b, 'utf-8', 'utf-16-le') + + self.assertEqual(s.readline(), b'line1\n') + s.seek(0) + self.assertEqual(s.readline(), b'line1\n') + self.assertEqual(s.readline(), b'line2\n') + self.assertEqual(s.readline(), b'line3\n') + self.assertEqual(s.readline(), b'') + + def test_seeking_write(self): + b = io.BytesIO('123456789\n'.encode('utf-16-le')) + s = codecs.EncodedFile(b, 'utf-8', 'utf-16-le') + + s.seek(2) + s.write(b'\nabc\n') + s.seek(0) + + self.assertEqual(s.readline(), b'1\n') + self.assertEqual(s.readline(), b'abc\n') + self.assertEqual(s.readline(), b'789\n') + # From RFC 3492 punycode_testcases = [ # A Arabic (Egyptian): diff --git a/Misc/NEWS.d/next/Library/2018-07-13-20-17-17.bpo-33361.dx2NVn.rst b/Misc/NEWS.d/next/Library/2018-07-13-20-17-17.bpo-33361.dx2NVn.rst new file mode 100644 index 00000000000000..67776c2904276f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-07-13-20-17-17.bpo-33361.dx2NVn.rst @@ -0,0 +1,2 @@ +Fix a bug in :class:`StreamRecoder` where seeking might leave old data in a +buffer and break subsequent read calls. Patch by Ammar Askar. From 9629df527dd696c4d727c9ff5d5a25bea0c48fba Mon Sep 17 00:00:00 2001 From: Ammar Askar Date: Tue, 17 Jul 2018 22:14:56 -0400 Subject: [PATCH 2/6] Fix typo in comment --- Lib/codecs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/codecs.py b/Lib/codecs.py index 322c1221847179..66123da7cf42fa 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -848,7 +848,7 @@ def reset(self): self.writer.reset() def seek(self, offset, whence=0): - # Seeks must be propogated to both the readers and writers + # Seeks must be propagated to both the readers and writers # as they might need to clear their internal buffers self.reader.seek(offset, whence) self.writer.seek(offset, whence) From cd2c2853c0ae25a3e6617c4a122d1a0293bf8145 Mon Sep 17 00:00:00 2001 From: Berker Peksag Date: Fri, 31 May 2019 21:37:42 +0300 Subject: [PATCH 3/6] Update 2018-07-13-20-17-17.bpo-33361.dx2NVn.rst --- .../next/Library/2018-07-13-20-17-17.bpo-33361.dx2NVn.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2018-07-13-20-17-17.bpo-33361.dx2NVn.rst b/Misc/NEWS.d/next/Library/2018-07-13-20-17-17.bpo-33361.dx2NVn.rst index 67776c2904276f..2b71095984a09e 100644 --- a/Misc/NEWS.d/next/Library/2018-07-13-20-17-17.bpo-33361.dx2NVn.rst +++ b/Misc/NEWS.d/next/Library/2018-07-13-20-17-17.bpo-33361.dx2NVn.rst @@ -1,2 +1,2 @@ -Fix a bug in :class:`StreamRecoder` where seeking might leave old data in a +Fix a bug in :class:`codecs.StreamRecoder` where seeking might leave old data in a buffer and break subsequent read calls. Patch by Ammar Askar. From a8a2e3c696a0b1c4f85fa676c718c4387c0ccc25 Mon Sep 17 00:00:00 2001 From: Berker Peksag Date: Fri, 31 May 2019 21:40:25 +0300 Subject: [PATCH 4/6] Update codecs.py --- Lib/codecs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/codecs.py b/Lib/codecs.py index 66123da7cf42fa..8ca5032aa304bd 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -849,7 +849,7 @@ def reset(self): def seek(self, offset, whence=0): # Seeks must be propagated to both the readers and writers - # as they might need to clear their internal buffers + # as they might need to clear their internal buffers. self.reader.seek(offset, whence) self.writer.seek(offset, whence) From 7838363f30fabf97c766613d6db09d7287984b07 Mon Sep 17 00:00:00 2001 From: Berker Peksag Date: Fri, 31 May 2019 21:47:07 +0300 Subject: [PATCH 5/6] Update codecs.py --- Lib/codecs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/codecs.py b/Lib/codecs.py index 8ca5032aa304bd..7db50222573786 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -849,7 +849,7 @@ def reset(self): def seek(self, offset, whence=0): # Seeks must be propagated to both the readers and writers - # as they might need to clear their internal buffers. + # as they might need to reset their internal buffers. self.reader.seek(offset, whence) self.writer.seek(offset, whence) From 856ca3ff123925be2e5881bcfd9a1fdcb43f3fa7 Mon Sep 17 00:00:00 2001 From: Berker Peksag Date: Fri, 31 May 2019 21:53:23 +0300 Subject: [PATCH 6/6] add one more assert --- Lib/test/test_codecs.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 6c9a52635dba4d..922bd0619af341 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1259,10 +1259,12 @@ def test_seeking_write(self): b = io.BytesIO('123456789\n'.encode('utf-16-le')) s = codecs.EncodedFile(b, 'utf-8', 'utf-16-le') + # Test that seek() only resets its internal buffer when offset + # and whence are zero. s.seek(2) s.write(b'\nabc\n') + self.assertEqual(s.readline(), b'789\n') s.seek(0) - self.assertEqual(s.readline(), b'1\n') self.assertEqual(s.readline(), b'abc\n') self.assertEqual(s.readline(), b'789\n')