From 619895d1216cfc3b5da4270d3869cd31b7b55a14 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Thu, 5 Oct 2017 10:15:33 -0700 Subject: [PATCH 1/2] Fix Fetcher.PartitionRecords to handle fetch_offset in the middle of compressed messageset --- kafka/consumer/fetcher.py | 9 +++++++-- test/test_fetcher.py | 23 +++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index f9fcb377f..659744884 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -835,12 +835,17 @@ def _parse_fetched_data(self, completed_fetch): return parsed_records - class PartitionRecords(six.Iterator): + class PartitionRecords(object): def __init__(self, fetch_offset, tp, messages): self.fetch_offset = fetch_offset self.topic_partition = tp self.messages = messages - self.message_idx = 0 + # When fetching an offset that is in the middle of a + # compressed batch, we will get all messages in the batch. + # But we want to start 'take' at the fetch_offset + for i, msg in enumerate(messages): + if msg.offset == fetch_offset: + self.message_idx = i # For truthiness evaluation we need to define __len__ or __nonzero__ def __len__(self): diff --git a/test/test_fetcher.py b/test/test_fetcher.py index 429071a72..aa8e9c3f1 100644 --- a/test/test_fetcher.py +++ b/test/test_fetcher.py @@ -498,3 +498,26 @@ def test__parse_fetched_data__out_of_range(fetcher, topic, mocker): partition_record = fetcher._parse_fetched_data(completed_fetch) assert partition_record is None assert fetcher._subscriptions.assignment[tp].awaiting_reset is True + + +def test_partition_records_offset(): + """Test that compressed messagesets are handle correctly + when fetch offset is in the middle of the message list + """ + batch_start = 120 + batch_end = 130 + fetch_offset = 123 + tp = TopicPartition('foo', 0) + messages = [ConsumerRecord(tp.topic, tp.partition, i, + None, None, 'key', 'value', 'checksum', 0, 0) + for i in range(batch_start, batch_end)] + records = Fetcher.PartitionRecords(fetch_offset, None, messages) + assert records.has_more() + msgs = records.take(1) + assert msgs[0].offset == 123 + assert records.fetch_offset == 124 + msgs = records.take(2) + assert len(msgs) == 2 + assert records.has_more() + records.discard() + assert not records.has_more() From c6b54036af4aebaca545898fe0d8100a15f93ee8 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 5 Feb 2018 14:02:24 -0800 Subject: [PATCH 2/2] Fix for PartitionRecords has no attribute message_idx; add test --- kafka/consumer/fetcher.py | 4 ++++ test/test_fetcher.py | 23 ++++++++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index 659744884..c9bbb9717 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -846,6 +846,10 @@ def __init__(self, fetch_offset, tp, messages): for i, msg in enumerate(messages): if msg.offset == fetch_offset: self.message_idx = i + break + else: + self.message_idx = 0 + self.messages = None # For truthiness evaluation we need to define __len__ or __nonzero__ def __len__(self): diff --git a/test/test_fetcher.py b/test/test_fetcher.py index aa8e9c3f1..4547222bd 100644 --- a/test/test_fetcher.py +++ b/test/test_fetcher.py @@ -512,12 +512,29 @@ def test_partition_records_offset(): None, None, 'key', 'value', 'checksum', 0, 0) for i in range(batch_start, batch_end)] records = Fetcher.PartitionRecords(fetch_offset, None, messages) - assert records.has_more() + assert len(records) > 0 msgs = records.take(1) assert msgs[0].offset == 123 assert records.fetch_offset == 124 msgs = records.take(2) assert len(msgs) == 2 - assert records.has_more() + assert len(records) > 0 records.discard() - assert not records.has_more() + assert len(records) == 0 + + +def test_partition_records_empty(): + records = Fetcher.PartitionRecords(0, None, []) + assert len(records) == 0 + + +def test_partition_records_no_fetch_offset(): + batch_start = 0 + batch_end = 100 + fetch_offset = 123 + tp = TopicPartition('foo', 0) + messages = [ConsumerRecord(tp.topic, tp.partition, i, + None, None, 'key', 'value', 'checksum', 0, 0) + for i in range(batch_start, batch_end)] + records = Fetcher.PartitionRecords(fetch_offset, None, messages) + assert len(records) == 0