@@ -35,9 +35,9 @@ def get(self):
 
 
 class ProducerBatch(object):
-    def __init__(self, tp, records):
+    def __init__(self, tp, records, now=None):
         self.max_record_size = 0
-        now = time.time()
+        now = time.time() if now is None else now
         self.created = now
         self.drained = None
         self.attempts = 0
@@ -52,13 +52,18 @@ def __init__(self, tp, records):
     def record_count(self):
         return self.records.next_offset()
 
-    def try_append(self, timestamp_ms, key, value, headers):
+    @property
+    def producer_id(self):
+        return self.records.producer_id if self.records else None
+
+    def try_append(self, timestamp_ms, key, value, headers, now=None):
         metadata = self.records.append(timestamp_ms, key, value, headers)
         if metadata is None:
             return None
 
+        now = time.time() if now is None else now
         self.max_record_size = max(self.max_record_size, metadata.size)
-        self.last_append = time.time()
+        self.last_append = now
         future = FutureRecordMetadata(self.produce_future, metadata.offset,
                                       metadata.timestamp, metadata.crc,
                                       len(key) if key is not None else -1,
@@ -81,7 +86,7 @@ def done(self, base_offset=None, timestamp_ms=None, exception=None, log_start_of
                         log_start_offset, exception)  # trace
             self.produce_future.failure(exception)
 
-    def maybe_expire(self, request_timeout_ms, retry_backoff_ms, linger_ms, is_full):
+    def maybe_expire(self, request_timeout_ms, retry_backoff_ms, linger_ms, is_full, now=None):
         """Expire batches if metadata is not available
 
         A batch whose metadata is not available should be expired if one
@@ -93,7 +98,7 @@ def maybe_expire(self, request_timeout_ms, retry_backoff_ms, linger_ms, is_full)
           * the batch is in retry AND request timeout has elapsed after the
             backoff period ended.
         """
-        now = time.time()
+        now = time.time() if now is None else now
         since_append = now - self.last_append
         since_ready = now - (self.created + linger_ms / 1000.0)
         since_backoff = now - (self.last_attempt + retry_backoff_ms / 1000.0)
@@ -121,6 +126,10 @@ def in_retry(self):
     def set_retry(self):
         self._retry = True
 
+    @property
+    def is_done(self):
+        return self.produce_future.is_done
+
     def __str__(self):
         return 'ProducerBatch(topic_partition=%s, record_count=%d)' % (
             self.topic_partition, self.records.next_offset())
@@ -161,6 +170,7 @@ class RecordAccumulator(object):
         'compression_attrs': 0,
         'linger_ms': 0,
         'retry_backoff_ms': 100,
+        'transaction_state': None,
         'message_version': 0,
     }
 
@@ -171,6 +181,7 @@ def __init__(self, **configs):
                 self.config[key] = configs.pop(key)
 
         self._closed = False
+        self._transaction_state = self.config['transaction_state']
         self._flushes_in_progress = AtomicInteger()
         self._appends_in_progress = AtomicInteger()
         self._batches = collections.defaultdict(collections.deque)  # TopicPartition: [ProducerBatch]
@@ -233,6 +244,10 @@ def append(self, tp, timestamp_ms, key, value, headers):
                     batch_is_full = len(dq) > 1 or last.records.is_full()
                     return future, batch_is_full, False
 
+        if self._transaction_state and self.config['message_version'] < 2:
+            raise Errors.UnsupportedVersionError("Attempting to use idempotence with a broker which"
+                                                 " does not support the required message format (v2)."
+                                                 " The broker must be version 0.11 or later.")
         records = MemoryRecordsBuilder(
             self.config['message_version'],
             self.config['compression_attrs'],
@@ -310,9 +325,9 @@ def abort_expired_batches(self, request_timeout_ms, cluster):
 
         return expired_batches
 
-    def reenqueue(self, batch):
+    def reenqueue(self, batch, now=None):
         """Re-enqueue the given record batch in the accumulator to retry."""
-        now = time.time()
+        now = time.time() if now is None else now
         batch.attempts += 1
         batch.last_attempt = now
         batch.last_append = now
@@ -323,7 +338,7 @@ def reenqueue(self, batch):
         with self._tp_locks[batch.topic_partition]:
             dq.appendleft(batch)
 
-    def ready(self, cluster):
+    def ready(self, cluster, now=None):
         """
         Get a list of nodes whose partitions are ready to be sent, and the
         earliest time at which any non-sendable partition will be ready;
@@ -357,7 +372,7 @@ def ready(self, cluster):
         ready_nodes = set()
         next_ready_check = 9999999.99
         unknown_leaders_exist = False
-        now = time.time()
+        now = time.time() if now is None else now
 
         # several threads are accessing self._batches -- to simplify
         # concurrent access, we iterate over a snapshot of partitions
@@ -412,7 +427,7 @@ def has_unsent(self):
                     return True
         return False
 
-    def drain(self, cluster, nodes, max_size):
+    def drain(self, cluster, nodes, max_size, now=None):
         """
         Drain all the data for the given nodes and collate them into a list of
         batches that will fit within the specified size on a per-node basis.
@@ -430,7 +445,7 @@ def drain(self, cluster, nodes, max_size):
         if not nodes:
             return {}
 
-        now = time.time()
+        now = time.time() if now is None else now
         batches = {}
         for node_id in nodes:
             size = 0
@@ -463,7 +478,26 @@ def drain(self, cluster, nodes, max_size):
                         # single request
                         break
                     else:
+                        producer_id_and_epoch = None
+                        if self._transaction_state:
+                            producer_id_and_epoch = self._transaction_state.producer_id_and_epoch
+                            if not producer_id_and_epoch.is_valid:
+                                # we cannot send the batch until we have refreshed the PID
+                                log.debug("Waiting to send ready batches because transaction producer id is not valid")
+                                break
+
                         batch = dq.popleft()
+                        if producer_id_and_epoch and not batch.in_retry():
+                            # If the batch is in retry, then we should not change the pid and
+                            # sequence number, since this may introduce duplicates. In particular,
+                            # the previous attempt may actually have been accepted, and if we change
+                            # the pid and sequence here, this attempt will also be accepted, causing
+                            # a duplicate.
+                            sequence_number = self._transaction_state.sequence_number(batch.topic_partition)
+                            log.debug("Dest: %s: %s producer_id=%s epoch=%s sequence=%s",
+                                      node_id, batch.topic_partition, producer_id_and_epoch.producer_id, producer_id_and_epoch.epoch,
+                                      sequence_number)
+                            batch.records.set_producer_state(producer_id_and_epoch.producer_id, producer_id_and_epoch.epoch, sequence_number)
                         batch.records.close()
                         size += batch.records.size_in_bytes()
                         ready.append(batch)
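
Note on the `now=None` plumbing above: each time-sensitive method falls back to `time.time()` only when the caller passes nothing, so a caller can thread one consistent timestamp through append, expiry, ready, and drain checks, and tests can pin the clock. A minimal, self-contained sketch of the same idiom; the `Batch`/`is_expired` names are illustrative only, not part of this patch:

```python
import time

class Batch:
    def __init__(self, now=None):
        # Same idiom as the patch: use the wall clock only when the caller
        # does not supply a timestamp.
        self.created = time.time() if now is None else now

    def is_expired(self, timeout_s, now=None):
        now = time.time() if now is None else now
        return now - self.created >= timeout_s

batch = Batch(now=100.0)
assert not batch.is_expired(30, now=120.0)   # 20s elapsed: not expired
assert batch.is_expired(30, now=130.0)       # 30s elapsed: expired
```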
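The retry guard in the new `drain()` block exists because a retried batch may already have been delivered under its original producer id and sequence number; re-stamping it on the next attempt would let the broker accept both copies. A self-contained sketch of that bookkeeping rule, assuming a simple per-partition counter (illustrative names only, not kafka-python APIs):

```python
from collections import defaultdict

class SequenceBook:
    """Per-partition sequence counter: a batch is stamped once, on its first drain."""
    def __init__(self):
        self._next = defaultdict(int)

    def assign(self, tp, batch):
        if batch.get("sequence") is None:          # first attempt: stamp it
            batch["sequence"] = self._next[tp]
            self._next[tp] += batch["record_count"]
        return batch["sequence"]                   # retry: reuse the old stamp

book = SequenceBook()
first = {"record_count": 3, "sequence": None}
assert book.assign("topic-0", first) == 0
assert book.assign("topic-0", first) == 0          # retried batch keeps sequence 0
assert book.assign("topic-0", {"record_count": 1, "sequence": None}) == 3
```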