-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbroker.go
More file actions
539 lines (460 loc) · 13.5 KB
/
broker.go
File metadata and controls
539 lines (460 loc) · 13.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
package flow
import (
"context"
"sync"
"sync/atomic"
"time"
)
// MessageHandler represents a callback function for handling incoming messages.
// The binary parts of the passed message should be assumed to be valid
// only during the function call. It is the handler's responsibility to
// copy the data which should be reused.
type MessageHandler func(context.Context, Message)

// RequestHandler represents a callback function for handling incoming requests.
// The returned message is sent back to the requester as the reply.
// The binary parts of the passed message should be assumed to be valid
// only during the function call. It is the handler's responsibility to
// copy the data which should be reused.
type RequestHandler func(context.Context, Message) Message
// reply is the outcome of an awaited operation: either the raw reply
// payload or the error that resolved the wait (e.g. ErrTimeout, ErrClosed).
type reply struct {
	data []byte
	err  error
}

// pendingReply tracks a single in-flight operation awaiting a response,
// keyed by its correlation id in Broker.pendingReplies.
type pendingReply struct {
	receiver Key         // node the operation was sent to; suspected on timeout
	replych  chan reply  // buffered (capacity 1) channel the reply is delivered on
	timer    *time.Timer // resolves the wait with ErrTimeout when it fires
}
// Broker represents a single node within a clique. It enables
// the publishing and subscribing capabilities of the pub/sub
// system. Each subscribed message is handled by the responsible
// broker, which is determined by the respective node key within a
// clique.
type Broker struct {
	// Counters accessed with sync/atomic. NOTE(review): presumably kept
	// first to guarantee 64-bit alignment on 32-bit platforms — confirm
	// before reordering fields.
	messagesInFlight uint64 // number of messages currently being processed
	requestsInFlight uint64 // number of requests currently being processed
	shuttingDown     uint64 // non-zero once a shutdown has been initiated

	partitionLocks []sync.Mutex  // optional per-slot locks serializing handler calls
	clique         string        // clique name; also the broadcast stream name
	ackTimeout     time.Duration // timeout for forward/ping acknowledgements
	reqTimeout     time.Duration // timeout waiting for Request replies
	codec          Codec         // encodes/decodes published messages
	onError        func(error)   // callback invoked for asynchronous errors
	routing        routingTable  // clique membership and key routing
	pubsub         pubsub        // connection to the pub/sub system
	id             uint64        // last issued correlation id (atomic)

	wg      sync.WaitGroup // waits for the stabilize goroutine on shutdown
	leaving chan struct{}  // closed on shutdown to stop stabilization

	pendingRepliesMtx sync.Mutex
	pendingReplies    map[uint64]pendingReply   // id => pending reply
	messageHandlers   map[string]MessageHandler // stream => message handler
	requestHandlers   map[string]RequestHandler // stream => request handler
}
// NewBroker creates a new broker which uses the pub/sub system
// for publishing messages and subscribing to streams.
//
// Because pubsub could possibly be shared between multiple brokers,
// the caller of this function is responsible for closing all
// connections to the pub/sub system.
func NewBroker(ctx context.Context, pubsub PubSub, o ...Option) (*Broker, error) {
	opts := defaultOptions()
	if err := opts.apply(o...); err != nil {
		return nil, err
	}

	b := &Broker{
		partitionLocks:  opts.partitionLocks,
		clique:          opts.clique,
		ackTimeout:      opts.ackTimeout,
		reqTimeout:      opts.reqTimeout,
		codec:           opts.codec,
		onError:         opts.errorHandler,
		routing:         newRoutingTable(opts),
		pubsub:          newPubSub(pubsub, opts),
		leaving:         make(chan struct{}),
		pendingReplies:  make(map[uint64]pendingReply),
		messageHandlers: opts.messageHandlers,
		requestHandlers: opts.requestHandlers,
	}

	// On any setup failure, disconnect from the pub/sub system before
	// reporting the error.
	fail := func(err error) (*Broker, error) {
		b.pubsub.shutdown(ctx)
		return nil, err
	}

	// The node-private stream and the shared clique stream both carry
	// clique protocol frames.
	if err := b.pubsub.subscribe(ctx, nodeStream(b.clique, b.routing.local), "", b.processCliqueProtocol); err != nil {
		return fail(err)
	}
	if err := b.pubsub.subscribe(ctx, b.clique, "", b.processCliqueProtocol); err != nil {
		return fail(err)
	}
	for stream := range b.messageHandlers {
		if err := b.pubsub.subscribe(ctx, stream, b.clique, b.processMessage); err != nil {
			return fail(err)
		}
	}
	for stream := range b.requestHandlers {
		if err := b.pubsub.subscribe(ctx, stream, b.clique, b.processRequest); err != nil {
			return fail(err)
		}
	}

	// Announce this node to all clique members.
	if err := b.broadcast(ctx, marshalJoin(join{sender: b.routing.local})); err != nil {
		return fail(err)
	}

	b.wg.Add(1)
	go b.stabilize(opts.stabilization.Interval)
	return b, nil
}
// Close notifies all clique members about a leaving broker and
// disconnects from the pub/sub system. Unlike Shutdown, it does not
// wait for in-flight messages or requests to drain.
func (b *Broker) Close() error {
	noWait := func() error { return nil }
	return b.shutdown(context.Background(), noWait)
}
// Shutdown gracefully shuts down the broker. It notifies all clique
// members about a leaving broker and waits until all messages and
// requests are processed. If the given context expires before, the
// context's error will be returned.
func (b *Broker) Shutdown(ctx context.Context) error {
	return b.shutdown(ctx, func() error {
		// Poll the in-flight counters until all work has drained or
		// the context expires.
		ticker := time.NewTicker(250 * time.Millisecond)
		defer ticker.Stop()
		for {
			if atomic.LoadUint64(&b.messagesInFlight) == 0 && atomic.LoadUint64(&b.requestsInFlight) == 0 {
				return nil
			}
			select {
			case <-ctx.Done():
				return ctx.Err()
			case <-ticker.C:
			}
		}
	})
}
// Publish forwards the message directly to the pub/sub system.
// If the message does not contain any partition key, the message will
// be processed by a random broker within a clique.
// All binary data of the passed message needs to be valid only during
// the method call.
func (b *Broker) Publish(ctx context.Context, msg Message) error {
	if b.isShuttingDown() {
		return ErrClosed
	}
	if err := msg.validate(); err != nil {
		return err
	}
	encoded := b.codec.EncodeMessage(msg)
	return b.pubsub.send(ctx, msg.Stream, encoded)
}
// Request sends a request message and waits for its response.
// If the message has no partition key, the request will be processed
// by a random broker within a clique.
// All binary data of the passed message needs to be valid only during
// the method call.
func (b *Broker) Request(ctx context.Context, request Message) (Message, error) {
	if b.isShuttingDown() {
		return Message{}, ErrClosed
	}
	if err := request.validate(); err != nil {
		return Message{}, err
	}

	// The reply stream is this node's private stream, so the response
	// is routed directly back to us.
	send := func(ctx context.Context, id uint64) error {
		return b.pubsub.send(ctx, request.Stream, marshalMsg(msg{
			id:     id,
			reply:  []byte(nodeStream(b.clique, b.routing.local)),
			stream: []byte(request.Stream),
			pkey:   request.PartitionKey,
			data:   request.Data,
		}))
	}
	res := b.awaitReply(ctx, b.routing.local, b.reqTimeout, send)
	return Message{Data: res.data}, res.err
}
// processMessage decodes a message received on a subscribed stream and
// routes it to the responsible broker. The in-flight counter is kept
// up to date so Shutdown can wait for completion (adding ^uint64(0)
// decrements the counter).
func (b *Broker) processMessage(ctx context.Context, stream string, data []byte) {
	atomic.AddUint64(&b.messagesInFlight, 1)
	defer atomic.AddUint64(&b.messagesInFlight, ^uint64(0))

	decoded, err := b.codec.DecodeMessage(stream, data)
	if err != nil {
		b.onError(errorf("decode message: %v", err))
		return
	}

	m := msg{
		stream: []byte(decoded.Stream),
		pkey:   decoded.PartitionKey,
		data:   decoded.Data,
	}
	b.forwardMsg(ctx, m)
}
// processRequest unmarshals a request frame received on a subscribed
// stream and routes it to the responsible broker. The in-flight counter
// is kept up to date so Shutdown can wait for completion.
func (b *Broker) processRequest(ctx context.Context, stream string, data []byte) {
	atomic.AddUint64(&b.requestsInFlight, 1)
	defer atomic.AddUint64(&b.requestsInFlight, ^uint64(0))

	f, err := unmarshalFrame(data)
	if err != nil {
		b.onError(errorf("request subscription: %v", err))
		return
	}
	if f.typ() != frameTypeMsg {
		b.onError(errorf("unexpected request frame type: %s", f.typ()))
		return
	}

	m, err := unmarshalMsg(f)
	if err != nil {
		b.onError(errorf("unmarshal msg: %v", err))
		return
	}
	b.forwardMsg(ctx, m)
}
// processCliqueProtocol dispatches an incoming clique protocol frame to
// the handler matching its frame type.
func (b *Broker) processCliqueProtocol(ctx context.Context, stream string, data []byte) {
	f, err := unmarshalFrame(data)
	if err != nil {
		b.onError(errorf("clique subscription: %v", err))
		return
	}

	switch t := f.typ(); t {
	case frameTypeJoin:
		b.handleJoin(ctx, f)
	case frameTypeLeave:
		b.handleLeave(f)
	case frameTypeInfo:
		b.handleInfo(f)
	case frameTypePing:
		b.handlePing(ctx, f)
	case frameTypeFwd:
		b.handleFwd(ctx, f)
	case frameTypeAck:
		b.handleAck(f)
	default:
		b.onError(errorf("unexpected clique frame type: %s", t))
	}
}
// handleJoin registers a newly joined node and replies with an info
// frame carrying the neighbor set known before the registration.
func (b *Broker) handleJoin(ctx context.Context, f frame) {
	j, err := unmarshalJoin(f)
	if err != nil {
		b.onError(errorf("unmarshal join: %v", err))
		return
	}
	if j.sender == b.routing.local {
		// Ignore our own join broadcast.
		return
	}

	// Snapshot the neighbors before registering the newcomer, so the
	// reply reflects the clique as it was prior to the join.
	known := b.routing.neighbors()
	b.routing.registerKey(j.sender)
	if err := b.sendTo(ctx, j.sender, marshalInfo(info{neighbors: known})); err != nil {
		b.onError(errorf("send info: %v", err))
	}
}
// handleLeave removes a leaving node from the routing table.
func (b *Broker) handleLeave(f frame) {
	l, err := unmarshalLeave(f)
	if err != nil {
		b.onError(errorf("unmarshal leave: %v", err))
		return
	}
	b.routing.unregister(l.node)
}
// handleInfo merges the advertised neighbor keys into the routing table
// and resolves the pending reply waiting on this info frame (e.g. a ping).
func (b *Broker) handleInfo(f frame) {
	in, err := unmarshalInfo(f)
	if err != nil {
		b.onError(errorf("unmarshal info: %v", err))
		return
	}
	b.routing.registerKeys(in.neighbors)
	b.notifyReply(in.id, reply{})
}
// handlePing answers a ping frame with an info frame that echoes the
// ping id and carries this node's current neighbor set.
func (b *Broker) handlePing(ctx context.Context, f frame) {
	p, err := unmarshalPing(f)
	if err != nil {
		b.onError(errorf("unmarshal ping: %v", err))
		return
	}

	answer := marshalInfo(info{
		id:        p.id,
		neighbors: b.routing.neighbors(),
	})
	if err := b.sendTo(ctx, p.sender, answer); err != nil {
		b.onError(errorf("send info: %v", err))
	}
}
// handleFwd acknowledges a forwarded message to the sender and then
// routes the contained message (which may be dispatched locally or
// forwarded again).
func (b *Broker) handleFwd(ctx context.Context, f frame) {
	fw, err := unmarshalFwd(f)
	if err != nil {
		b.onError(errorf("unmarshal fwd: %v", err))
		return
	}

	// The ack is sent before dispatching, matching the sender's
	// ack-timeout expectations.
	if err := b.sendTo(ctx, fw.ack, marshalAck(ack{id: fw.id})); err != nil {
		b.onError(errorf("send ack: %v", err))
	}
	b.forwardMsg(ctx, fw.msg)
}
// handleAck resolves the pending reply matching the acknowledged id,
// delivering the ack payload to the waiter.
func (b *Broker) handleAck(f frame) {
	a, err := unmarshalAck(f)
	if err != nil {
		b.onError(errorf("unmarshal ack: %v", err))
		return
	}
	b.notifyReply(a.id, reply{data: a.data})
}
// forwardMsg routes a message to the broker responsible for its
// partition key. Messages without a partition key are dispatched
// locally. Otherwise the message is forwarded to the key's successor;
// on ack timeout the successor has been suspected and removed, and the
// forward is retried with the next successor until this node becomes
// responsible or a non-timeout error occurs.
func (b *Broker) forwardMsg(ctx context.Context, m msg) {
	partition := KeyFromBytes(m.pkey)
	if len(m.pkey) == 0 {
		b.dispatchMsg(ctx, m, partition)
		return
	}

	for {
		owner := b.routing.successor(partition)
		if owner == b.routing.local {
			b.dispatchMsg(ctx, m, partition)
			return
		}

		// Forward to the owning node and wait for its acknowledgement.
		res := b.awaitReply(ctx, owner, b.ackTimeout, func(ctx context.Context, id uint64) error {
			return b.sendTo(ctx, owner, marshalFwd(fwd{
				id:  id,
				ack: b.routing.local,
				msg: m,
			}))
		})
		switch {
		case res.err == nil:
			return
		case res.err != ErrTimeout:
			b.onError(res.err)
			return
		}
		// The node was suspected and removed from the valid keys.
		// Look for the next successor to handle the message.
	}
}
// dispatchMsg invokes the local handler registered for the message's
// stream. Partitioned messages are serialized per partition slot when
// partition locks are configured. If the message carries a reply
// stream, an ack (with the response data for requests) is sent back.
// Messages for streams without a registered handler are dropped.
func (b *Broker) dispatchMsg(ctx context.Context, m msg, partition Key) {
	slot := -1
	var lock sync.Locker = nullLock{}
	if len(m.pkey) != 0 && len(b.partitionLocks) != 0 {
		slot = int(partition % Key(len(b.partitionLocks)))
		lock = &b.partitionLocks[slot]
	}

	message := Message{
		Stream:       string(m.stream),
		PartitionKey: m.pkey,
		Data:         m.data,
		slot:         slot,
	}

	var replyData []byte
	if h := b.messageHandlers[string(m.stream)]; h != nil {
		lock.Lock()
		h(ctx, message)
		lock.Unlock()
	} else if h := b.requestHandlers[string(m.stream)]; h != nil {
		lock.Lock()
		resp := h(ctx, message)
		lock.Unlock()
		replyData = resp.Data
	} else {
		// No handler registered for this stream.
		return
	}

	if len(m.reply) != 0 {
		a := ack{id: m.id, data: replyData}
		if err := b.pubsub.send(ctx, string(m.reply), marshalAck(a)); err != nil {
			b.onError(errorf("send ack: %v", err))
		}
	}
}
// awaitReply allocates a fresh correlation id, registers a pending
// reply for it, invokes send with that id, and blocks until the wait
// is resolved by one of: a matching reply (via notifyReply), the
// timeout firing (ErrTimeout), the send failing, or the context
// being done.
//
// replych is buffered with capacity 1, so notifyReply can deliver
// without blocking even after this function has already returned on
// context cancellation.
func (b *Broker) awaitReply(ctx context.Context, receiver Key, timeout time.Duration, send func(context.Context, uint64) error) reply {
	id := atomic.AddUint64(&b.id, 1)
	replych := make(chan reply, 1)
	b.pendingRepliesMtx.Lock()
	b.pendingReplies[id] = pendingReply{
		receiver: receiver,
		replych:  replych,
		// Resolves the wait with ErrTimeout; notifyReply additionally
		// suspects the receiver on timeout.
		timer: time.AfterFunc(timeout, func() {
			b.notifyReply(id, reply{err: ErrTimeout})
		}),
	}
	b.pendingRepliesMtx.Unlock()
	if err := send(ctx, id); err != nil {
		// Resolve immediately with the send error; notifyReply removes
		// the pending entry, making a later timer fire a no-op.
		b.notifyReply(id, reply{err: err})
	}
	select {
	case reply := <-replych:
		return reply
	case <-ctx.Done():
		// Resolve the pending entry ourselves so the timer is stopped
		// and nothing sends on the channel after we return.
		reply := reply{err: ctx.Err()}
		b.notifyReply(id, reply)
		return reply
	}
}
// notifyReply resolves the pending reply registered under id, if any.
// The pending timer is stopped, a timed-out remote receiver is reported
// as suspected to the routing table, and the reply is delivered on the
// (buffered) reply channel. Unknown ids are ignored, which makes
// duplicate resolutions (timer vs. ack vs. cancellation) harmless.
func (b *Broker) notifyReply(id uint64, r reply) {
	b.pendingRepliesMtx.Lock()
	p, ok := b.pendingReplies[id]
	delete(b.pendingReplies, id)
	b.pendingRepliesMtx.Unlock()
	if !ok {
		return
	}

	p.timer.Stop()
	if r.err == ErrTimeout && p.receiver != b.routing.local {
		b.routing.suspect(p.receiver)
	}
	p.replych <- r
}
// sendTo publishes the frame on the private stream of the target node.
func (b *Broker) sendTo(ctx context.Context, target Key, f frame) error {
	stream := nodeStream(b.clique, target)
	return b.pubsub.send(ctx, stream, f)
}
// broadcast publishes the frame on the shared clique stream, reaching
// every clique member subscribed to it.
func (b *Broker) broadcast(ctx context.Context, f frame) error {
	return b.pubsub.send(ctx, b.clique, f)
}
// isShuttingDown reports whether a shutdown has been initiated.
func (b *Broker) isShuttingDown() bool {
	down := atomic.LoadUint64(&b.shuttingDown)
	return down != 0
}
// shutdown broadcasts a leave frame to all clique members, waits via
// the provided wait function, disconnects from the pub/sub system,
// cancels all pending replies with ErrClosed, and finally waits for
// the stabilization goroutine to exit.
//
// It is safe to call more than once (e.g. Close after Shutdown): only
// the first call performs the teardown; subsequent calls return
// ErrClosed immediately.
func (b *Broker) shutdown(ctx context.Context, wait func() error) error {
	// Guard the one-time teardown. Without this CAS, a second call
	// would panic on close(b.leaving).
	if !atomic.CompareAndSwapUint64(&b.shuttingDown, 0, 1) {
		return ErrClosed
	}
	close(b.leaving)
	leave := marshalLeave(leave{node: b.routing.local})
	err := b.broadcast(ctx, leave)
	if waitErr := wait(); waitErr != nil {
		err = waitErr
	}
	b.pubsub.shutdown(ctx)
	// Cancel pending replies. Collect the ids under the lock, but call
	// notifyReply outside it, since notifyReply takes the same mutex.
	b.pendingRepliesMtx.Lock()
	ids := make([]uint64, 0, len(b.pendingReplies))
	for id := range b.pendingReplies {
		ids = append(ids, id)
	}
	b.pendingRepliesMtx.Unlock()
	for _, id := range ids {
		b.notifyReply(id, reply{err: ErrClosed})
	}
	b.wg.Wait()
	return err
}
// stabilize periodically pings the successor and the configured
// stabilizer nodes to detect failures and keep the routing table
// fresh. It runs until the leaving channel is closed and signals the
// broker's wait group on exit.
func (b *Broker) stabilize(interval time.Duration) {
	defer b.wg.Done()

	p := ping{sender: b.routing.local}
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	var buf frame
	targets := make([]Key, 1+b.routing.stabilizerCount) // successor + stabilizers
	for {
		select {
		case <-b.leaving:
			return
		case <-ticker.C:
		}

		n := b.routing.stabilizers(targets)
		for i := 0; i < n; i++ {
			target := targets[i]
			res := b.awaitReply(context.Background(), target, b.ackTimeout, func(ctx context.Context, id uint64) error {
				p.id = id
				// Reuse the frame buffer across pings.
				buf = marshalPing(p, buf)
				return b.sendTo(ctx, target, buf)
			})
			// Timeouts mark the node as suspected elsewhere; ErrClosed
			// just means the broker shut down mid-round.
			if res.err != nil && res.err != ErrClosed && res.err != ErrTimeout {
				b.onError(errorf("stabilization: %v", res.err))
			}
		}
	}
}
// nodeStream derives the private pub/sub stream name of a single node,
// in the form "<clique>.<node key>".
func nodeStream(clique string, node Key) string {
	buf := alloc(len(clique)+1+2*keySize, nil)
	off := copy(buf, clique)
	buf[off] = '.'
	node.writeString(buf[off+1:])
	return string(buf)
}
// nullLock is a no-op sync.Locker used by dispatchMsg when partition
// locking does not apply (no partition key or no configured locks).
type nullLock struct{}

func (l nullLock) Lock() {}
func (l nullLock) Unlock() {}