blob: 2a30c5c676c285c23ba6dd4af372f7958eeaea25 [file] [log] [blame]
Brian Waters13d96012017-12-08 16:53:31 -06001/*********************************************************************************************************
2* Software License Agreement (BSD License) *
3* Author: Sebastien Decugis <sdecugis@freediameter.net> *
4* *
5* Copyright (c) 2013, WIDE Project and NICT *
6* All rights reserved. *
7* *
8* Redistribution and use of this software in source and binary forms, with or without modification, are *
9* permitted provided that the following conditions are met: *
10* *
11* * Redistributions of source code must retain the above *
12* copyright notice, this list of conditions and the *
13* following disclaimer. *
14* *
15* * Redistributions in binary form must reproduce the above *
16* copyright notice, this list of conditions and the *
17* following disclaimer in the documentation and/or other *
18* materials provided with the distribution. *
19* *
20* * Neither the name of the WIDE Project or NICT nor the *
21* names of its contributors may be used to endorse or *
22* promote products derived from this software without *
23* specific prior written permission of WIDE Project and *
24* NICT. *
25* *
26* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED *
27* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A *
28* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
29* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT *
30* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
31* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR *
32* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF *
33* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
34*********************************************************************************************************/
35
36#include "fdcore-internal.h"
37
38/*
This file implements a Peer State Machine which is a mix of:
 - the state machine described in rfc3588bis
 - the state machine described in rfc3539#section-3.4
 - the following observations.
43
The delivery of Diameter messages must not always be unordered: order is important at
the beginning and end of a connection lifetime. It means we need agility to
switch between "ordering enforced" and "ordering not enforced to counter
Head of the Line Blocking" modes of operation.
48
The connection state machine represented in RFC3588 (and RFC6733) is
incomplete, because it lacks the SUSPECT state and the 3 DWR/DWA
exchanges (section 5.1) when the peer recovers from this state.
Personally I don't see the rationale for exchanging 3 messages (why 3?)
but, if we require at least 1 DWR/DWA exchange to be always performed
after the CER/CEA exchange (and initiated by the peer that sent the
CEA), we have a simple way to deal with our ordering problem, as summarized
below. Peers are: [i]nitiator, [r]esponder.
57 (1) [i] SCTP connection attempt.
58 (2) [r] accept the connection.
59 (3) [i,r] (if secure port) DTLS handshake, close on failure.
60 (4) [i] Send CER
61 (5) [r] Receive CER, send CEA using stream 0, flag "unordered" cleared.
62 [r] Immediately send a DWR after the CEA, also using stream 0,
63flag "unordered" cleared.
64 [r] Move to STATE_OPEN_NEW state -- equivalent to OPEN except
65that all messages are sent ordered at the moment.
66 (6) [i] receive CEA, move to OPEN state. All messages can be sent
67unordered in OPEN state.
68 [i] As per normal operation, reply with DWA to the DWR.
69 (7) [r] Upon reception of the DWA, move to OPEN state, messages can be
70sent unordered from this point.
71
72Note about (5) and (6): if the Diameter Identity received in CER or CEA
73does not match the credentials from the certificate presented during
74TLS handshake, we may need to specify a path of clean disconnection
75(not blocking the remote peer waiting for something).
76
This proposed mechanism removes the problem of application messages
received before the CEA by the initiator. Note that if the "old" inband
TLS handshake is used, this handshake plays the same synchronization
role as the new DWR/DWA, which becomes useless.
81
82
83The other time where ordering is important is by the end of connection
84lifetime, when one peer is shutting down the link for some reason
85(reboot, overload, no activity, etc...). In case of unordered delivery,
86we may have:
87- peer A sends an application message followed by a DPR. Peer B receives
88the DPR first and tears down the connection. Application message is lost.
89- Peer B sends an application message, then receives a DPR and answers a
90DPA. Peer A receives the DPA before the application message. The
91application message is lost.
92
93This situation is actually happening easily because DPR/DPA messages are
94very short, while application messages can be quite large. Therefore,
95they require much more time to deliver.
96
I really cannot see a way to counter this effect by using the ordering
of the messages, except by applying a timer (state STATE_CLOSING_GRACE).
This timer can be also useful when we detect that some messages have not
yet received an answer on this link, to give time to the application to
complete the exchange ongoing.
102
103However, this problem must be balanced with the fact that the message
104that is lost will be in many cases sent again as the failover mechanism
105specifies.
106*/
107
/* The actual declaration of peer_state_str (expanded from the project's DECLARE_STATE_STR macro) */
DECLARE_STATE_STR();

/* Helper for DECLARE_PEV_STR below: one switch case that returns the name of its argument as a string */
#define case_str( _val ) case _val : return #_val

/* Expanded from the project's DECLARE_PEV_STR macro -- presumably defines fd_pev_str(), used for tracing in this file */
DECLARE_PEV_STR();
116
117/************************************************************************/
118/* Delayed startup */
119/************************************************************************/
/*
 * Start gate shared by all the peer state machine threads:
 *  - started_mtx is held around every access to 'started' made by fd_psm_waitstart() and fd_psm_start();
 *  - started_cnd is the condition fd_psm_waitstart() waits on and fd_psm_start() broadcasts;
 *  - started is 0 initially and is set to 1 by fd_psm_start().
 */
static pthread_mutex_t started_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  started_cnd = PTHREAD_COND_INITIALIZER;
static int             started     = 0;
123
124/* Wait for start signal */
125static int fd_psm_waitstart()
126{
127 int ret = 0;
128 TRACE_ENTRY("");
129 CHECK_POSIX( pthread_mutex_lock(&started_mtx) );
130awake:
131 if (!ret && !started) {
132 pthread_cleanup_push( fd_cleanup_mutex, &started_mtx );
133 CHECK_POSIX_DO( ret = pthread_cond_wait(&started_cnd, &started_mtx), );
134 pthread_cleanup_pop( 0 );
135 goto awake;
136 }
137 CHECK_POSIX( pthread_mutex_unlock(&started_mtx) );
138 return ret;
139}
140
141/* Allow the state machines to start */
142int fd_psm_start()
143{
144 TRACE_ENTRY("");
145 CHECK_POSIX( pthread_mutex_lock(&started_mtx) );
146 started = 1;
147 CHECK_POSIX( pthread_cond_broadcast(&started_cnd) );
148 CHECK_POSIX( pthread_mutex_unlock(&started_mtx) );
149 return 0;
150}
151
152
153/************************************************************************/
154/* Manage the list of active peers */
155/************************************************************************/
156
157/* Enter/leave OPEN state */
158static int enter_open_state(struct fd_peer * peer)
159{
160 struct fd_list * li;
161 CHECK_PARAMS( FD_IS_LIST_EMPTY(&peer->p_actives) );
162
163 /* Callback registered by the credential validator (fd_peer_validate_register) */
164 if (peer->p_cb2) {
165 CHECK_FCT_DO( (*peer->p_cb2)(&peer->p_hdr.info),
166 {
167 TRACE_DEBUG(FULL, "Validation failed, terminating the connection");
168 fd_psm_terminate(peer, "DO_NOT_WANT_TO_TALK_TO_YOU" );
169 } );
170 peer->p_cb2 = NULL;
171 return 0;
172 }
173
174 /* Insert in the active peers list */
175 CHECK_POSIX( pthread_rwlock_wrlock(&fd_g_activ_peers_rw) );
176 for (li = fd_g_activ_peers.next; li != &fd_g_activ_peers; li = li->next) {
177 struct fd_peer * next_p = (struct fd_peer *)li->o;
178 int cmp = fd_os_cmp(peer->p_hdr.info.pi_diamid, peer->p_hdr.info.pi_diamidlen,
179 next_p->p_hdr.info.pi_diamid, next_p->p_hdr.info.pi_diamidlen);
180 if (cmp < 0)
181 break;
182 }
183 fd_list_insert_before(li, &peer->p_actives);
184 CHECK_POSIX( pthread_rwlock_unlock(&fd_g_activ_peers_rw) );
185
186 /* Callback registered when the peer was added, by fd_peer_add */
187 if (peer->p_cb) {
188 TRACE_DEBUG(FULL, "Calling add callback for peer %s", peer->p_hdr.info.pi_diamid);
189 (*peer->p_cb)(&peer->p_hdr.info, peer->p_cb_data); /* TODO: do this in a separate detached thread? */
190 peer->p_cb = NULL;
191 peer->p_cb_data = NULL;
192 }
193
194 /* Start the thread to handle outgoing messages */
195 CHECK_FCT( fd_out_start(peer) );
196
197 /* Update the expiry timer now */
198 CHECK_FCT( fd_p_expi_update(peer) );
199
200 return 0;
201}
202static int leave_open_state(struct fd_peer * peer, int skip_failover)
203{
204 /* Remove from active peers list */
205 CHECK_POSIX( pthread_rwlock_wrlock(&fd_g_activ_peers_rw) );
206 fd_list_unlink( &peer->p_actives );
207 CHECK_POSIX( pthread_rwlock_unlock(&fd_g_activ_peers_rw) );
208
209 /* Stop the "out" thread */
210 CHECK_FCT( fd_out_stop(peer) );
211
212 /* Failover the messages */
213 if (!skip_failover) {
214 fd_peer_failover_msg(peer);
215 }
216
217 return 0;
218}
219
220
221/************************************************************************/
222/* Helpers for state changes */
223/************************************************************************/
224
225/* Cleanup pending events in the peer */
226void fd_psm_events_free(struct fd_peer * peer)
227{
228 struct fd_event * ev;
229 /* Purge all events, and free the associated data if any */
230 while (fd_fifo_tryget( peer->p_events, &ev ) == 0) {
231 switch (ev->code) {
232 case FDEVP_CNX_ESTABLISHED: {
233 fd_cnx_destroy(ev->data);
234 }
235 break;
236
237 case FDEVP_TERMINATE:
238 /* Do not free the string since it is a constant */
239 break;
240
241 case FDEVP_CNX_INCOMING: {
242 struct cnx_incoming * evd = ev->data;
243 fd_hook_call(HOOK_MESSAGE_DROPPED, evd->cer, NULL, "Message discarded while cleaning peer state machine queue.", fd_msg_pmdl_get(evd->cer));
244 CHECK_FCT_DO( fd_msg_free(evd->cer), /* continue */);
245 fd_cnx_destroy(evd->cnx);
246 }
247 default:
248 free(ev->data);
249 }
250 free(ev);
251 }
252}
253
254/* Read state */
255int fd_peer_get_state(struct peer_hdr *peer)
256{
257 int ret;
258
259 struct fd_peer * p = (struct fd_peer *)peer;
260
261 if (!CHECK_PEER(p))
262 return -1;
263
264 CHECK_POSIX_DO( pthread_mutex_lock(&p->p_state_mtx), return -1 );
265 ret = p->p_state;
266 CHECK_POSIX_DO( pthread_mutex_unlock(&p->p_state_mtx), return -1 );
267
268 return ret;
269}
270
271
272/* Change state */
273int fd_psm_change_state(struct fd_peer * peer, int new_state)
274{
275 int old;
276
277 TRACE_ENTRY("%p %d(%s)", peer, new_state, STATE_STR(new_state));
278 CHECK_PARAMS( CHECK_PEER(peer) );
279
280 old = fd_peer_getstate(peer);
281 if (old == new_state)
282 return 0;
283
284 LOG(((old == STATE_OPEN) || (new_state == STATE_OPEN)) ? FD_LOG_NOTICE : FD_LOG_DEBUG, "'%s'\t-> '%s'\t'%s'",
285 STATE_STR(old),
286 STATE_STR(new_state),
287 peer->p_hdr.info.pi_diamid);
288
289
290 CHECK_POSIX( pthread_mutex_lock(&peer->p_state_mtx) );
291 peer->p_state = new_state;
292 CHECK_POSIX( pthread_mutex_unlock(&peer->p_state_mtx) );
293
294 if (old == STATE_OPEN) {
295 CHECK_FCT( leave_open_state(peer, new_state == STATE_CLOSING_GRACE) );
296 }
297 if (old == STATE_CLOSING_GRACE) {
298 fd_peer_failover_msg(peer);
299 }
300
301 if (new_state == STATE_OPEN) {
302 CHECK_FCT( enter_open_state(peer) );
303 }
304
305 if (new_state == STATE_CLOSED) {
306 /* Purge event list */
307 fd_psm_events_free(peer);
308
309 /* Reset the counter of pending anwers to send */
310 peer->p_reqin_count = 0;
311
312 /* If the peer is not persistant, we destroy it */
313 if (peer->p_hdr.info.config.pic_flags.persist == PI_PRST_NONE) {
314 CHECK_FCT( fd_event_send(peer->p_events, FDEVP_TERMINATE, 0, NULL) );
315 }
316 }
317
318 return 0;
319}
320
321/* Set timeout timer of next event */
322void fd_psm_next_timeout(struct fd_peer * peer, int add_random, int delay)
323{
324 TRACE_DEBUG(FULL, "Peer timeout reset to %d seconds%s", delay, add_random ? " (+/- 2)" : "" );
325
326 /* Initialize the timer */
327 CHECK_POSIX_DO( clock_gettime( CLOCK_REALTIME, &peer->p_psm_timer ), ASSERT(0) );
328
329 if (add_random) {
330 if (delay > 2)
331 delay -= 2;
332 else
333 delay = 0;
334
335 /* Add a random value between 0 and 4sec */
336 peer->p_psm_timer.tv_sec += random() % 4;
337 peer->p_psm_timer.tv_nsec+= random() % 1000000000L;
338 if (peer->p_psm_timer.tv_nsec >= 1000000000L) {
339 peer->p_psm_timer.tv_nsec -= 1000000000L;
340 peer->p_psm_timer.tv_sec ++;
341 }
342 }
343
344 peer->p_psm_timer.tv_sec += delay;
345
346#ifdef SLOW_PSM
347 /* temporary for debug */
348 peer->p_psm_timer.tv_sec += 10;
349#endif
350}
351
352/* Cleanup the peer */
353void fd_psm_cleanup(struct fd_peer * peer, int terminate)
354{
355 /* Move to CLOSED state: failover messages, stop OUT thread, unlink peer from active list */
356 if (fd_peer_getstate(peer) != STATE_ZOMBIE) {
357 CHECK_FCT_DO( fd_psm_change_state(peer, STATE_CLOSED), /* continue */ );
358 }
359
360 fd_p_cnx_abort(peer, terminate);
361
362 fd_p_ce_clear_cnx(peer, NULL);
363
364 if (peer->p_receiver) {
365 fd_cnx_destroy(peer->p_receiver);
366 peer->p_receiver = NULL;
367 }
368
369 if (terminate) {
370 fd_psm_events_free(peer);
371 CHECK_FCT_DO( fd_fifo_del(&peer->p_events), /* continue */ );
372 }
373
374}
375
376
377/************************************************************************/
378/* The PSM thread */
379/************************************************************************/
380/* Cancelation cleanup : set ZOMBIE state in the peer */
381void cleanup_setstate(void * arg)
382{
383 struct fd_peer * peer = (struct fd_peer *)arg;
384 CHECK_PARAMS_DO( CHECK_PEER(peer), return );
385 CHECK_POSIX_DO( pthread_mutex_lock(&peer->p_state_mtx), );
386 peer->p_state = STATE_ZOMBIE;
387 CHECK_POSIX_DO( pthread_mutex_unlock(&peer->p_state_mtx), );
388 return;
389}
390
391/* The state machine thread (controler) */
392static void * p_psm_th( void * arg )
393{
394 struct fd_peer * peer = (struct fd_peer *)arg;
395 int created_started = started ? 1 : 0;
396 int event;
397 size_t ev_sz;
398 void * ev_data;
399 int cur_state;
400
401 CHECK_PARAMS_DO( CHECK_PEER(peer), ASSERT(0) );
402
403 pthread_cleanup_push( cleanup_setstate, arg );
404
405 /* Set the thread name */
406 {
407 char buf[48];
408 snprintf(buf, sizeof(buf), "PSM/%s", peer->p_hdr.info.pi_diamid);
409 fd_log_threadname ( buf );
410 }
411
412 /* The state machine starts in CLOSED state */
413 CHECK_POSIX_DO( pthread_mutex_lock(&peer->p_state_mtx), goto psm_end );
414 peer->p_state = STATE_CLOSED;
415 CHECK_POSIX_DO( pthread_mutex_unlock(&peer->p_state_mtx), goto psm_end );
416
417 /* Wait that the PSM are authorized to start in the daemon */
418 CHECK_FCT_DO( fd_psm_waitstart(), goto psm_end );
419
420 /* Initialize the timer */
421 if (peer->p_flags.pf_responder) {
422 fd_psm_next_timeout(peer, 0, INCNX_TIMEOUT);
423 } else {
424 fd_psm_next_timeout(peer, created_started, 0);
425 }
426
427psm_loop:
428 /* Get next event */
429 TRACE_DEBUG(FULL, "'%s' in state '%s' waiting for next event.",
430 peer->p_hdr.info.pi_diamid, STATE_STR(fd_peer_getstate(peer)));
431 CHECK_FCT_DO( fd_event_timedget(peer->p_events, &peer->p_psm_timer, FDEVP_PSM_TIMEOUT, &event, &ev_sz, &ev_data), goto psm_end );
432
433 cur_state = fd_peer_getstate(peer);
434 if (cur_state == -1)
435 goto psm_end;
436
437 TRACE_DEBUG(FULL, "'%s'\t<-- '%s'\t(%p,%zd)\t'%s'",
438 STATE_STR(cur_state),
439 fd_pev_str(event), ev_data, ev_sz,
440 peer->p_hdr.info.pi_diamid);
441
442 /* Now, the action depends on the current state and the incoming event */
443
444 /* The following states are impossible */
445 ASSERT( cur_state != STATE_NEW );
446 ASSERT( cur_state != STATE_ZOMBIE );
447 ASSERT( cur_state != STATE_OPEN_HANDSHAKE ); /* because it should exist only between two loops */
448
449 /* Purge invalid events */
450 if (!CHECK_PEVENT(event)) {
451 TRACE_DEBUG(INFO, "Invalid event received in PSM '%s' : %d", peer->p_hdr.info.pi_diamid, event);
452 ASSERT(0); /* we should investigate this situation */
453 goto psm_loop;
454 }
455
456 /* Requests to terminate the peer object */
457 if (event == FDEVP_TERMINATE) {
458 switch (cur_state) {
459 case STATE_OPEN:
460 case STATE_OPEN_NEW:
461 case STATE_REOPEN:
462 /* We cannot just close the connection, we have to send a DPR first */
463 CHECK_FCT_DO( fd_p_dp_initiate(peer, ev_data), goto psm_end );
464 goto psm_loop;
465
466 /*
467 case STATE_CLOSING:
468 case STATE_CLOSING_GRACE:
469 case STATE_WAITCNXACK:
470 case STATE_WAITCNXACK_ELEC:
471 case STATE_WAITCEA:
472 case STATE_SUSPECT:
473 case STATE_CLOSED:
474 */
475 default:
476 /* In these cases, we just cleanup the peer object (if needed) and terminate */
477 goto psm_end;
478 }
479 }
480
481 /* A message was received */
482 if (event == FDEVP_CNX_MSG_RECV) {
483 struct msg * msg = NULL;
484 struct msg_hdr * hdr;
485 struct fd_cnx_rcvdata rcv_data;
486 struct fd_msg_pmdl * pmdl = NULL;
487
488 rcv_data.buffer = ev_data;
489 rcv_data.length = ev_sz;
490 pmdl = fd_msg_pmdl_get_inbuf(rcv_data.buffer, rcv_data.length);
491
492 /* Parse the received buffer */
493 CHECK_FCT_DO( fd_msg_parse_buffer( (void *)&ev_data, ev_sz, &msg),
494 {
495 fd_hook_call(HOOK_MESSAGE_PARSING_ERROR, NULL, peer, &rcv_data, pmdl );
496 free(ev_data);
497 CHECK_FCT_DO( fd_event_send(peer->p_events, FDEVP_CNX_ERROR, 0, NULL), goto psm_reset );
498 goto psm_loop;
499 } );
500
501 fd_hook_associate(msg, pmdl);
502 CHECK_FCT_DO( fd_msg_source_set( msg, peer->p_hdr.info.pi_diamid, peer->p_hdr.info.pi_diamidlen), goto psm_end);
503
504 /* If the current state does not allow receiving messages, just drop it */
505 if (cur_state == STATE_CLOSED) {
506 /* In such case, just discard the message */
507 fd_hook_call(HOOK_MESSAGE_DROPPED, msg, peer, "Message purged from queue, peer in CLOSED state", fd_msg_pmdl_get(msg));
508 fd_msg_free(msg);
509 goto psm_loop;
510 }
511
512 /* Extract the header */
513 CHECK_FCT_DO( fd_msg_hdr(msg, &hdr), goto psm_end );
514
515 /* If it is an answer, associate with the request or drop */
516 if (!(hdr->msg_flags & CMD_FLAG_REQUEST)) {
517 struct msg * req;
518 /* Search matching request (same hbhid) */
519 CHECK_FCT_DO( fd_p_sr_fetch(&peer->p_sr, hdr->msg_hbhid, &req), goto psm_end );
520 if (req == NULL) {
521 fd_hook_call(HOOK_MESSAGE_DROPPED, msg, peer, "Answer received with no corresponding sent request.", fd_msg_pmdl_get(msg));
522 fd_msg_free(msg);
523 goto psm_loop;
524 }
525
526 /* Associate */
527 CHECK_FCT_DO( fd_msg_answ_associate( msg, req ), goto psm_end );
528
529 }
530
531 /* Log incoming message */
532 fd_hook_call(HOOK_MESSAGE_RECEIVED, msg, peer, NULL, fd_msg_pmdl_get(msg));
533
534 if (cur_state == STATE_OPEN_NEW) {
535 /* OK, we have received something, so the connection is supposedly now in OPEN state at the remote site */
536 fd_psm_change_state(peer, STATE_OPEN );
537 }
538
539 /* Now handle non-link-local messages */
540 if (fd_msg_is_routable(msg)) {
541 switch (cur_state) {
542 /* To maximize compatibility -- should not be a security issue here */
543 case STATE_REOPEN:
544 case STATE_SUSPECT:
545 case STATE_CLOSING:
546 case STATE_CLOSING_GRACE:
547 TRACE_DEBUG(FULL, "Accepted a message while not in OPEN state... ");
548 /* The standard situation : */
549 case STATE_OPEN_NEW:
550 case STATE_OPEN:
551 /* We received a valid routable message, update the expiry timer */
552 CHECK_FCT_DO( fd_p_expi_update(peer), goto psm_end );
553
554 /* Set the message source and add the Route-Record */
555 CHECK_FCT_DO( fd_msg_source_setrr( msg, peer->p_hdr.info.pi_diamid, peer->p_hdr.info.pi_diamidlen, fd_g_config->cnf_dict ), goto psm_end);
556
557 if ((hdr->msg_flags & CMD_FLAG_REQUEST)) {
558 /* Mark the incoming request so that we know we have pending answers for this peer */
559 CHECK_POSIX_DO( pthread_mutex_lock(&peer->p_state_mtx), goto psm_end );
560 peer->p_reqin_count++;
561 CHECK_POSIX_DO( pthread_mutex_unlock(&peer->p_state_mtx), goto psm_end );
562 }
563
564 /* Requeue to the global incoming queue */
565 CHECK_FCT_DO(fd_fifo_post(fd_g_incoming, &msg), goto psm_end );
566
567 /* Update the peer timer (only in OPEN state) */
568 if ((cur_state == STATE_OPEN) && (!peer->p_flags.pf_dw_pending)) {
569 fd_psm_next_timeout(peer, 1, peer->p_hdr.info.config.pic_twtimer ?: fd_g_config->cnf_timer_tw);
570 }
571 break;
572
573 /* In other states, we discard the message, it is either old or invalid to send it for the remote peer */
574 case STATE_WAITCNXACK:
575 case STATE_WAITCNXACK_ELEC:
576 case STATE_WAITCEA:
577 case STATE_CLOSED:
578 default: {
579 /* In such case, just discard the message */
580 char buf[128];
581 snprintf(buf, sizeof(buf), "Received while peer state machine was in state %s.", STATE_STR(cur_state));
582 fd_hook_call(HOOK_MESSAGE_DROPPED, msg, peer, buf, fd_msg_pmdl_get(msg));
583 fd_msg_free(msg);
584 }
585 }
586 goto psm_loop;
587 }
588
589 /* Link-local message: They must be understood by our dictionary, otherwise we return an error */
590 {
591 struct msg * error = NULL;
592 int ret = fd_msg_parse_or_error( &msg, &error );
593 if (ret != EBADMSG) {
594 CHECK_FCT_DO( ret,
595 {
596 char buf[256];
597 snprintf(buf, sizeof(buf), "%s: An unexpected error occurred while parsing a link-local message", peer->p_hdr.info.pi_diamid);
598 fd_hook_call(HOOK_MESSAGE_DROPPED, msg, peer, buf, fd_msg_pmdl_get(msg));
599 fd_msg_free(msg);
600 goto psm_end;
601 } );
602 } else {
603 if (msg == NULL) {
604 /* Send the error back to the peer */
605 CHECK_FCT_DO( ret = fd_out_send(&error, NULL, peer, 0), );
606 if (error) {
607 char buf[256];
608 /* Only if an error occurred & the message was not saved / dumped */
609 snprintf(buf, sizeof(buf), "%s: error sending a message", peer->p_hdr.info.pi_diamid);
610 fd_hook_call(HOOK_MESSAGE_DROPPED, error, peer, buf, fd_msg_pmdl_get(error));
611 CHECK_FCT_DO( fd_msg_free(error), goto psm_end);
612 }
613 } else {
614 char buf[256];
615 /* We received an invalid answer, let's disconnect */
616 snprintf(buf, sizeof(buf), "%s: Received invalid answer to Base protocol message, disconnecting...", peer->p_hdr.info.pi_diamid);
617 fd_hook_call(HOOK_MESSAGE_DROPPED, msg, peer, buf, fd_msg_pmdl_get(msg));
618 CHECK_FCT_DO( fd_msg_free(msg), goto psm_end);
619 CHECK_FCT_DO( fd_event_send(peer->p_events, FDEVP_CNX_ERROR, 0, NULL), goto psm_reset );
620 }
621 goto psm_loop;
622 }
623 }
624
625 /* Handle the LL message and update the expiry timer appropriately */
626 switch (hdr->msg_code) {
627 case CC_CAPABILITIES_EXCHANGE:
628 CHECK_FCT_DO( fd_p_ce_msgrcv(&msg, (hdr->msg_flags & CMD_FLAG_REQUEST), peer),
629 {
630 if (msg)
631 CHECK_FCT_DO( fd_msg_free(msg), );
632 goto psm_reset;
633 } );
634 break;
635
636 case CC_DISCONNECT_PEER:
637 CHECK_FCT_DO( fd_p_dp_handle(&msg, (hdr->msg_flags & CMD_FLAG_REQUEST), peer), goto psm_reset );
638 if (fd_peer_getstate(peer) == STATE_CLOSING)
639 goto psm_end;
640
641 break;
642
643 case CC_DEVICE_WATCHDOG:
644 CHECK_FCT_DO( fd_p_dw_handle(&msg, (hdr->msg_flags & CMD_FLAG_REQUEST), peer), goto psm_reset );
645 break;
646
647 default:
648 /* Unknown / unexpected / invalid message -- but validated by our dictionary */
649 TRACE_DEBUG(INFO, "Invalid non-routable command received: %u.", hdr->msg_code);
650 if (hdr->msg_flags & CMD_FLAG_REQUEST) {
651 do {
652 /* Reply with an error code */
653 CHECK_FCT_DO( fd_msg_new_answer_from_req ( fd_g_config->cnf_dict, &msg, MSGFL_ANSW_ERROR ), break );
654
655 /* Set the error code */
656 CHECK_FCT_DO( fd_msg_rescode_set(msg, "DIAMETER_COMMAND_UNSUPPORTED", "Or maybe the P-bit or application Id are erroneous.", NULL, 1 ), break );
657
658 /* Send the answer */
659 CHECK_FCT_DO( fd_out_send(&msg, peer->p_cnxctx, peer, 0), break );
660 } while (0);
661 } else {
662 /* We did ASK for it ??? */
663 TRACE_DEBUG(INFO, "Received answer with erroneous 'is_routable' result...");
664 }
665
666 /* Cleanup the message if not done */
667 if (msg) {
668 char buf[256];
669 snprintf(buf, sizeof(buf), "Received un-handled non-routable command from peer '%s'.", peer->p_hdr.info.pi_diamid);
670 fd_hook_call(HOOK_MESSAGE_DROPPED, msg, NULL, buf, fd_msg_pmdl_get(msg));
671 CHECK_FCT_DO( fd_msg_free(msg), /* continue */);
672 msg = NULL;
673 }
674 };
675
676 /* At this point the message must have been fully handled already */
677 if (msg) {
678 char buf[256];
679 snprintf(buf, sizeof(buf), "Internal error ('%s'): unhandled message.", peer->p_hdr.info.pi_diamid);
680 fd_hook_call(HOOK_MESSAGE_DROPPED, msg, NULL, buf, fd_msg_pmdl_get(msg));
681 fd_msg_free(msg);
682 }
683
684 goto psm_loop;
685 }
686
687 /* The connection object is broken */
688 if (event == FDEVP_CNX_ERROR) {
689 switch (cur_state) {
690 case STATE_WAITCNXACK_ELEC:
691 /* Abort the initiating side */
692 fd_p_cnx_abort(peer, 0);
693 /* Process the receiver side */
694 CHECK_FCT_DO( fd_p_ce_process_receiver(peer), goto psm_end );
695 break;
696
697 case STATE_WAITCEA:
698 case STATE_OPEN:
699 case STATE_OPEN_NEW:
700 case STATE_REOPEN:
701 case STATE_WAITCNXACK:
702 case STATE_SUSPECT:
703 default:
704 /* Mark the connection problem */
705 peer->p_flags.pf_cnx_pb = 1;
706
707 fd_hook_call(HOOK_PEER_CONNECT_FAILED, NULL, peer, "The connection was broken", NULL);
708
709 /* Destroy the connection, restart the timer to a new connection attempt */
710 fd_psm_next_timeout(peer, 1, peer->p_hdr.info.config.pic_tctimer ?: fd_g_config->cnf_timer_tc);
711
712 case STATE_CLOSED:
713 goto psm_reset;
714
715 case STATE_CLOSING:
716 /* We sent a DPR so we are terminating, do not wait for DPA */
717 goto psm_end;
718
719 case STATE_CLOSING_GRACE:
720 if (peer->p_flags.pf_localterm) /* initiated here */
721 goto psm_end;
722
723 fd_psm_cleanup(peer, 0);
724
725 /* Reset the timer for next connection attempt */
726 fd_psm_next_timeout(peer, 1, fd_p_dp_newdelay(peer));
727 goto psm_loop;
728 }
729 goto psm_loop;
730 }
731
732 /* The connection notified a change in endpoints */
733 if (event == FDEVP_CNX_EP_CHANGE) {
734 /* We actually don't care if we are in OPEN state here... */
735
736 /* Cleanup the remote LL and primary addresses */
737 CHECK_FCT_DO( fd_ep_filter( &peer->p_hdr.info.pi_endpoints, EP_FL_CONF | EP_FL_DISC | EP_FL_ADV ), /* ignore the error */);
738 CHECK_FCT_DO( fd_ep_clearflags( &peer->p_hdr.info.pi_endpoints, EP_FL_PRIMARY ), /* ignore the error */);
739
740 /* Get the new ones */
741 CHECK_FCT_DO( fd_cnx_getremoteeps(peer->p_cnxctx, &peer->p_hdr.info.pi_endpoints), /* ignore the error */);
742
743 /* We do not support local endpoints change currently, but it could be added here if needed (refresh fd_g_config->cnf_endpoints) */
744 {
745 char * buf = NULL;
746 size_t len = 0;
747 LOG_D("Got low layer notification (IGNORED): remote endpoint(s) changed: %s", fd_ep_dump(&buf, &len, NULL, 0, 0, &peer->p_hdr.info.pi_endpoints) ?: "error");
748 free(buf);
749 }
750
751 /* Done */
752 goto psm_loop;
753 }
754
755 /* A new connection was established and CER containing this peer id was received */
756 if (event == FDEVP_CNX_INCOMING) {
757 struct cnx_incoming * params = ev_data;
758 ASSERT(params);
759
760 /* Handle the message */
761 CHECK_FCT_DO( fd_p_ce_handle_newCER(&params->cer, peer, &params->cnx, params->validate), goto psm_end );
762
763 /* Cleanup if needed */
764 if (params->cnx) {
765 fd_cnx_destroy(params->cnx);
766 params->cnx = NULL;
767 }
768 if (params->cer) {
769 CHECK_FCT_DO( fd_msg_free(params->cer), );
770 params->cer = NULL;
771 }
772
773 /* Loop */
774 free(ev_data);
775 goto psm_loop;
776 }
777
778 /* A new connection has been established with the remote peer */
779 if (event == FDEVP_CNX_ESTABLISHED) {
780 struct cnxctx * cnx = ev_data;
781
782 /* Release the resources of the connecting thread */
783 CHECK_POSIX_DO( pthread_join( peer->p_ini_thr, NULL), /* ignore, it is not a big deal */);
784 peer->p_ini_thr = (pthread_t)NULL;
785
786 switch (cur_state) {
787 case STATE_WAITCNXACK_ELEC:
788 case STATE_WAITCNXACK:
789 LOG_D("%s: Connection established, %s", peer->p_hdr.info.pi_diamid, fd_cnx_getid(cnx));
790 fd_p_ce_handle_newcnx(peer, cnx);
791 break;
792
793 default:
794 /* Just abort the attempt and continue */
795 TRACE_DEBUG(FULL, "Connection attempt successful but current state is %s, closing... (too slow?)", STATE_STR(cur_state));
796 fd_cnx_destroy(cnx);
797 }
798
799 goto psm_loop;
800 }
801
802 /* A new connection has not been established with the remote peer */
803 if (event == FDEVP_CNX_FAILED) {
804
805 /* Release the resources of the connecting thread */
806 CHECK_POSIX_DO( pthread_join( peer->p_ini_thr, NULL), /* ignore, it is not a big deal */);
807 peer->p_ini_thr = (pthread_t)NULL;
808
809 switch (cur_state) {
810 case STATE_WAITCNXACK_ELEC:
811 /* Abort the initiating side */
812 fd_p_cnx_abort(peer, 0);
813 /* Process the receiver side */
814 CHECK_FCT_DO( fd_p_ce_process_receiver(peer), goto psm_end );
815 break;
816
817 case STATE_WAITCNXACK:
818 /* Go back to CLOSE */
819 fd_psm_next_timeout(peer, 1, peer->p_hdr.info.config.pic_tctimer ?: fd_g_config->cnf_timer_tc);
820 goto psm_reset;
821
822 default:
823 /* Just ignore */
824 TRACE_DEBUG(FULL, "Connection attempt failed but current state is %s, ignoring...", STATE_STR(cur_state));
825 }
826
827 goto psm_loop;
828 }
829
830 /* The timeout for the current state has been reached */
831 if (event == FDEVP_PSM_TIMEOUT) {
832 switch (cur_state) {
833 case STATE_OPEN:
834 case STATE_REOPEN:
835 case STATE_OPEN_NEW:
836 CHECK_FCT_DO( fd_p_dw_timeout(peer), goto psm_end );
837 goto psm_loop;
838
839 case STATE_CLOSED:
840 LOG_D("%s: Connecting...", peer->p_hdr.info.pi_diamid);
841 CHECK_FCT_DO( fd_psm_change_state(peer, STATE_WAITCNXACK), goto psm_end );
842 fd_psm_next_timeout(peer, 0, CNX_TIMEOUT);
843 CHECK_FCT_DO( fd_p_cnx_init(peer), goto psm_end );
844 goto psm_loop;
845
846 case STATE_SUSPECT:
847 /* Mark the connection problem */
848 peer->p_flags.pf_cnx_pb = 1;
849 case STATE_WAITCNXACK:
850 case STATE_WAITCEA:
851 fd_hook_call(HOOK_PEER_CONNECT_FAILED, NULL, peer, "Timeout while waiting for remote peer", NULL);
852 case STATE_CLOSING:
853 /* Destroy the connection, restart the timer to a new connection attempt */
854 fd_psm_next_timeout(peer, 1, peer->p_hdr.info.config.pic_tctimer ?: fd_g_config->cnf_timer_tc);
855 goto psm_reset;
856
857 case STATE_CLOSING_GRACE:
858 /* The grace period is completed, now close */
859 if (peer->p_flags.pf_localterm)
860 goto psm_end;
861
862 fd_psm_cleanup(peer, 0);
863 /* Reset the timer for next connection attempt */
864 fd_psm_next_timeout(peer, 1, fd_p_dp_newdelay(peer));
865 goto psm_loop;
866
867 case STATE_WAITCNXACK_ELEC:
868 /* Abort the initiating side */
869 fd_p_cnx_abort(peer, 0);
870 /* Process the receiver side */
871 CHECK_FCT_DO( fd_p_ce_process_receiver(peer), goto psm_end );
872 goto psm_loop;
873
874 default:
875 ASSERT(0); /* implementation problem, we did not foresee this case? */
876 }
877 }
878
879 /* Default action : the handling has not yet been implemented. [for debug only] */
880 TRACE_DEBUG(INFO, "Missing handler in PSM for '%s'\t<-- '%s'", STATE_STR(cur_state), fd_pev_str(event));
881psm_reset:
882 if (peer->p_flags.pf_delete)
883 goto psm_end;
884 fd_psm_cleanup(peer, 0);
885 goto psm_loop;
886
887psm_end:
888 LOG_E("%s: Going to ZOMBIE state (no more activity)", peer->p_hdr.info.pi_diamid);
889 fd_psm_cleanup(peer, 1);
890 TRACE_DEBUG(INFO, "'%s'\t-> 'STATE_ZOMBIE' (terminated)\t'%s'",
891 STATE_STR(fd_peer_getstate(peer)),
892 peer->p_hdr.info.pi_diamid);
893 pthread_cleanup_pop(1); /* set STATE_ZOMBIE */
894 peer->p_psm = (pthread_t)NULL;
895 pthread_detach(pthread_self());
896 return NULL;
897}
898
899
900/************************************************************************/
901/* Functions to control the PSM */
902/************************************************************************/
903/* Create the PSM thread of one peer structure */
904int fd_psm_begin(struct fd_peer * peer )
905{
906 TRACE_ENTRY("%p", peer);
907
908 /* Check the peer and state are OK */
909 CHECK_PARAMS( fd_peer_getstate(peer) == STATE_NEW );
910
911 /* Create the FIFO for events */
912 CHECK_FCT( fd_fifo_new(&peer->p_events, 0) );
913
914 /* Create the PSM controler thread */
915 CHECK_POSIX( pthread_create( &peer->p_psm, NULL, p_psm_th, peer ) );
916
917 /* We're done */
918 return 0;
919}
920
921/* End the PSM (clean ending) */
922int fd_psm_terminate(struct fd_peer * peer, char * reason )
923{
924 TRACE_ENTRY("%p", peer);
925 CHECK_PARAMS( CHECK_PEER(peer) );
926
927 if (fd_peer_getstate(peer) != STATE_ZOMBIE) {
928 CHECK_FCT( fd_event_send(peer->p_events, FDEVP_TERMINATE, 0, reason) );
929 } else {
930 TRACE_DEBUG(FULL, "Peer '%s' was already terminated", peer->p_hdr.info.pi_diamid);
931 }
932 return 0;
933}
934
935/* End the PSM & cleanup the peer structure */
936void fd_psm_abord(struct fd_peer * peer )
937{
938 TRACE_ENTRY("%p", peer);
939
940 /* Cancel PSM thread */
941 CHECK_FCT_DO( fd_thr_term(&peer->p_psm), /* continue */ );
942
943 /* Cleanup the data */
944 fd_psm_cleanup(peer, 1);
945
946 /* Destroy the event list */
947 CHECK_FCT_DO( fd_fifo_del(&peer->p_events), /* continue */ );
948
949 /* Remaining cleanups are performed in fd_peer_free */
950 return;
951}
952