Blame - libfdcore/p_psm.c - freeDiameter-old

blob: 2a30c5c676c285c23ba6dd4af372f7958eeaea25 [file] [log] [blame]

Brian Waters	13d9601	2017-12-08 16:53:31 -0600	[diff] [blame]	1	/*********************************************************************************************************
				2	* Software License Agreement (BSD License) *
				3	* Author: Sebastien Decugis <sdecugis@freediameter.net> *
				4	* *
				5	* Copyright (c) 2013, WIDE Project and NICT *
				6	* All rights reserved. *
				7	* *
				8	* Redistribution and use of this software in source and binary forms, with or without modification, are *
				9	* permitted provided that the following conditions are met: *
				10	* *
				11	* * Redistributions of source code must retain the above *
				12	* copyright notice, this list of conditions and the *
				13	* following disclaimer. *
				14	* *
				15	* * Redistributions in binary form must reproduce the above *
				16	* copyright notice, this list of conditions and the *
				17	* following disclaimer in the documentation and/or other *
				18	* materials provided with the distribution. *
				19	* *
				20	* * Neither the name of the WIDE Project or NICT nor the *
				21	* names of its contributors may be used to endorse or *
				22	* promote products derived from this software without *
				23	* specific prior written permission of WIDE Project and *
				24	* NICT. *
				25	* *
				26	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED *
				27	* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A *
				28	* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
				29	* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT *
				30	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
				31	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR *
				32	* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF *
				33	* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
				34	*********************************************************************************************************/
				35
				36	#include "fdcore-internal.h"
				37
				38	/*
				39	This file implements a Peer State Machine which is a mix of:
				40	- the state machine described in rfc3588bis
				41	- the state machine described in rfc3539#section-3.4
				42	- the following observations.
				43
				44	The delivery of Diameter messages must not always be unordered: order is important at
				45	the beginning and end of a connection lifetime. It means we need agility to
				46	switch between "ordering enforced" and "ordering not enforced to counter
				47	Head of the Line Blocking" modes of operation.
				48
				49	The connection state machine represented in RFC3588 (and RFC6733) is
				50	incomplete, because it lacks the SUSPECT state and the 3 DWR/DWA
				51	exchanges (section 5.1) when the peer recovers from this state.
				52	Personally I don't see the rationale for exchanging 3 messages (why 3?)
				53	but, if we require at least 1 DWR/DWA exchange to be always performed
				54	after the CER/CEA exchange (and initiated by the peer that sent the
				55	CEA), we have a simple way to deal with our ordering problem, as summarized
				56	below. Peers are: [i]nitiator, [r]esponder.
				57	(1) [i] SCTP connection attempt.
				58	(2) [r] accept the connection.
				59	(3) [i,r] (if secure port) DTLS handshake, close on failure.
				60	(4) [i] Send CER
				61	(5) [r] Receive CER, send CEA using stream 0, flag "unordered" cleared.
				62	[r] Immediately send a DWR after the CEA, also using stream 0,
				63	flag "unordered" cleared.
				64	[r] Move to STATE_OPEN_NEW state -- equivalent to OPEN except
				65	that all messages are sent ordered at the moment.
				66	(6) [i] receive CEA, move to OPEN state. All messages can be sent
				67	unordered in OPEN state.
				68	[i] As per normal operation, reply with DWA to the DWR.
				69	(7) [r] Upon reception of the DWA, move to OPEN state, messages can be
				70	sent unordered from this point.
				71
				72	Note about (5) and (6): if the Diameter Identity received in CER or CEA
				73	does not match the credentials from the certificate presented during
				74	TLS handshake, we may need to specify a path of clean disconnection
				75	(not blocking the remote peer waiting for something).
				76
				77	This proposed mechanism removes the problem of application messages
				78	received before the CEA by the initiator. Note that if the "old" inband
				79	TLS handshake is used, this handshake plays the same synchronization
				80	role as the new DWR/DWA, which becomes useless.
				81
				82
				83	The other time where ordering is important is by the end of connection
				84	lifetime, when one peer is shutting down the link for some reason
				85	(reboot, overload, no activity, etc...). In case of unordered delivery,
				86	we may have:
				87	- peer A sends an application message followed by a DPR. Peer B receives
				88	the DPR first and tears down the connection. Application message is lost.
				89	- Peer B sends an application message, then receives a DPR and answers a
				90	DPA. Peer A receives the DPA before the application message. The
				91	application message is lost.
				92
				93	This situation is actually happening easily because DPR/DPA messages are
				94	very short, while application messages can be quite large. Therefore,
				95	they require much more time to deliver.
				96
				97	I really cannot see a way to counter this effect by using the ordering
				98	of the messages, except by applying a timer (state STATE_CLOSING_GRACE).
				99	This timer can be also useful when we detect that some messages have not
				100	yet received an answer on this link, to give time to the application to
				101	complete the exchange ongoing.
				102
				103	However, this problem must be balanced with the fact that the message
				104	that is lost will be in many cases sent again as the failover mechanism
				105	specifies.
				106	*/
				107
				/* The actual declaration of peer_state_str.
				 * NOTE(review): DECLARE_STATE_STR is defined outside this file; presumably it
				 * emits the table used by STATE_STR() below -- confirm in the header. */
				DECLARE_STATE_STR();

				/* Helper for next macro: expands to a switch label for _val that returns
				 * the spelling of _val as a string literal (via the # stringizing operator). */
				#define case_str( _val ) \
				case _val : return #_val

				/* NOTE(review): DECLARE_PEV_STR is defined outside this file; presumably it
				 * defines fd_pev_str() (called from the PSM thread) using case_str -- confirm. */
				DECLARE_PEV_STR();
				116
				117	/************************************************************************/
				118	/* Delayed startup */
				119	/************************************************************************/
				/* Start flag: set to 1 by fd_psm_start() while holding started_mtx;
				 * fd_psm_waitstart() blocks for as long as it is 0. */
				static int started = 0;
				/* Mutex held while 'started' is written and while waiting on started_cnd */
				static pthread_mutex_t started_mtx = PTHREAD_MUTEX_INITIALIZER;
				/* Condition broadcast by fd_psm_start() once 'started' has been set */
				static pthread_cond_t started_cnd = PTHREAD_COND_INITIALIZER;
				123
				124	/* Wait for start signal */
				125	static int fd_psm_waitstart()
				126	{
				127	int ret = 0;
				128	TRACE_ENTRY("");
				129	CHECK_POSIX( pthread_mutex_lock(&started_mtx) );
				130	awake:
				131	if (!ret && !started) {
				132	pthread_cleanup_push( fd_cleanup_mutex, &started_mtx );
				133	CHECK_POSIX_DO( ret = pthread_cond_wait(&started_cnd, &started_mtx), );
				134	pthread_cleanup_pop( 0 );
				135	goto awake;
				136	}
				137	CHECK_POSIX( pthread_mutex_unlock(&started_mtx) );
				138	return ret;
				139	}
				140
				141	/* Allow the state machines to start */
				142	int fd_psm_start()
				143	{
				144	TRACE_ENTRY("");
				145	CHECK_POSIX( pthread_mutex_lock(&started_mtx) );
				146	started = 1;
				147	CHECK_POSIX( pthread_cond_broadcast(&started_cnd) );
				148	CHECK_POSIX( pthread_mutex_unlock(&started_mtx) );
				149	return 0;
				150	}
				151
				152
				153	/************************************************************************/
				154	/* Manage the list of active peers */
				155	/************************************************************************/
				156
				157	/* Enter/leave OPEN state */
				158	static int enter_open_state(struct fd_peer * peer)
				159	{
				160	struct fd_list * li;
				161	CHECK_PARAMS( FD_IS_LIST_EMPTY(&peer->p_actives) );
				162
				163	/* Callback registered by the credential validator (fd_peer_validate_register) */
				164	if (peer->p_cb2) {
				165	CHECK_FCT_DO( (*peer->p_cb2)(&peer->p_hdr.info),
				166	{
				167	TRACE_DEBUG(FULL, "Validation failed, terminating the connection");
				168	fd_psm_terminate(peer, "DO_NOT_WANT_TO_TALK_TO_YOU" );
				169	} );
				170	peer->p_cb2 = NULL;
				171	return 0;
				172	}
				173
				174	/* Insert in the active peers list */
				175	CHECK_POSIX( pthread_rwlock_wrlock(&fd_g_activ_peers_rw) );
				176	for (li = fd_g_activ_peers.next; li != &fd_g_activ_peers; li = li->next) {
				177	struct fd_peer * next_p = (struct fd_peer *)li->o;
				178	int cmp = fd_os_cmp(peer->p_hdr.info.pi_diamid, peer->p_hdr.info.pi_diamidlen,
				179	next_p->p_hdr.info.pi_diamid, next_p->p_hdr.info.pi_diamidlen);
				180	if (cmp < 0)
				181	break;
				182	}
				183	fd_list_insert_before(li, &peer->p_actives);
				184	CHECK_POSIX( pthread_rwlock_unlock(&fd_g_activ_peers_rw) );
				185
				186	/* Callback registered when the peer was added, by fd_peer_add */
				187	if (peer->p_cb) {
				188	TRACE_DEBUG(FULL, "Calling add callback for peer %s", peer->p_hdr.info.pi_diamid);
				189	(peer->p_cb)(&peer->p_hdr.info, peer->p_cb_data); / TODO: do this in a separate detached thread? */
				190	peer->p_cb = NULL;
				191	peer->p_cb_data = NULL;
				192	}
				193
				194	/* Start the thread to handle outgoing messages */
				195	CHECK_FCT( fd_out_start(peer) );
				196
				197	/* Update the expiry timer now */
				198	CHECK_FCT( fd_p_expi_update(peer) );
				199
				200	return 0;
				201	}
				202	static int leave_open_state(struct fd_peer * peer, int skip_failover)
				203	{
				204	/* Remove from active peers list */
				205	CHECK_POSIX( pthread_rwlock_wrlock(&fd_g_activ_peers_rw) );
				206	fd_list_unlink( &peer->p_actives );
				207	CHECK_POSIX( pthread_rwlock_unlock(&fd_g_activ_peers_rw) );
				208
				209	/* Stop the "out" thread */
				210	CHECK_FCT( fd_out_stop(peer) );
				211
				212	/* Failover the messages */
				213	if (!skip_failover) {
				214	fd_peer_failover_msg(peer);
				215	}
				216
				217	return 0;
				218	}
				219
				220
				221	/************************************************************************/
				222	/* Helpers for state changes */
				223	/************************************************************************/
				224
				225	/* Cleanup pending events in the peer */
				226	void fd_psm_events_free(struct fd_peer * peer)
				227	{
				228	struct fd_event * ev;
				229	/* Purge all events, and free the associated data if any */
				230	while (fd_fifo_tryget( peer->p_events, &ev ) == 0) {
				231	switch (ev->code) {
				232	case FDEVP_CNX_ESTABLISHED: {
				233	fd_cnx_destroy(ev->data);
				234	}
				235	break;
				236
				237	case FDEVP_TERMINATE:
				238	/* Do not free the string since it is a constant */
				239	break;
				240
				241	case FDEVP_CNX_INCOMING: {
				242	struct cnx_incoming * evd = ev->data;
				243	fd_hook_call(HOOK_MESSAGE_DROPPED, evd->cer, NULL, "Message discarded while cleaning peer state machine queue.", fd_msg_pmdl_get(evd->cer));
				244	CHECK_FCT_DO( fd_msg_free(evd->cer), /* continue */);
				245	fd_cnx_destroy(evd->cnx);
				246	}
				247	default:
				248	free(ev->data);
				249	}
				250	free(ev);
				251	}
				252	}
				253
				254	/* Read state */
				255	int fd_peer_get_state(struct peer_hdr *peer)
				256	{
				257	int ret;
				258
				259	struct fd_peer * p = (struct fd_peer *)peer;
				260
				261	if (!CHECK_PEER(p))
				262	return -1;
				263
				264	CHECK_POSIX_DO( pthread_mutex_lock(&p->p_state_mtx), return -1 );
				265	ret = p->p_state;
				266	CHECK_POSIX_DO( pthread_mutex_unlock(&p->p_state_mtx), return -1 );
				267
				268	return ret;
				269	}
				270
				271
				272	/* Change state */
				273	int fd_psm_change_state(struct fd_peer * peer, int new_state)
				274	{
				275	int old;
				276
				277	TRACE_ENTRY("%p %d(%s)", peer, new_state, STATE_STR(new_state));
				278	CHECK_PARAMS( CHECK_PEER(peer) );
				279
				280	old = fd_peer_getstate(peer);
				281	if (old == new_state)
				282	return 0;
				283
				284	LOG(((old == STATE_OPEN) \|\| (new_state == STATE_OPEN)) ? FD_LOG_NOTICE : FD_LOG_DEBUG, "'%s'\t-> '%s'\t'%s'",
				285	STATE_STR(old),
				286	STATE_STR(new_state),
				287	peer->p_hdr.info.pi_diamid);
				288
				289
				290	CHECK_POSIX( pthread_mutex_lock(&peer->p_state_mtx) );
				291	peer->p_state = new_state;
				292	CHECK_POSIX( pthread_mutex_unlock(&peer->p_state_mtx) );
				293
				294	if (old == STATE_OPEN) {
				295	CHECK_FCT( leave_open_state(peer, new_state == STATE_CLOSING_GRACE) );
				296	}
				297	if (old == STATE_CLOSING_GRACE) {
				298	fd_peer_failover_msg(peer);
				299	}
				300
				301	if (new_state == STATE_OPEN) {
				302	CHECK_FCT( enter_open_state(peer) );
				303	}
				304
				305	if (new_state == STATE_CLOSED) {
				306	/* Purge event list */
				307	fd_psm_events_free(peer);
				308
				309	/* Reset the counter of pending anwers to send */
				310	peer->p_reqin_count = 0;
				311
				312	/* If the peer is not persistant, we destroy it */
				313	if (peer->p_hdr.info.config.pic_flags.persist == PI_PRST_NONE) {
				314	CHECK_FCT( fd_event_send(peer->p_events, FDEVP_TERMINATE, 0, NULL) );
				315	}
				316	}
				317
				318	return 0;
				319	}
				320
				321	/* Set timeout timer of next event */
				322	void fd_psm_next_timeout(struct fd_peer * peer, int add_random, int delay)
				323	{
				324	TRACE_DEBUG(FULL, "Peer timeout reset to %d seconds%s", delay, add_random ? " (+/- 2)" : "" );
				325
				326	/* Initialize the timer */
				327	CHECK_POSIX_DO( clock_gettime( CLOCK_REALTIME, &peer->p_psm_timer ), ASSERT(0) );
				328
				329	if (add_random) {
				330	if (delay > 2)
				331	delay -= 2;
				332	else
				333	delay = 0;
				334
				335	/* Add a random value between 0 and 4sec */
				336	peer->p_psm_timer.tv_sec += random() % 4;
				337	peer->p_psm_timer.tv_nsec+= random() % 1000000000L;
				338	if (peer->p_psm_timer.tv_nsec >= 1000000000L) {
				339	peer->p_psm_timer.tv_nsec -= 1000000000L;
				340	peer->p_psm_timer.tv_sec ++;
				341	}
				342	}
				343
				344	peer->p_psm_timer.tv_sec += delay;
				345
				346	#ifdef SLOW_PSM
				347	/* temporary for debug */
				348	peer->p_psm_timer.tv_sec += 10;
				349	#endif
				350	}
				351
				352	/* Cleanup the peer */
				353	void fd_psm_cleanup(struct fd_peer * peer, int terminate)
				354	{
				355	/* Move to CLOSED state: failover messages, stop OUT thread, unlink peer from active list */
				356	if (fd_peer_getstate(peer) != STATE_ZOMBIE) {
				357	CHECK_FCT_DO( fd_psm_change_state(peer, STATE_CLOSED), /* continue */ );
				358	}
				359
				360	fd_p_cnx_abort(peer, terminate);
				361
				362	fd_p_ce_clear_cnx(peer, NULL);
				363
				364	if (peer->p_receiver) {
				365	fd_cnx_destroy(peer->p_receiver);
				366	peer->p_receiver = NULL;
				367	}
				368
				369	if (terminate) {
				370	fd_psm_events_free(peer);
				371	CHECK_FCT_DO( fd_fifo_del(&peer->p_events), /* continue */ );
				372	}
				373
				374	}
				375
				376
				377	/************************************************************************/
				378	/* The PSM thread */
				379	/************************************************************************/
				380	/* Cancelation cleanup : set ZOMBIE state in the peer */
				381	void cleanup_setstate(void * arg)
				382	{
				383	struct fd_peer * peer = (struct fd_peer *)arg;
				384	CHECK_PARAMS_DO( CHECK_PEER(peer), return );
				385	CHECK_POSIX_DO( pthread_mutex_lock(&peer->p_state_mtx), );
				386	peer->p_state = STATE_ZOMBIE;
				387	CHECK_POSIX_DO( pthread_mutex_unlock(&peer->p_state_mtx), );
				388	return;
				389	}
				390
				391	/* The state machine thread (controler) */
				392	static void * p_psm_th( void * arg )
				393	{
				394	struct fd_peer * peer = (struct fd_peer *)arg;
				395	int created_started = started ? 1 : 0;
				396	int event;
				397	size_t ev_sz;
				398	void * ev_data;
				399	int cur_state;
				400
				401	CHECK_PARAMS_DO( CHECK_PEER(peer), ASSERT(0) );
				402
				403	pthread_cleanup_push( cleanup_setstate, arg );
				404
				405	/* Set the thread name */
				406	{
				407	char buf[48];
				408	snprintf(buf, sizeof(buf), "PSM/%s", peer->p_hdr.info.pi_diamid);
				409	fd_log_threadname ( buf );
				410	}
				411
				412	/* The state machine starts in CLOSED state */
				413	CHECK_POSIX_DO( pthread_mutex_lock(&peer->p_state_mtx), goto psm_end );
				414	peer->p_state = STATE_CLOSED;
				415	CHECK_POSIX_DO( pthread_mutex_unlock(&peer->p_state_mtx), goto psm_end );
				416
				417	/* Wait that the PSM are authorized to start in the daemon */
				418	CHECK_FCT_DO( fd_psm_waitstart(), goto psm_end );
				419
				420	/* Initialize the timer */
				421	if (peer->p_flags.pf_responder) {
				422	fd_psm_next_timeout(peer, 0, INCNX_TIMEOUT);
				423	} else {
				424	fd_psm_next_timeout(peer, created_started, 0);
				425	}
				426
				427	psm_loop:
				428	/* Get next event */
				429	TRACE_DEBUG(FULL, "'%s' in state '%s' waiting for next event.",
				430	peer->p_hdr.info.pi_diamid, STATE_STR(fd_peer_getstate(peer)));
				431	CHECK_FCT_DO( fd_event_timedget(peer->p_events, &peer->p_psm_timer, FDEVP_PSM_TIMEOUT, &event, &ev_sz, &ev_data), goto psm_end );
				432
				433	cur_state = fd_peer_getstate(peer);
				434	if (cur_state == -1)
				435	goto psm_end;
				436
				437	TRACE_DEBUG(FULL, "'%s'\t<-- '%s'\t(%p,%zd)\t'%s'",
				438	STATE_STR(cur_state),
				439	fd_pev_str(event), ev_data, ev_sz,
				440	peer->p_hdr.info.pi_diamid);
				441
				442	/* Now, the action depends on the current state and the incoming event */
				443
				444	/* The following states are impossible */
				445	ASSERT( cur_state != STATE_NEW );
				446	ASSERT( cur_state != STATE_ZOMBIE );
				447	ASSERT( cur_state != STATE_OPEN_HANDSHAKE ); /* because it should exist only between two loops */
				448
				449	/* Purge invalid events */
				450	if (!CHECK_PEVENT(event)) {
				451	TRACE_DEBUG(INFO, "Invalid event received in PSM '%s' : %d", peer->p_hdr.info.pi_diamid, event);
				452	ASSERT(0); /* we should investigate this situation */
				453	goto psm_loop;
				454	}
				455
				456	/* Requests to terminate the peer object */
				457	if (event == FDEVP_TERMINATE) {
				458	switch (cur_state) {
				459	case STATE_OPEN:
				460	case STATE_OPEN_NEW:
				461	case STATE_REOPEN:
				462	/* We cannot just close the connection, we have to send a DPR first */
				463	CHECK_FCT_DO( fd_p_dp_initiate(peer, ev_data), goto psm_end );
				464	goto psm_loop;
				465
				466	/*
				467	case STATE_CLOSING:
				468	case STATE_CLOSING_GRACE:
				469	case STATE_WAITCNXACK:
				470	case STATE_WAITCNXACK_ELEC:
				471	case STATE_WAITCEA:
				472	case STATE_SUSPECT:
				473	case STATE_CLOSED:
				474	*/
				475	default:
				476	/* In these cases, we just cleanup the peer object (if needed) and terminate */
				477	goto psm_end;
				478	}
				479	}
				480
				481	/* A message was received */
				482	if (event == FDEVP_CNX_MSG_RECV) {
				483	struct msg * msg = NULL;
				484	struct msg_hdr * hdr;
				485	struct fd_cnx_rcvdata rcv_data;
				486	struct fd_msg_pmdl * pmdl = NULL;
				487
				488	rcv_data.buffer = ev_data;
				489	rcv_data.length = ev_sz;
				490	pmdl = fd_msg_pmdl_get_inbuf(rcv_data.buffer, rcv_data.length);
				491
				492	/* Parse the received buffer */
				493	CHECK_FCT_DO( fd_msg_parse_buffer( (void *)&ev_data, ev_sz, &msg),
				494	{
				495	fd_hook_call(HOOK_MESSAGE_PARSING_ERROR, NULL, peer, &rcv_data, pmdl );
				496	free(ev_data);
				497	CHECK_FCT_DO( fd_event_send(peer->p_events, FDEVP_CNX_ERROR, 0, NULL), goto psm_reset );
				498	goto psm_loop;
				499	} );
				500
				501	fd_hook_associate(msg, pmdl);
				502	CHECK_FCT_DO( fd_msg_source_set( msg, peer->p_hdr.info.pi_diamid, peer->p_hdr.info.pi_diamidlen), goto psm_end);
				503
				504	/* If the current state does not allow receiving messages, just drop it */
				505	if (cur_state == STATE_CLOSED) {
				506	/* In such case, just discard the message */
				507	fd_hook_call(HOOK_MESSAGE_DROPPED, msg, peer, "Message purged from queue, peer in CLOSED state", fd_msg_pmdl_get(msg));
				508	fd_msg_free(msg);
				509	goto psm_loop;
				510	}
				511
				512	/* Extract the header */
				513	CHECK_FCT_DO( fd_msg_hdr(msg, &hdr), goto psm_end );
				514
				515	/* If it is an answer, associate with the request or drop */
				516	if (!(hdr->msg_flags & CMD_FLAG_REQUEST)) {
				517	struct msg * req;
				518	/* Search matching request (same hbhid) */
				519	CHECK_FCT_DO( fd_p_sr_fetch(&peer->p_sr, hdr->msg_hbhid, &req), goto psm_end );
				520	if (req == NULL) {
				521	fd_hook_call(HOOK_MESSAGE_DROPPED, msg, peer, "Answer received with no corresponding sent request.", fd_msg_pmdl_get(msg));
				522	fd_msg_free(msg);
				523	goto psm_loop;
				524	}
				525
				526	/* Associate */
				527	CHECK_FCT_DO( fd_msg_answ_associate( msg, req ), goto psm_end );
				528
				529	}
				530
				531	/* Log incoming message */
				532	fd_hook_call(HOOK_MESSAGE_RECEIVED, msg, peer, NULL, fd_msg_pmdl_get(msg));
				533
				534	if (cur_state == STATE_OPEN_NEW) {
				535	/* OK, we have received something, so the connection is supposedly now in OPEN state at the remote site */
				536	fd_psm_change_state(peer, STATE_OPEN );
				537	}
				538
				539	/* Now handle non-link-local messages */
				540	if (fd_msg_is_routable(msg)) {
				541	switch (cur_state) {
				542	/* To maximize compatibility -- should not be a security issue here */
				543	case STATE_REOPEN:
				544	case STATE_SUSPECT:
				545	case STATE_CLOSING:
				546	case STATE_CLOSING_GRACE:
				547	TRACE_DEBUG(FULL, "Accepted a message while not in OPEN state... ");
				548	/* The standard situation : */
				549	case STATE_OPEN_NEW:
				550	case STATE_OPEN:
				551	/* We received a valid routable message, update the expiry timer */
				552	CHECK_FCT_DO( fd_p_expi_update(peer), goto psm_end );
				553
				554	/* Set the message source and add the Route-Record */
				555	CHECK_FCT_DO( fd_msg_source_setrr( msg, peer->p_hdr.info.pi_diamid, peer->p_hdr.info.pi_diamidlen, fd_g_config->cnf_dict ), goto psm_end);
				556
				557	if ((hdr->msg_flags & CMD_FLAG_REQUEST)) {
				558	/* Mark the incoming request so that we know we have pending answers for this peer */
				559	CHECK_POSIX_DO( pthread_mutex_lock(&peer->p_state_mtx), goto psm_end );
				560	peer->p_reqin_count++;
				561	CHECK_POSIX_DO( pthread_mutex_unlock(&peer->p_state_mtx), goto psm_end );
				562	}
				563
				564	/* Requeue to the global incoming queue */
				565	CHECK_FCT_DO(fd_fifo_post(fd_g_incoming, &msg), goto psm_end );
				566
				567	/* Update the peer timer (only in OPEN state) */
				568	if ((cur_state == STATE_OPEN) && (!peer->p_flags.pf_dw_pending)) {
				569	fd_psm_next_timeout(peer, 1, peer->p_hdr.info.config.pic_twtimer ?: fd_g_config->cnf_timer_tw);
				570	}
				571	break;
				572
				573	/* In other states, we discard the message, it is either old or invalid to send it for the remote peer */
				574	case STATE_WAITCNXACK:
				575	case STATE_WAITCNXACK_ELEC:
				576	case STATE_WAITCEA:
				577	case STATE_CLOSED:
				578	default: {
				579	/* In such case, just discard the message */
				580	char buf[128];
				581	snprintf(buf, sizeof(buf), "Received while peer state machine was in state %s.", STATE_STR(cur_state));
				582	fd_hook_call(HOOK_MESSAGE_DROPPED, msg, peer, buf, fd_msg_pmdl_get(msg));
				583	fd_msg_free(msg);
				584	}
				585	}
				586	goto psm_loop;
				587	}
				588
				589	/* Link-local message: They must be understood by our dictionary, otherwise we return an error */
				590	{
				591	struct msg * error = NULL;
				592	int ret = fd_msg_parse_or_error( &msg, &error );
				593	if (ret != EBADMSG) {
				594	CHECK_FCT_DO( ret,
				595	{
				596	char buf[256];
				597	snprintf(buf, sizeof(buf), "%s: An unexpected error occurred while parsing a link-local message", peer->p_hdr.info.pi_diamid);
				598	fd_hook_call(HOOK_MESSAGE_DROPPED, msg, peer, buf, fd_msg_pmdl_get(msg));
				599	fd_msg_free(msg);
				600	goto psm_end;
				601	} );
				602	} else {
				603	if (msg == NULL) {
				604	/* Send the error back to the peer */
				605	CHECK_FCT_DO( ret = fd_out_send(&error, NULL, peer, 0), );
				606	if (error) {
				607	char buf[256];
				608	/* Only if an error occurred & the message was not saved / dumped */
				609	snprintf(buf, sizeof(buf), "%s: error sending a message", peer->p_hdr.info.pi_diamid);
				610	fd_hook_call(HOOK_MESSAGE_DROPPED, error, peer, buf, fd_msg_pmdl_get(error));
				611	CHECK_FCT_DO( fd_msg_free(error), goto psm_end);
				612	}
				613	} else {
				614	char buf[256];
				615	/* We received an invalid answer, let's disconnect */
				616	snprintf(buf, sizeof(buf), "%s: Received invalid answer to Base protocol message, disconnecting...", peer->p_hdr.info.pi_diamid);
				617	fd_hook_call(HOOK_MESSAGE_DROPPED, msg, peer, buf, fd_msg_pmdl_get(msg));
				618	CHECK_FCT_DO( fd_msg_free(msg), goto psm_end);
				619	CHECK_FCT_DO( fd_event_send(peer->p_events, FDEVP_CNX_ERROR, 0, NULL), goto psm_reset );
				620	}
				621	goto psm_loop;
				622	}
				623	}
				624
				625	/* Handle the LL message and update the expiry timer appropriately */
				626	switch (hdr->msg_code) {
				627	case CC_CAPABILITIES_EXCHANGE:
				628	CHECK_FCT_DO( fd_p_ce_msgrcv(&msg, (hdr->msg_flags & CMD_FLAG_REQUEST), peer),
				629	{
				630	if (msg)
				631	CHECK_FCT_DO( fd_msg_free(msg), );
				632	goto psm_reset;
				633	} );
				634	break;
				635
				636	case CC_DISCONNECT_PEER:
				637	CHECK_FCT_DO( fd_p_dp_handle(&msg, (hdr->msg_flags & CMD_FLAG_REQUEST), peer), goto psm_reset );
				638	if (fd_peer_getstate(peer) == STATE_CLOSING)
				639	goto psm_end;
				640
				641	break;
				642
				643	case CC_DEVICE_WATCHDOG:
				644	CHECK_FCT_DO( fd_p_dw_handle(&msg, (hdr->msg_flags & CMD_FLAG_REQUEST), peer), goto psm_reset );
				645	break;
				646
				647	default:
				648	/* Unknown / unexpected / invalid message -- but validated by our dictionary */
				649	TRACE_DEBUG(INFO, "Invalid non-routable command received: %u.", hdr->msg_code);
				650	if (hdr->msg_flags & CMD_FLAG_REQUEST) {
				651	do {
				652	/* Reply with an error code */
				653	CHECK_FCT_DO( fd_msg_new_answer_from_req ( fd_g_config->cnf_dict, &msg, MSGFL_ANSW_ERROR ), break );
				654
				655	/* Set the error code */
				656	CHECK_FCT_DO( fd_msg_rescode_set(msg, "DIAMETER_COMMAND_UNSUPPORTED", "Or maybe the P-bit or application Id are erroneous.", NULL, 1 ), break );
				657
				658	/* Send the answer */
				659	CHECK_FCT_DO( fd_out_send(&msg, peer->p_cnxctx, peer, 0), break );
				660	} while (0);
				661	} else {
				662	/* We did ASK for it ??? */
				663	TRACE_DEBUG(INFO, "Received answer with erroneous 'is_routable' result...");
				664	}
				665
				666	/* Cleanup the message if not done */
				667	if (msg) {
				668	char buf[256];
				669	snprintf(buf, sizeof(buf), "Received un-handled non-routable command from peer '%s'.", peer->p_hdr.info.pi_diamid);
				670	fd_hook_call(HOOK_MESSAGE_DROPPED, msg, NULL, buf, fd_msg_pmdl_get(msg));
				671	CHECK_FCT_DO( fd_msg_free(msg), /* continue */);
				672	msg = NULL;
				673	}
				674	};
				675
				676	/* At this point the message must have been fully handled already */
				677	if (msg) {
				678	char buf[256];
				679	snprintf(buf, sizeof(buf), "Internal error ('%s'): unhandled message.", peer->p_hdr.info.pi_diamid);
				680	fd_hook_call(HOOK_MESSAGE_DROPPED, msg, NULL, buf, fd_msg_pmdl_get(msg));
				681	fd_msg_free(msg);
				682	}
				683
				684	goto psm_loop;
				685	}
				686
				687	/* The connection object is broken */
				688	if (event == FDEVP_CNX_ERROR) {
				689	switch (cur_state) {
				690	case STATE_WAITCNXACK_ELEC:
				691	/* Abort the initiating side */
				692	fd_p_cnx_abort(peer, 0);
				693	/* Process the receiver side */
				694	CHECK_FCT_DO( fd_p_ce_process_receiver(peer), goto psm_end );
				695	break;
				696
				697	case STATE_WAITCEA:
				698	case STATE_OPEN:
				699	case STATE_OPEN_NEW:
				700	case STATE_REOPEN:
				701	case STATE_WAITCNXACK:
				702	case STATE_SUSPECT:
				703	default:
				704	/* Mark the connection problem */
				705	peer->p_flags.pf_cnx_pb = 1;
				706
				707	fd_hook_call(HOOK_PEER_CONNECT_FAILED, NULL, peer, "The connection was broken", NULL);
				708
				709	/* Destroy the connection, restart the timer to a new connection attempt */
				710	fd_psm_next_timeout(peer, 1, peer->p_hdr.info.config.pic_tctimer ?: fd_g_config->cnf_timer_tc);
				711
				712	case STATE_CLOSED:
				713	goto psm_reset;
				714
				715	case STATE_CLOSING:
				716	/* We sent a DPR so we are terminating, do not wait for DPA */
				717	goto psm_end;
				718
				719	case STATE_CLOSING_GRACE:
				720	if (peer->p_flags.pf_localterm) /* initiated here */
				721	goto psm_end;
				722
				723	fd_psm_cleanup(peer, 0);
				724
				725	/* Reset the timer for next connection attempt */
				726	fd_psm_next_timeout(peer, 1, fd_p_dp_newdelay(peer));
				727	goto psm_loop;
				728	}
				729	goto psm_loop;
				730	}
				731
				732	/* The connection notified a change in endpoints */
				733	if (event == FDEVP_CNX_EP_CHANGE) {
				734	/* We actually don't care if we are in OPEN state here... */
				735
				736	/* Cleanup the remote LL and primary addresses */
				737	CHECK_FCT_DO( fd_ep_filter( &peer->p_hdr.info.pi_endpoints, EP_FL_CONF \| EP_FL_DISC \| EP_FL_ADV ), /* ignore the error */);
				738	CHECK_FCT_DO( fd_ep_clearflags( &peer->p_hdr.info.pi_endpoints, EP_FL_PRIMARY ), /* ignore the error */);
				739
				740	/* Get the new ones */
				741	CHECK_FCT_DO( fd_cnx_getremoteeps(peer->p_cnxctx, &peer->p_hdr.info.pi_endpoints), /* ignore the error */);
				742
				743	/* We do not support local endpoints change currently, but it could be added here if needed (refresh fd_g_config->cnf_endpoints) */
				744	{
				745	char * buf = NULL;
				746	size_t len = 0;
				747	LOG_D("Got low layer notification (IGNORED): remote endpoint(s) changed: %s", fd_ep_dump(&buf, &len, NULL, 0, 0, &peer->p_hdr.info.pi_endpoints) ?: "error");
				748	free(buf);
				749	}
				750
				751	/* Done */
				752	goto psm_loop;
				753	}
				754
				755	/* A new connection was established and CER containing this peer id was received */
				756	if (event == FDEVP_CNX_INCOMING) {
				757	struct cnx_incoming * params = ev_data;
				758	ASSERT(params);
				759
				760	/* Handle the message */
				761	CHECK_FCT_DO( fd_p_ce_handle_newCER(&params->cer, peer, &params->cnx, params->validate), goto psm_end );
				762
				763	/* Cleanup if needed */
				764	if (params->cnx) {
				765	fd_cnx_destroy(params->cnx);
				766	params->cnx = NULL;
				767	}
				768	if (params->cer) {
				769	CHECK_FCT_DO( fd_msg_free(params->cer), );
				770	params->cer = NULL;
				771	}
				772
				773	/* Loop */
				774	free(ev_data);
				775	goto psm_loop;
				776	}
				777
				778	/* A new connection has been established with the remote peer */
				779	if (event == FDEVP_CNX_ESTABLISHED) {
				780	struct cnxctx * cnx = ev_data;
				781
				782	/* Release the resources of the connecting thread */
				783	CHECK_POSIX_DO( pthread_join( peer->p_ini_thr, NULL), /* ignore, it is not a big deal */);
				784	peer->p_ini_thr = (pthread_t)NULL;
				785
				786	switch (cur_state) {
				787	case STATE_WAITCNXACK_ELEC:
				788	case STATE_WAITCNXACK:
				789	LOG_D("%s: Connection established, %s", peer->p_hdr.info.pi_diamid, fd_cnx_getid(cnx));
				790	fd_p_ce_handle_newcnx(peer, cnx);
				791	break;
				792
				793	default:
				794	/* Just abort the attempt and continue */
				795	TRACE_DEBUG(FULL, "Connection attempt successful but current state is %s, closing... (too slow?)", STATE_STR(cur_state));
				796	fd_cnx_destroy(cnx);
				797	}
				798
				799	goto psm_loop;
				800	}
				801
				802	/* A new connection has not been established with the remote peer */
				803	if (event == FDEVP_CNX_FAILED) {
				804
				805	/* Release the resources of the connecting thread */
				806	CHECK_POSIX_DO( pthread_join( peer->p_ini_thr, NULL), /* ignore, it is not a big deal */);
				807	peer->p_ini_thr = (pthread_t)NULL;
				808
				809	switch (cur_state) {
				810	case STATE_WAITCNXACK_ELEC:
				811	/* Abort the initiating side */
				812	fd_p_cnx_abort(peer, 0);
				813	/* Process the receiver side */
				814	CHECK_FCT_DO( fd_p_ce_process_receiver(peer), goto psm_end );
				815	break;
				816
				817	case STATE_WAITCNXACK:
				818	/* Go back to CLOSE */
				819	fd_psm_next_timeout(peer, 1, peer->p_hdr.info.config.pic_tctimer ?: fd_g_config->cnf_timer_tc);
				820	goto psm_reset;
				821
				822	default:
				823	/* Just ignore */
				824	TRACE_DEBUG(FULL, "Connection attempt failed but current state is %s, ignoring...", STATE_STR(cur_state));
				825	}
				826
				827	goto psm_loop;
				828	}
				829
				830	/* The timeout for the current state has been reached */
				831	if (event == FDEVP_PSM_TIMEOUT) {
				832	switch (cur_state) {
				833	case STATE_OPEN:
				834	case STATE_REOPEN:
				835	case STATE_OPEN_NEW:
				836	CHECK_FCT_DO( fd_p_dw_timeout(peer), goto psm_end );
				837	goto psm_loop;
				838
				839	case STATE_CLOSED:
				840	LOG_D("%s: Connecting...", peer->p_hdr.info.pi_diamid);
				841	CHECK_FCT_DO( fd_psm_change_state(peer, STATE_WAITCNXACK), goto psm_end );
				842	fd_psm_next_timeout(peer, 0, CNX_TIMEOUT);
				843	CHECK_FCT_DO( fd_p_cnx_init(peer), goto psm_end );
				844	goto psm_loop;
				845
				846	case STATE_SUSPECT:
				847	/* Mark the connection problem */
				848	peer->p_flags.pf_cnx_pb = 1;
				849	case STATE_WAITCNXACK:
				850	case STATE_WAITCEA:
				851	fd_hook_call(HOOK_PEER_CONNECT_FAILED, NULL, peer, "Timeout while waiting for remote peer", NULL);
				852	case STATE_CLOSING:
				853	/* Destroy the connection, restart the timer to a new connection attempt */
				854	fd_psm_next_timeout(peer, 1, peer->p_hdr.info.config.pic_tctimer ?: fd_g_config->cnf_timer_tc);
				855	goto psm_reset;
				856
				857	case STATE_CLOSING_GRACE:
				858	/* The grace period is completed, now close */
				859	if (peer->p_flags.pf_localterm)
				860	goto psm_end;
				861
				862	fd_psm_cleanup(peer, 0);
				863	/* Reset the timer for next connection attempt */
				864	fd_psm_next_timeout(peer, 1, fd_p_dp_newdelay(peer));
				865	goto psm_loop;
				866
				867	case STATE_WAITCNXACK_ELEC:
				868	/* Abort the initiating side */
				869	fd_p_cnx_abort(peer, 0);
				870	/* Process the receiver side */
				871	CHECK_FCT_DO( fd_p_ce_process_receiver(peer), goto psm_end );
				872	goto psm_loop;
				873
				874	default:
				875	ASSERT(0); /* implementation problem, we did not foresee this case? */
				876	}
				877	}
				878
				879	/* Default action : the handling has not yet been implemented. [for debug only] */
				880	TRACE_DEBUG(INFO, "Missing handler in PSM for '%s'\t<-- '%s'", STATE_STR(cur_state), fd_pev_str(event));
				881	psm_reset:
				882	if (peer->p_flags.pf_delete)
				883	goto psm_end;
				884	fd_psm_cleanup(peer, 0);
				885	goto psm_loop;
				886
				887	psm_end:
				888	LOG_E("%s: Going to ZOMBIE state (no more activity)", peer->p_hdr.info.pi_diamid);
				889	fd_psm_cleanup(peer, 1);
				890	TRACE_DEBUG(INFO, "'%s'\t-> 'STATE_ZOMBIE' (terminated)\t'%s'",
				891	STATE_STR(fd_peer_getstate(peer)),
				892	peer->p_hdr.info.pi_diamid);
				893	pthread_cleanup_pop(1); /* set STATE_ZOMBIE */
				894	peer->p_psm = (pthread_t)NULL;
				895	pthread_detach(pthread_self());
				896	return NULL;
				897	}
				898
				899
				900	/************************************************************************/
				901	/* Functions to control the PSM */
				902	/************************************************************************/
				903	/* Create the PSM thread of one peer structure */
				904	int fd_psm_begin(struct fd_peer * peer )
				905	{
				906	TRACE_ENTRY("%p", peer);
				907
				908	/* Check the peer and state are OK */
				909	CHECK_PARAMS( fd_peer_getstate(peer) == STATE_NEW );
				910
				911	/* Create the FIFO for events */
				912	CHECK_FCT( fd_fifo_new(&peer->p_events, 0) );
				913
				914	/* Create the PSM controler thread */
				915	CHECK_POSIX( pthread_create( &peer->p_psm, NULL, p_psm_th, peer ) );
				916
				917	/* We're done */
				918	return 0;
				919	}
				920
				921	/* End the PSM (clean ending) */
				922	int fd_psm_terminate(struct fd_peer * peer, char * reason )
				923	{
				924	TRACE_ENTRY("%p", peer);
				925	CHECK_PARAMS( CHECK_PEER(peer) );
				926
				927	if (fd_peer_getstate(peer) != STATE_ZOMBIE) {
				928	CHECK_FCT( fd_event_send(peer->p_events, FDEVP_TERMINATE, 0, reason) );
				929	} else {
				930	TRACE_DEBUG(FULL, "Peer '%s' was already terminated", peer->p_hdr.info.pi_diamid);
				931	}
				932	return 0;
				933	}
				934
				935	/* End the PSM & cleanup the peer structure */
				936	void fd_psm_abord(struct fd_peer * peer )
				937	{
				938	TRACE_ENTRY("%p", peer);
				939
				940	/* Cancel PSM thread */
				941	CHECK_FCT_DO( fd_thr_term(&peer->p_psm), /* continue */ );
				942
				943	/* Cleanup the data */
				944	fd_psm_cleanup(peer, 1);
				945
				946	/* Destroy the event list */
				947	CHECK_FCT_DO( fd_fifo_del(&peer->p_events), /* continue */ );
				948
				949	/* Remaining cleanups are performed in fd_peer_free */
				950	return;
				951	}
				952