/*********************************************************************************************************
2* Software License Agreement (BSD License) *
3* Author: Sebastien Decugis <sdecugis@freediameter.net> *
4* *
5* Copyright (c) 2013, WIDE Project and NICT *
6* All rights reserved. *
7* *
8* Redistribution and use of this software in source and binary forms, with or without modification, are *
9* permitted provided that the following conditions are met: *
10* *
11* * Redistributions of source code must retain the above *
12* copyright notice, this list of conditions and the *
13* following disclaimer. *
14* *
15* * Redistributions in binary form must reproduce the above *
16* copyright notice, this list of conditions and the *
17* following disclaimer in the documentation and/or other *
18* materials provided with the distribution. *
19* *
20* * Neither the name of the WIDE Project or NICT nor the *
21* names of its contributors may be used to endorse or *
22* promote products derived from this software without *
23* specific prior written permission of WIDE Project and *
24* NICT. *
25* *
26* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED *
27* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A *
28* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
29* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT *
30* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS *
31* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR *
32* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF *
33* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
34*********************************************************************************************************/
35
36#include "fdcore-internal.h"
37
38/* Structure to store a sent request */
39struct sentreq {
40 struct fd_list chain; /* the "o" field points directly to the (new) hop-by-hop of the request (uint32_t *) */
41 struct msg *req; /* A request that was sent and not yet answered. */
42 uint32_t prevhbh;/* The value to set back in the hbh header when the message is retrieved */
43 struct fd_list expire; /* the list of expiring requests */
44 struct timespec timeout; /* Cache the expire date of the request so that the timeout thread does not need to get it each time. */
45 struct timespec added_on; /* the time the request was added */
46};
47
48/* Find an element in the hbh list, or the following one */
49static struct fd_list * find_or_next(struct fd_list * srlist, uint32_t hbh, int * match)
50{
51 struct fd_list * li;
52 *match = 0;
53 for (li = srlist->next; li != srlist; li = li->next) {
54 uint32_t * nexthbh = li->o;
55 if (*nexthbh < hbh)
56 continue;
57 if (*nexthbh == hbh)
58 *match = 1;
59 break;
60 }
61 return li;
62}
63
64/* Similar but start from the end, since we add requests in growing hbh order usually */
65static struct fd_list * find_or_prev(struct fd_list * srlist, uint32_t hbh, int * match)
66{
67 struct fd_list * li;
68 *match = 0;
69 for (li = srlist->prev; li != srlist; li = li->prev) {
70 uint32_t * prevhbh = li->o;
71 if (*prevhbh > hbh)
72 continue;
73 if (*prevhbh == hbh)
74 *match = 1;
75 break;
76 }
77 return li;
78}
79
80static void srl_dump(const char * text, struct fd_list * srlist)
81{
82 struct fd_list * li;
83 struct timespec now;
84
85 LOG_D("%sSentReq list @%p:", text, srlist);
86
87 CHECK_SYS_DO( clock_gettime(CLOCK_REALTIME, &now), );
88
89 for (li = srlist->next; li != srlist; li = li->next) {
90 struct sentreq * sr = (struct sentreq *)li;
91 uint32_t * nexthbh = li->o;
92
93 LOG_D(" - Next req (hbh:0x%x, prev:0x%x): [since %ld.%06ld sec]", *nexthbh, sr->prevhbh,
94 (long)((now.tv_nsec >= sr->added_on.tv_nsec) ? (now.tv_sec - sr->added_on.tv_sec) : (now.tv_sec - sr->added_on.tv_sec - 1)),
95 (long)((now.tv_nsec >= sr->added_on.tv_nsec) ? ((now.tv_nsec - sr->added_on.tv_nsec) / 1000) : ((now.tv_nsec - sr->added_on.tv_nsec + 1000000000) / 1000)));
96 }
97}
98
99/* thread that handles messages expiring. The thread is started only when needed */
100static void * sr_expiry_th(void * arg) {
101 struct sr_list * srlist = arg;
102
103 TRACE_ENTRY("%p", arg);
104 CHECK_PARAMS_DO( arg, return NULL );
105
106 /* Set the thread name */
107 {
108 char buf[48];
109 snprintf(buf, sizeof(buf), "ReqExp/%s", ((struct fd_peer *)(srlist->exp.o))->p_hdr.info.pi_diamid);
110 fd_log_threadname ( buf );
111 }
112
113 do {
114 struct timespec now;
115 struct sentreq * first;
116 struct msg * request;
117 struct fd_peer * sentto;
118 void (*expirecb)(void *, DiamId_t, size_t, struct msg **);
119 void * data;
120 int no_error;
121
122 CHECK_POSIX_DO( pthread_mutex_lock(&srlist->mtx), return NULL );
123 pthread_cleanup_push( fd_cleanup_mutex, &srlist->mtx );
124
125loop:
126 no_error = 0;
127
128 /* Check if there are expiring requests available */
129 if (FD_IS_LIST_EMPTY(&srlist->exp)) {
130 /* Just wait for a change or cancelation */
131 CHECK_POSIX_DO( pthread_cond_wait( &srlist->cnd, &srlist->mtx ), goto unlock );
132 /* Restart the loop on wakeup */
133 goto loop;
134 }
135
136 /* Get the pointer to the request that expires first */
137 first = (struct sentreq *)(srlist->exp.next->o);
138
139 /* Get the current time */
140 CHECK_SYS_DO( clock_gettime(CLOCK_REALTIME, &now), goto unlock );
141
142 /* If first request is not expired, we just wait until it happens */
143 if ( TS_IS_INFERIOR( &now, &first->timeout ) ) {
144
145 CHECK_POSIX_DO2( pthread_cond_timedwait( &srlist->cnd, &srlist->mtx, &first->timeout ),
146 ETIMEDOUT, /* ETIMEDOUT is a normal return value, continue */,
147 /* on other error, */ goto unlock );
148
149 /* on wakeup, loop */
150 goto loop;
151 }
152
153 /* Now, the first request in the list is expired; remove it and call the expirecb for it */
154 request = first->req;
155 sentto = first->chain.head->o;
156
157 TRACE_DEBUG(FULL, "Request %x was not answered by %s within the timer delay", *((uint32_t *)first->chain.o), sentto->p_hdr.info.pi_diamid);
158
159 /* Restore the hbhid */
160 *((uint32_t *)first->chain.o) = first->prevhbh;
161
162 /* Free the sentreq information */
163 fd_list_unlink(&first->chain);
164 srlist->cnt--;
165 srlist->cnt_lost++; /* We are not waiting for this answer anymore, but the remote peer may still be processing it. */
166 fd_list_unlink(&first->expire);
167 free(first);
168
169 no_error = 1;
170unlock:
171 ; /* pthread_cleanup_pop sometimes expands as "} ..." and the label before this cause some compilers to complain... */
172 pthread_cleanup_pop( 1 ); /* unlock the mutex */
173 if (!no_error)
174 break;
175
176
177 /* Retrieve callback in the message */
178 CHECK_FCT_DO( fd_msg_anscb_get( request, NULL, &expirecb, &data ), break);
179 ASSERT(expirecb);
180
181 /* Clean up this expirecb from the message */
182 CHECK_FCT_DO( fd_msg_anscb_reset( request, 0, 1 ), break);
183
184 /* Call it */
185 (*expirecb)(data, sentto->p_hdr.info.pi_diamid, sentto->p_hdr.info.pi_diamidlen, &request);
186
187 /* If the callback did not dispose of the message, do it now */
188 if (request) {
189 fd_hook_call(HOOK_MESSAGE_DROPPED, request, NULL, "Expiration period completed without an answer, and the expiry callback did not dispose of the message.", fd_msg_pmdl_get(request));
190 CHECK_FCT_DO( fd_msg_free(request), /* ignore */ );
191 }
192
193 } while (1);
194
195 ASSERT(0); /* we have encountered a problem, maybe time to signal the framework to terminate? */
196 return NULL;
197}
198
199
200/* Store a new sent request */
201int fd_p_sr_store(struct sr_list * srlist, struct msg **req, uint32_t *hbhloc, uint32_t hbh_restore)
202{
203 struct sentreq * sr;
204 struct fd_list * prev;
205 int match;
206 struct timespec * ts;
207
208 TRACE_ENTRY("%p %p %p %x", srlist, req, hbhloc, hbh_restore);
209 CHECK_PARAMS(srlist && req && *req && hbhloc);
210
211 CHECK_MALLOC( sr = malloc(sizeof(struct sentreq)) );
212 memset(sr, 0, sizeof(struct sentreq));
213 fd_list_init(&sr->chain, hbhloc);
214 sr->req = *req;
215 sr->prevhbh = hbh_restore;
216 fd_list_init(&sr->expire, sr);
217 CHECK_SYS( clock_gettime(CLOCK_REALTIME, &sr->added_on) );
218
219 /* Search the place in the list */
220 CHECK_POSIX( pthread_mutex_lock(&srlist->mtx) );
221 prev = find_or_prev(&srlist->srs, *hbhloc, &match);
222 if (match) {
223 TRACE_DEBUG(INFO, "A request with the same hop-by-hop Id (0x%x) was already sent: error", *hbhloc);
224 free(sr);
225 srl_dump("Current list of SR: ", &srlist->srs);
226 CHECK_POSIX_DO( pthread_mutex_unlock(&srlist->mtx), /* ignore */ );
227 return EINVAL;
228 }
229
230 /* Save in the list */
231 *req = NULL;
232 fd_list_insert_after(prev, &sr->chain);
233 srlist->cnt++;
234
235 /* In case of request with a timeout, also store in the timeout list */
236 ts = fd_msg_anscb_gettimeout( sr->req );
237 if (ts) {
238 struct fd_list * li;
239
240 memcpy(&sr->timeout, ts, sizeof(struct timespec));
241
242 /* browse srlist->exp from the end */
243 for (li = srlist->exp.prev; li != &srlist->exp; li = li->prev) {
244 struct sentreq * s = (struct sentreq *)(li->o);
245 if (TS_IS_INFERIOR(&s->timeout, ts))
246 break;
247 }
248
249 fd_list_insert_after(li, &sr->expire);
250
251 /* if the thread does not exist yet, create it */
252 if (srlist->thr == (pthread_t)NULL) {
253 CHECK_POSIX_DO( pthread_create(&srlist->thr, NULL, sr_expiry_th, srlist), /* continue anyway */);
254 } else {
255 /* or, if added in first position, signal the condvar to update the sleep time of the thread */
256 if (li == &srlist->exp) {
257 CHECK_POSIX_DO( pthread_cond_signal(&srlist->cnd), /* continue anyway */);
258 }
259 }
260 }
261
262 CHECK_POSIX( pthread_mutex_unlock(&srlist->mtx) );
263 return 0;
264}
265
266/* Fetch a request by hbh */
267int fd_p_sr_fetch(struct sr_list * srlist, uint32_t hbh, struct msg **req)
268{
269 struct sentreq * sr;
270 int match;
271
272 TRACE_ENTRY("%p %x %p", srlist, hbh, req);
273 CHECK_PARAMS(srlist && req);
274
275 /* Search the request in the list */
276 CHECK_POSIX( pthread_mutex_lock(&srlist->mtx) );
277 sr = (struct sentreq *)find_or_next(&srlist->srs, hbh, &match);
278 if (!match) {
279 TRACE_DEBUG(INFO, "There is no saved request with this hop-by-hop id (%x)", hbh);
280 srl_dump("Current list of SR: ", &srlist->srs);
281 *req = NULL;
282 if (srlist->cnt_lost > 0) {
283 srlist->cnt_lost--; /* This is probably an answer for a request we already timedout. */
284 } /* else, probably a bug in the remote peer */
285 } else {
286 /* Restore hop-by-hop id */
287 *((uint32_t *)sr->chain.o) = sr->prevhbh;
288 /* Unlink */
289 fd_list_unlink(&sr->chain);
290 srlist->cnt--;
291 fd_list_unlink(&sr->expire);
292 *req = sr->req;
293 free(sr);
294 }
295 CHECK_POSIX( pthread_mutex_unlock(&srlist->mtx) );
296
297 /* do not stop the expire thread here, it might cause creating/destroying it very often otherwise */
298
299 /* Done */
300 return 0;
301}
302
303/* Failover requests (free or requeue routables) */
304void fd_p_sr_failover(struct sr_list * srlist)
305{
306 CHECK_POSIX_DO( pthread_mutex_lock(&srlist->mtx), /* continue anyway */ );
307 while (!FD_IS_LIST_EMPTY(&srlist->srs)) {
308 struct sentreq * sr = (struct sentreq *)(srlist->srs.next);
309 fd_list_unlink(&sr->chain);
310 srlist->cnt--;
311 fd_list_unlink(&sr->expire);
312 if (fd_msg_is_routable(sr->req)) {
313 struct msg_hdr * hdr = NULL;
314 int ret;
315
316 /* Set the 'T' flag */
317 CHECK_FCT_DO(fd_msg_hdr(sr->req, &hdr), /* continue */);
318 if (hdr)
319 hdr->msg_flags |= CMD_FLAG_RETRANSMIT;
320
321 /* Restore the original hop-by-hop id of the request */
322 *((uint32_t *)sr->chain.o) = sr->prevhbh;
323
324 fd_hook_call(HOOK_MESSAGE_FAILOVER, sr->req, (struct fd_peer *)srlist->srs.o, NULL, fd_msg_pmdl_get(sr->req));
325
326 /* Requeue for sending to another peer */
327 CHECK_FCT_DO( ret = fd_fifo_post_noblock(fd_g_outgoing, (void *)&sr->req),
328 {
329 char buf[256];
330 snprintf(buf, sizeof(buf), "Internal error: error while requeuing during failover: %s", strerror(ret));
331 fd_hook_call(HOOK_MESSAGE_DROPPED, sr->req, NULL, buf, fd_msg_pmdl_get(sr->req));
332 CHECK_FCT_DO(fd_msg_free(sr->req), /* What can we do more? */)
333 });
334 } else {
335 /* Just free the request. */
336 /* fd_hook_call(HOOK_MESSAGE_DROPPED, sr->req, NULL, "Sent & unanswered local message discarded during failover.", fd_msg_pmdl_get(sr->req)); */
337 CHECK_FCT_DO(fd_msg_free(sr->req), /* Ignore */);
338 }
339 free(sr);
340 }
341 /* The list of expiring requests must be empty now */
342 ASSERT( FD_IS_LIST_EMPTY(&srlist->exp) );
343 ASSERT( srlist->cnt == 0 ); /* debug the counter management if needed */
344
345 CHECK_POSIX_DO( pthread_mutex_unlock(&srlist->mtx), /* continue anyway */ );
346
347 /* Terminate the expiry thread (must be done when the lock can be taken) */
348 CHECK_FCT_DO( fd_thr_term(&srlist->thr), /* ignore error */ );
349}
350