From havanna_moon@gmx.net Fri Jan 17 23:37:49 2003
Date: Sat, 11 Jan 2003 23:26:28 +0100 (CET)
From: Yon Uriarte <havanna_moon@gmx.net>
To: "the list(tm) Zebra" <zebra@zebra.org>
Subject: [zebra 17217] [PATCH] show thread CPU

Hi,

 a little patch from the 'stupid preprocessor tricks' collection to record
thread statistics.

Usage: "show thread cpu [r][w][t][e][x]"

Output Fields: self explaining I hope. Type is one of RWTEX for:
   Read, Write (fd threads), Timer, Event, Execute.

 Overhead vs. vanilla zebra: almost nothing. Vanilla CVS zebra already
collects thread run times.


 Caveats: Under linux getrusage has a granularity of 10ms, which is almost
useless in this case. Run ./configure, edit config.h and comment out
"#define HAVE_RUSAGE",  this way it will use getimeofday which has a much
better granularity. IMHO this is better, as cooperative threads are
effectively running during all that wall time (dont care if CPU
utilization was 3% or 99% during the time the thread was running (an
effective rusage combined with getimeofday could give that info)).

 Maybe someone can give tips for other platforms on API granularity.


TODO: change some of the calls to thread_add_$KIND to
funcname_thread_add_$KIND with a meaningfull funcname, so users will get a
better idea of what's going on.

F.ex. (AFAIK):
ospf_spf_calculate_timer -> "Routes Step 1, areas SPF"
ospf_ase_calculate_timer -> "Routes Step 2, externals"



 Could this be added to the unofficial patch collection?

Could someone with BGP keepalive problems run their bgpd with this patch
and post the results?


TIA, HTH, HAND, regards
 yon


Example output:
--------------------------------
ospfd# show thread cpu
 Runtime(ms)    Invoked Avg uSecs   Max uSecs  Type Thread
      14.829         31       478         585   T   ospf_ase_calculate_timer
      82.132       9838         8         291    EX ospf_nsm_event
       0.029          1        29          29    E  ospf_default_originate_timer
       0.254          9        28          34   T   ospf_db_desc_timer
       0.026          7         3          11   T   ospf_wait_timer
     669.015        523      1279      490696 R     vty_read
       4.415         45        98         173   TE  ospf_network_lsa_refresh_timer
      15.026         31       484         588   T   ospf_spf_calculate_timer
      29.478       1593        18         122    E  ospf_ls_upd_send_queue_event
       0.173          1       173         173   T   vty_timeout
       4.173        242        17          58    E  ospf_ls_ack_send_event
     637.767     121223         5          55   T   ospf_ls_ack_timer
      39.373        244       161        2691 R     zclient_read
      12.169         98       124         726    EX ospf_ism_event
       0.226          2       113         125 R     vty_accept
     537.776      14256        37        3813  W    ospf_write
       4.967         41       121         250   T   ospf_router_lsa_timer
       0.672          1       672         672    E  zclient_connect
       7.901       1658         4          26   T   ospf_ls_req_timer
       0.459          2       229         266    E  ospf_external_lsa_originate_timer
       3.203         60        53         305   T   ospf_maxage_lsa_remover
     108.341       9772        11          65   T   ospf_ls_upd_timer
      33.302        525        63        8628  W    vty_flush
       0.101          1       101         101   T   ospf_router_lsa_update_timer
       0.016          1        16          16   T   ospf_router_id_update_timer
      26.970        407        66         176   T   ospf_lsa_maxage_walker
     381.949      12244        31          69   T   ospf_hello_timer
       0.114         22         5          14   T   ospf_inactivity_timer
      34.290       1223        28         310   T   ospf_lsa_refresh_walker
     470.645       6592        71         665 R     ospf_read
    3119.791     180693        17      490696 RWTEX TOTAL
ospfd#

bgpd# sh t c TeX
 Runtime(ms)    Invoked Avg uSecs   Max uSecs  Type Thread
      21.504        476        45          71   T   bgp_keepalive_timer
      17.784       1157        15         131   T   bgp_reuse_timer
      29.080        193       150         249   T   bgp_scan
      23.606        995        23         420    E  bgp_event
     317.734      28572        11          69   T   bgp_routeadv_timer
       0.084          1        84          84    E  zlookup_connect
       0.526          1       526         526    E  zclient_connect
       1.348         13       103         147   T   bgp_start_timer
      19.443        142       136         420   T   bgp_connect_timer
      16.032        772        20          27   T   bgp_import
     447.141      32322        13         526   TEX TOTAL
bgpd#
bgpd# show thread cpu rw
 Runtime(ms)    Invoked Avg uSecs   Max uSecs  Type Thread
     155.043          7     22149      150659 R     bgp_accept
     129.638        180       720       53844 R     vty_read
       1.734         56        30         129 R     zclient_read
       0.255          2       127         148 R     vty_accept
      58.483        983        59         340 R     bgp_read
     171.495      29190         5         245  W    bgp_write
      13.884        181        76        2542  W    vty_flush
     530.532      30599        17      150659 RW    TOTAL
bgpd#
--------------------------------
diff --git a/lib/thread.c b/lib/thread.c
index 31bbcd7..ff8f739 100644
--- a/lib/thread.c
+++ b/lib/thread.c
@@ -26,6 +26,10 @@
 #include "thread.h"
 #include "memory.h"
 #include "log.h"
+#include "hash.h"
+#include "command.h"
+
+static struct hash *cpu_record = NULL;
 
 /* Struct timeval's tv_usec one second value.  */
 #define TIMER_SECOND_MICRO 1000000L
@@ -82,6 +86,141 @@
 	  + (a.tv_usec - b.tv_usec));
 }
 
+static unsigned int 
+cpu_record_hash_key (struct cpu_thread_history *a)
+{
+  return (unsigned int) a->func;
+}
+
+static int 
+cpu_record_hash_cmp (struct cpu_thread_history *a,
+		     struct cpu_thread_history *b)
+{
+  return a->func == b->func;
+}
+
+static void*  
+cpu_record_hash_alloc (struct cpu_thread_history *a)
+{
+  struct cpu_thread_history *new;
+  new = XMALLOC( MTYPE_TMP/*XXX*/, sizeof *new);
+  memset(new, 0, sizeof *new);
+  new->func = a->func;
+  new->funcname = XSTRDUP(MTYPE_TMP/*XXX*/,a->funcname);
+  return new;
+}
+
+static inline void 
+vty_out_cpu_thread_history(struct vty* vty,
+			   struct cpu_thread_history *a)
+{
+  vty_out(vty, " %7ld.%03ld  %9d  %8ld  %10ld %c%c%c%c%c %s%s",
+	  a->total/1000, a->total%1000, a->total_calls,
+	  a->total/a->total_calls, a->max,
+	  a->types & (1 << THREAD_READ) ? 'R':' ',
+	  a->types & (1 << THREAD_WRITE) ? 'W':' ',
+	  a->types & (1 << THREAD_TIMER) ? 'T':' ',
+	  a->types & (1 << THREAD_EVENT) ? 'E':' ',
+	  a->types & (1 << THREAD_EXECUTE) ? 'X':' ',
+	  a->funcname, VTY_NEWLINE);
+}
+
+static void
+cpu_record_hash_print(struct hash_backet *bucket, 
+		      void *args[])
+{
+  struct cpu_thread_history *totals = args[0];
+  struct vty *vty = args[1];
+  unsigned char *filter = args[2];
+  struct cpu_thread_history *a = bucket->data;
+
+
+  a = bucket->data;
+  if ( !(a->types & *filter) )
+       return;
+  vty_out_cpu_thread_history(vty,a);
+  totals->total += a->total;
+  totals->total_calls += a->total_calls;
+  if (totals->max < a->max)
+    totals->max = a->max;
+}
+
+static void
+cpu_record_print(struct vty *vty, unsigned char filter)
+{
+  struct cpu_thread_history tmp;
+  void *args[3] = {&tmp, vty, &filter};
+
+  memset(&tmp, 0, sizeof tmp);
+  tmp.funcname = "TOTAL";
+  tmp.types = filter;
+
+  vty_out(vty, 
+	  " Runtime(ms)    Invoked Avg uSecs   Max uSecs  Type Thread%s", 
+	  VTY_NEWLINE);
+  hash_iterate(cpu_record,
+	       (void(*)(struct hash_backet*,void*))cpu_record_hash_print,
+	       args);
+
+  if (tmp.total_calls > 0)
+    vty_out_cpu_thread_history(vty, &tmp);
+}
+
+DEFUN(show_thread_cpu,
+      show_thread_cpu_cmd,
+      "show thread cpu [FILTER]",
+      SHOW_STR
+      "Thread information\n"
+      "Thread CPU usage\n"
+      "Display filter (rwtex)\n")
+{
+  int i = 0;
+  unsigned char filter = 0xff;
+
+  if (argc > 0)
+    {
+      filter = 0;
+      while (argv[0][i] != '\0')
+	{
+	  switch ( argv[0][i] )
+	    {
+	    case 'r':
+	    case 'R':
+	      filter |= (1 << THREAD_READ);
+	      break;
+	    case 'w':
+	    case 'W':
+	      filter |= (1 << THREAD_WRITE);
+	      break;
+	    case 't':
+	    case 'T':
+	      filter |= (1 << THREAD_TIMER);
+	      break;
+	    case 'e':
+	    case 'E':
+	      filter |= (1 << THREAD_EVENT);
+	      break;
+	    case 'x':
+	    case 'X':
+	      filter |= (1 << THREAD_EXECUTE);
+	      break;
+	    default:
+	      break;
+	    }
+	  ++i;
+	}
+      if (filter == 0)
+	{
+	  vty_out(vty, "Invalid filter \"%s\" specified, must contain at least one of 'RWTEX'%s",
+		  argv[0], VTY_NEWLINE);
+	  return CMD_WARNING;
+	}
+    }
+
+  cpu_record_print(vty, filter);
+  return CMD_SUCCESS;
+}
+
 /* List allocation and head/tail print out. */
 static void
 thread_list_debug (struct thread_list *list)
@@ -113,6 +252,10 @@
 struct thread_master *
 thread_master_create ()
 {
+  if (cpu_record == NULL) 
+    {
+      cpu_record = hash_create_size( 1011, cpu_record_hash_key, cpu_record_hash_cmp);
+    }
   return (struct thread_master *) XCALLOC (MTYPE_THREAD_MASTER,
 					   sizeof (struct thread_master));
 }
@@ -235,10 +378,37 @@
     return 0;
 }
 
+/* Trim blankspace and "()"s */
+static char *
+strip_funcname (char *funcname) 
+{
+  char buff[100];
+  char tmp, *ret, *e, *b = buff;
+
+  strncpy(buff, funcname, sizeof(buff));
+  buff[ sizeof(buff) -1] = '\0';
+  e = buff +strlen(buff) -1;
+
+  /* Wont work for funcname ==  "Word (explanation)"  */
+
+  while (*b == ' ' || *b == '(')
+    ++b;
+  while (*e == ' ' || *e == ')')
+    --e;
+  e++;
+
+  tmp = *e;
+  *e = '\0';
+  ret  = XSTRDUP (MTYPE_TMP, b);
+  *e = tmp;
+
+  return ret;
+}
+
 /* Get new thread.  */
 static struct thread *
 thread_get (struct thread_master *m, u_char type,
-	    int (*func) (struct thread *), void *arg)
+	    int (*func) (struct thread *), void *arg, char* funcname)
 {
   struct thread *thread;
 
@@ -250,17 +420,20 @@
       m->alloc++;
     }
   thread->type = type;
+  thread->add_type = type;
   thread->master = m;
   thread->func = func;
   thread->arg = arg;
   
+  thread->funcname = strip_funcname(funcname);
+
   return thread;
 }
 
 /* Add new read thread. */
 struct thread *
-thread_add_read (struct thread_master *m, 
-		 int (*func) (struct thread *), void *arg, int fd)
+funcname_thread_add_read (struct thread_master *m, 
+		 int (*func) (struct thread *), void *arg, int fd, char* funcname)
 {
   struct thread *thread;
 
@@ -272,7 +445,7 @@
       return NULL;
     }
 
-  thread = thread_get (m, THREAD_READ, func, arg);
+  thread = thread_get (m, THREAD_READ, func, arg, funcname);
   FD_SET (fd, &m->readfd);
   thread->u.fd = fd;
   thread_list_add (&m->read, thread);
@@ -282,8 +455,8 @@
 
 /* Add new write thread. */
 struct thread *
-thread_add_write (struct thread_master *m,
-		 int (*func) (struct thread *), void *arg, int fd)
+funcname_thread_add_write (struct thread_master *m,
+		 int (*func) (struct thread *), void *arg, int fd, char* funcname)
 {
   struct thread *thread;
 
@@ -295,7 +468,7 @@
       return NULL;
     }
 
-  thread = thread_get (m, THREAD_WRITE, func, arg);
+  thread = thread_get (m, THREAD_WRITE, func, arg, funcname);
   FD_SET (fd, &m->writefd);
   thread->u.fd = fd;
   thread_list_add (&m->write, thread);
@@ -305,8 +478,8 @@
 
 /* Add timer event thread. */
 struct thread *
-thread_add_timer (struct thread_master *m,
-		  int (*func) (struct thread *), void *arg, long timer)
+funcname_thread_add_timer (struct thread_master *m,
+		  int (*func) (struct thread *), void *arg, long timer, char* funcname)
 {
   struct timeval timer_now;
   struct thread *thread;
@@ -316,7 +489,7 @@
 
   assert (m != NULL);
 
-  thread = thread_get (m, THREAD_TIMER, func, arg);
+  thread = thread_get (m, THREAD_TIMER, func, arg, funcname);
 
   /* Do we need jitter here? */
   gettimeofday (&timer_now, NULL);
@@ -342,14 +515,14 @@
 
 /* Add simple event thread. */
 struct thread *
-thread_add_event (struct thread_master *m,
-		  int (*func) (struct thread *), void *arg, int val)
+funcname_thread_add_event (struct thread_master *m,
+		  int (*func) (struct thread *), void *arg, int val, char* funcname)
 {
   struct thread *thread;
 
   assert (m != NULL);
 
-  thread = thread_get (m, THREAD_EVENT, func, arg);
+  thread = thread_get (m, THREAD_EVENT, func, arg, funcname);
   thread->u.val = val;
   thread_list_add (&m->event, thread);
 
@@ -621,6 +794,11 @@
 {
   unsigned long thread_time;
   RUSAGE_T ru;
+  struct cpu_thread_history tmp, *cpu;
+  
+  tmp.func = thread->func;
+  tmp.funcname = thread->funcname;
+  cpu = hash_get(cpu_record, &tmp, cpu_record_hash_alloc);
 
   GETRUSAGE (&thread->ru);
 
@@ -629,6 +807,12 @@
   GETRUSAGE (&ru);
 
   thread_time = thread_consumed_time (&ru, &thread->ru);
+  cpu->total += thread_time;
+  if (cpu->max < thread_time)
+    cpu->max = thread_time;
+
+  ++cpu->total_calls;
+  cpu->types |= (1 << thread->add_type);
 
 #ifdef THREAD_CONSUMED_TIME_CHECK
   if (thread_time > 200000L)
@@ -638,8 +822,8 @@
        * Whinge about it now, so we're aware this is yet another task
        * to fix.
        */
-      zlog_err ("CPU HOG task %lx ran for %ldms",
-                /* FIXME: report the name of the function somehow */
+      zlog_err ("CPU HOG task %s (%lx) ran for %ldms",
+		thread->funcname,
 		(unsigned long) thread->func,
 		thread_time / 1000L);
     }
@@ -648,20 +832,23 @@
 
 /* Execute thread */
 struct thread *
-thread_execute (struct thread_master *m,
+funcname_thread_execute (struct thread_master *m,
                 int (*func)(struct thread *), 
                 void *arg,
-                int val)
+                int val,
+		char* funcname)
 {
   struct thread dummy; 
 
   memset (&dummy, 0, sizeof (struct thread));
 
   dummy.type = THREAD_EVENT;
+  dummy.add_type = THREAD_EXECUTE;
   dummy.master = NULL;
   dummy.func = func;
   dummy.arg = arg;
   dummy.u.val = val;
+  dummy.funcname = strip_funcname (funcname);
   thread_call (&dummy);
 
   return NULL;