lib: track worst case # of cycles and don't allow granularity to go above
* The workqueue code at present errs towards optimising the granularity
for throughput of queue items in runs. This perhaps is at the cost
of risking excessive delays at times. Make the workqueue take
worst-cases into account.
* thread.c: (thread_should_yield) When the thread should yield, we can
return the time taken for free, as it might be useful to callers such
as work_queue_run.
* workqueue.h: (struct work_queue) Add fields for worst # of cycles,
and (independently) worst time taken.
* workqueue.c: (work_queue_new) Worst starts high.
(work_queue_run) Track the worst number of cycles taken, where a
queue run had to yield before clearing out the queue. Use this as an
upper-bound on the granularity, so the granularity can never increase
beyond the worst case observed.
Track the worst-case delay per work-queue, where it had to yield, thanks
to the thread_should_yield return value change. Note that "show thread
cpu" already shows stats for the work_queue_run function, including the
average and worst cases.
Deficiencies:
- A spurious outside delay (e.g. process not run in ages) could cause
'worst' to be very low in some particular invocation of a process,
and it will stay that way for life of process.
- The whole thing of trying to calculate suitable granularities is just
fragile and impossible to get 100% right.
diff --git a/lib/thread.c b/lib/thread.c
index b65078c..de4d76d 100644
--- a/lib/thread.c
+++ b/lib/thread.c
@@ -1264,8 +1264,8 @@
thread_should_yield (struct thread *thread)
{
quagga_get_relative (NULL);
- return (timeval_elapsed(relative_time, thread->real) >
- THREAD_YIELD_TIME_SLOT);
+ unsigned long t = timeval_elapsed(relative_time, thread->real);
+ return ((t > THREAD_YIELD_TIME_SLOT) ? t : 0);
}
void
diff --git a/lib/workqueue.c b/lib/workqueue.c
index b1a5d5b..6453e7b 100644
--- a/lib/workqueue.c
+++ b/lib/workqueue.c
@@ -85,7 +85,8 @@
listnode_add (work_queues, new);
new->cycles.granularity = WORK_QUEUE_MIN_GRANULARITY;
-
+ new->cycles.worst = UINT_MAX;
+
/* Default values, can be overriden by caller */
new->spec.hold = WORK_QUEUE_DEFAULT_HOLD;
@@ -184,29 +185,33 @@
struct work_queue *wq;
vty_out (vty,
- "%c %8s %5s %8s %21s%s",
+ "%c %8s %5s %8s %21s %6s %5s%s",
' ', "List","(ms) ","Q. Runs","Cycle Counts ",
+ " ","Worst",
VTY_NEWLINE);
vty_out (vty,
- "%c %8s %5s %8s %7s %6s %6s %s%s",
+ "%c %8s %5s %8s %7s %6s %6s %6s %5s %s%s",
'P',
"Items",
"Hold",
"Total",
- "Best","Gran.","Avg.",
+ "Best","Worst","Gran.","Avg.", "Lat.",
"Name",
VTY_NEWLINE);
for (ALL_LIST_ELEMENTS_RO (work_queues, node, wq))
{
- vty_out (vty,"%c %8d %5d %8ld %7d %6d %6u %s%s",
+ vty_out (vty,"%c %8u %5u %8lu %7u %6u %6u %6u %5lu %s%s",
(CHECK_FLAG (wq->flags, WQ_UNPLUGGED) ? ' ' : 'P'),
listcount (wq->items),
wq->spec.hold,
wq->runs,
- wq->cycles.best, wq->cycles.granularity,
+ wq->cycles.best,
+ MIN(wq->cycles.best, wq->cycles.worst),
+ wq->cycles.granularity,
(wq->runs) ?
(unsigned int) (wq->cycles.total / wq->runs) : 0,
+ wq->worst_usec,
wq->name,
VTY_NEWLINE);
}
@@ -249,6 +254,7 @@
{
struct work_queue *wq;
struct work_queue_item *item;
+ unsigned long took = 0;
wq_item_status ret;
unsigned int cycles = 0;
struct listnode *node, *nnode;
@@ -268,6 +274,8 @@
*
* Best: starts low, can only increase
*
+ * Worst: starts at MAX, can only decrease.
+ *
* Granularity: starts at WORK_QUEUE_MIN_GRANULARITY, can be decreased
* if we run to end of time slot, can increase otherwise
* by a small factor.
@@ -342,7 +350,7 @@
/* test if we should yield */
if ( !(cycles % wq->cycles.granularity)
- && thread_should_yield (thread))
+ && (took = thread_should_yield (thread)))
{
yielded = 1;
goto stats;
@@ -353,24 +361,32 @@
#define WQ_HYSTERESIS_FACTOR 4
+ if (cycles > wq->cycles.best)
+ wq->cycles.best = cycles;
+
+ if (took > wq->worst_usec)
+ wq->worst_usec = took;
+
/* we yielded, check whether granularity should be reduced */
if (yielded && (cycles < wq->cycles.granularity))
{
wq->cycles.granularity = ((cycles > 0) ? cycles
: WORK_QUEUE_MIN_GRANULARITY);
+ if (cycles < wq->cycles.worst)
+ wq->cycles.worst = cycles;
}
/* otherwise, should granularity increase? */
else if (cycles >= (wq->cycles.granularity))
{
- if (cycles > wq->cycles.best)
- wq->cycles.best = cycles;
-
- /* along with yielded check, provides hysteresis for granularity */
+ /* along with yielded check, provides hysteresis for granularity */
if (cycles > (wq->cycles.granularity * WQ_HYSTERESIS_FACTOR
* WQ_HYSTERESIS_FACTOR))
wq->cycles.granularity *= WQ_HYSTERESIS_FACTOR; /* quick ramp-up */
else if (cycles > (wq->cycles.granularity * WQ_HYSTERESIS_FACTOR))
wq->cycles.granularity += WQ_HYSTERESIS_FACTOR;
+
+ /* clamp granularity down to the worst yielded cycle count */
+ wq->cycles.granularity = MIN(wq->cycles.granularity, wq->cycles.worst);
}
#undef WQ_HYSTERIS_FACTOR
diff --git a/lib/workqueue.h b/lib/workqueue.h
index 5ad2589..aac7860 100644
--- a/lib/workqueue.h
+++ b/lib/workqueue.h
@@ -89,9 +89,11 @@
/* remaining fields should be opaque to users */
struct list *items; /* queue item list */
unsigned long runs; /* runs count */
+ unsigned long worst_usec;
struct {
unsigned int best;
+ unsigned int worst;
unsigned int granularity;
unsigned long total;
} cycles; /* cycle counts */