2004-11-04 Andrew J. Schorr <ajschorr@alumni.princeton.edu>

	* vty.c: Vtysh connections to daemons should use buffering.
	  (vty_out) Remove exception for vty_shell_serv, just use buffer_write.
	  (vty_new) Increase output buffer size to 4096 rounded up to a
	  multiple of pagesize.
	  (vtysh_read) After command has been executed and all output buffered,
	  call buffer_flush_available and schedule further writes if the
	  buffers are not yet empty.
	  (vtysh_write) New function to flush output to vtysh when the socket
	  is writable.
	  (vty_event) Add new VTYSH_WRITE event for flushing buffers to vtysh
	  clients.  Also save the read thread in vty->t_read so that it can
	  be cancelled in vty_close.
	* buffer.h: In struct buffer_data, remove unused "parent" field.
	  Convert "unsigned char *data" to "unsigned char data[0]" to save
	  a malloc.  Declare new function buffer_flush_available that works
	  with non-blocking sockets.
	* buffer.c: (buffer_data_new) Use a single malloc now that data is
	  a variable-size array at end of structure.
	  (buffer_data_free) Just a single free now that data is part of the
	  structure.
	  (buffer_write) Simplify the logic to make behavior more transparent.
	  (buffer_flush) Decrease b->length as data is written out.
	  (buffer_flush_vty_all) Decrease b->length as buffers are freed.
	  (buffer_flush_vty) Decrease b->length as data is written out.
	  (buffer_flush_available) New function to flush non-blocking sockets.
diff --git a/lib/buffer.c b/lib/buffer.c
index 904b4aa..9d931a9 100644
--- a/lib/buffer.c
+++ b/lib/buffer.c
@@ -24,25 +24,22 @@
 
 #include "memory.h"
 #include "buffer.h"
+#include <stddef.h>
 
 /* Make buffer data. */
-struct buffer_data *
+static struct buffer_data *
 buffer_data_new (size_t size)
 {
   struct buffer_data *d;
 
-  d = XMALLOC (MTYPE_BUFFER_DATA, sizeof (struct buffer_data));
-  memset (d, 0, sizeof (struct buffer_data));
-  d->data = XMALLOC (MTYPE_BUFFER_DATA, size);
-
+  d = XMALLOC (MTYPE_BUFFER_DATA, offsetof(struct buffer_data,data[size]));
+  d->cp = d->sp = 0;	/* next/prev are left unset here (no memset now) */
   return d;
 }
 
-void
+static void
 buffer_data_free (struct buffer_data *d)
 {
-  if (d->data)
-    XFREE (MTYPE_BUFFER_DATA, d->data);
   XFREE (MTYPE_BUFFER_DATA, d);
 }
 
@@ -159,6 +156,8 @@
   /* We use even last one byte of data buffer. */
   while (size)    
     {
+      size_t chunk;
+
       /* If there is no data buffer add it. */
       if (data == NULL || data->cp == b->size)
 	{
@@ -166,23 +165,11 @@
 	  data = b->tail;
 	}
 
-      /* Last data. */
-      if (size <= (b->size - data->cp))
-	{
-	  memcpy ((data->data + data->cp), ptr, size);
-
-	  data->cp += size;
-	  size = 0;
-	}
-      else
-	{
-	  memcpy ((data->data + data->cp), ptr, (b->size - data->cp));
-
-	  size -= (b->size - data->cp);
-	  ptr += (b->size - data->cp);
-
-	  data->cp = b->size;
-	}
+      chunk = ((size <= (b->size - data->cp)) ? size : (b->size - data->cp));
+      memcpy ((data->data + data->cp), ptr, chunk);
+      size -= chunk;
+      ptr += chunk;
+      data->cp += chunk;
     }
   return 1;
 }
@@ -235,6 +222,7 @@
 	{
 	  iovec[iov_index++].iov_len = size;
 	  data->sp += size;
+	  b->length -= size;
 	  if (data->sp == data->cp)
 	    data = data->next;
 	  break;
@@ -242,6 +230,7 @@
       else
 	{
 	  iovec[iov_index++].iov_len = data->cp - data->sp;
+	  b->length -= (data->cp - data->sp);
 	  size -= data->cp - data->sp;
 	  data->sp = data->cp;
 	}
@@ -369,6 +358,7 @@
 	b->tail = next;
       b->head = next;
 
+      b->length -= (out->cp-out->sp);
       buffer_data_free (out);
       b->alloc--;
     }
@@ -430,6 +420,7 @@
 	{
 	  iov[iov_index++].iov_len = size;
 	  data->sp += size;
+	  b->length -= size;
 	  if (data->sp == data->cp)
 	    data = data->next;
 	  break;
@@ -438,6 +429,7 @@
 	{
 	  iov[iov_index++].iov_len = data->cp - data->sp;
 	  size -= (data->cp - data->sp);
+	  b->length -= (data->cp - data->sp);
 	  data->sp = data->cp;
 	}
     }
@@ -566,3 +558,88 @@
 
   return buffer_flush_vty (b, fd, size, erase, no_more);
 }
+
+/* Flush queued output to FD with a single writev() call.
+
+This function (unlike other buffer_flush* functions above) is designed
+to work with non-blocking sockets.  It does not attempt to write out
+all of the queued data, just a "big" chunk: at most MAX_CHUNKS data
+blocks, and no further blocks once MAX_FLUSH bytes have been gathered.
+
+Returns 0 if the buffers are now completely empty (including the case
+where nothing was queued to begin with), or 1 if more flushing is
+required later.  A failed writev() also returns 1; errors other than
+EAGAIN and EINTR are logged with zlog_warn. */
+int
+buffer_flush_available(struct buffer *b, int fd)
+{
+
+/* These are just reasonable values to make sure a significant amount of
+data is written.  There's no need to go crazy and try to write it all
+in one shot. */
+#ifdef IOV_MAX
+#define MAX_CHUNKS ((IOV_MAX >= 16) ? 16 : IOV_MAX)
+#else
+#define MAX_CHUNKS 16
+#endif
+#define MAX_FLUSH 131072
+
+  struct buffer_data *d;
+  struct buffer_data *next;
+  ssize_t written;
+  struct iovec iov[MAX_CHUNKS];
+  int iovcnt = 0;
+  size_t nbyte = 0;
+
+  /* Nothing queued: report "empty" without calling writev.  POSIX allows
+     writev to fail with EINVAL when iovcnt is 0, which would be logged
+     as a write error and reported as "more flushing required". */
+  if (b->head == NULL)
+    return 0;
+
+  /* Gather the unwritten bytes [sp, cp) of each block, from the head. */
+  for (d = b->head; d && (iovcnt < MAX_CHUNKS) && (nbyte < MAX_FLUSH);
+       d = d->next, iovcnt++)
+    {
+      iov[iovcnt].iov_base = d->data+d->sp;
+      nbyte += (iov[iovcnt].iov_len = d->cp-d->sp);
+    }
+
+  if ((written = writev(fd,iov,iovcnt)) < 0)
+    {
+      if ((errno != EAGAIN) && (errno != EINTR))
+        zlog_warn("buffer_flush_available write error on fd %d: %s",
+		  fd,strerror(errno));
+      return 1;
+    }
+
+  /* Free printed buffer data. */
+  for (d = b->head; (written > 0) && d; d = next)
+    {
+      /* Block only partly written: keep it and resume from the new sp. */
+      if (written < d->cp-d->sp)
+        {
+	  d->sp += written;
+	  b->length -= written;
+	  return 1;
+	}
+
+      /* Block fully written: unlink it from the head of the list. */
+      written -= (d->cp-d->sp);
+      next = d->next;
+      if (next)
+	next->prev = NULL;
+      else
+	b->tail = next;
+      b->head = next;
+
+      b->length -= (d->cp-d->sp);
+      buffer_data_free (d);
+      b->alloc--;
+    }
+
+  return (b->head != NULL);
+
+#undef MAX_CHUNKS
+#undef MAX_FLUSH
+}