2004-11-04 Andrew J. Schorr <ajschorr@alumni.princeton.edu>

	* vty.c: Vtysh connections to daemons should use buffering.
	  (vty_out) Remove exception for vty_shell_serv, just use buffer_write.
	  (vty_new) Increase output buffer size to 4096 rounded up to a
	  multiple of pagesize.
	  (vtysh_read) After command has been executed and all output buffered,
	  call buffer_flush_available and schedule further writes if the
	  buffers are not yet empty.
	  (vtysh_write) New function to flush output to vtysh when the socket
	  is writeable.
	  (vty_event) Added new VTYSH_WRITE event for flushing buffers to vtysh
	  clients.  Also save the VTYSH_READ thread in vty->t_read so that it
	  can be cancelled in vty_close.
	* buffer.h: In struct buffer_data, remove unused "parent" field.
	  Convert "unsigned char *data" to "unsigned char data[0]" to save
	  a malloc.  Declare new function buffer_flush_available that works
	  with non-blocking sockets.
	* buffer.c: (buffer_data_new) Use a single malloc now that data is
	  a variable-size array at end of structure.
	  (buffer_data_free) Just a single free now that data is part of the
	  structure.
	  (buffer_write) Simplify the logic to make behavior more transparent.
	  (buffer_flush) Decrease b->length as data is written out.
	  (buffer_flush_vty_all) Decrease b->length as buffers are freed.
	  (buffer_flush_vty) Decrease b->length as data is written out.
	  (buffer_flush_available) New function to flush non-blocking sockets.
diff --git a/lib/ChangeLog b/lib/ChangeLog
index 0e4a223..75ff444 100644
--- a/lib/ChangeLog
+++ b/lib/ChangeLog
@@ -1,3 +1,31 @@
+2004-11-04 Andrew J. Schorr <ajschorr@alumni.princeton.edu>
+
+	* vty.c: Vtysh connections to daemons should use buffering.
+	  (vty_out) Remove exception for vty_shell_serv, just use buffer_write.
+	  (vty_new) Increase output buffer size to 4096 rounded up to a
+	  multiple of pagesize.
+	  (vtysh_read) After command has been executed and all output buffered,
+	  call buffer_flush_available and schedule further writes if the
+	  buffers are not yet empty.
+	  (vtysh_write) New function to flush output to vtysh when the socket
+	  is writeable.
+	  (vty_event) Added new VTYSH_WRITE event for flushing buffers to vtysh
+	  clients.  Also save the VTYSH_READ thread in vty->t_read so that it
+	  can be cancelled in vty_close.
+	* buffer.h: In struct buffer_data, remove unused "parent" field.
+	  Convert "unsigned char *data" to "unsigned char data[0]" to save
+	  a malloc.  Declare new function buffer_flush_available that works
+	  with non-blocking sockets.
+	* buffer.c: (buffer_data_new) Use a single malloc now that data is
+	  a variable-size array at end of structure.
+	  (buffer_data_free) Just a single free now that data is part of the
+	  structure.
+	  (buffer_write) Simplify the logic to make behavior more transparent.
+	  (buffer_flush) Decrease b->length as data is written out.
+	  (buffer_flush_vty_all) Decrease b->length as buffers are freed.
+	  (buffer_flush_vty) Decrease b->length as data is written out.
+	  (buffer_flush_available) New function to flush non-blocking sockets.
+
 2004-11-01 Paul Jakma <paul@dishone.st>
 
 	* sockopt.c: (setsockopt_pktinfo) remove, its unused.
diff --git a/lib/buffer.c b/lib/buffer.c
index 904b4aa..9d931a9 100644
--- a/lib/buffer.c
+++ b/lib/buffer.c
@@ -24,25 +24,22 @@
 
 #include "memory.h"
 #include "buffer.h"
+#include <stddef.h>
 
 /* Make buffer data. */
-struct buffer_data *
+static struct buffer_data *
 buffer_data_new (size_t size)
 {
   struct buffer_data *d;
 
-  d = XMALLOC (MTYPE_BUFFER_DATA, sizeof (struct buffer_data));
-  memset (d, 0, sizeof (struct buffer_data));
-  d->data = XMALLOC (MTYPE_BUFFER_DATA, size);
-
+  d = XMALLOC (MTYPE_BUFFER_DATA, offsetof(struct buffer_data,data[size]));
+  d->cp = d->sp = 0;
   return d;
 }
 
-void
+static void
 buffer_data_free (struct buffer_data *d)
 {
-  if (d->data)
-    XFREE (MTYPE_BUFFER_DATA, d->data);
   XFREE (MTYPE_BUFFER_DATA, d);
 }
 
@@ -159,6 +156,8 @@
   /* We use even last one byte of data buffer. */
   while (size)    
     {
+      size_t chunk;
+
       /* If there is no data buffer add it. */
       if (data == NULL || data->cp == b->size)
 	{
@@ -166,23 +165,11 @@
 	  data = b->tail;
 	}
 
-      /* Last data. */
-      if (size <= (b->size - data->cp))
-	{
-	  memcpy ((data->data + data->cp), ptr, size);
-
-	  data->cp += size;
-	  size = 0;
-	}
-      else
-	{
-	  memcpy ((data->data + data->cp), ptr, (b->size - data->cp));
-
-	  size -= (b->size - data->cp);
-	  ptr += (b->size - data->cp);
-
-	  data->cp = b->size;
-	}
+      chunk = ((size <= (b->size - data->cp)) ? size : (b->size - data->cp));
+      memcpy ((data->data + data->cp), ptr, chunk);
+      size -= chunk;
+      ptr += chunk;
+      data->cp += chunk;
     }
   return 1;
 }
@@ -235,6 +222,7 @@
 	{
 	  iovec[iov_index++].iov_len = size;
 	  data->sp += size;
+	  b->length -= size;
 	  if (data->sp == data->cp)
 	    data = data->next;
 	  break;
@@ -242,6 +230,7 @@
       else
 	{
 	  iovec[iov_index++].iov_len = data->cp - data->sp;
+	  b->length -= (data->cp - data->sp);
 	  size -= data->cp - data->sp;
 	  data->sp = data->cp;
 	}
@@ -369,6 +358,7 @@
 	b->tail = next;
       b->head = next;
 
+      b->length -= (out->cp-out->sp);
       buffer_data_free (out);
       b->alloc--;
     }
@@ -430,6 +420,7 @@
 	{
 	  iov[iov_index++].iov_len = size;
 	  data->sp += size;
+	  b->length -= size;
 	  if (data->sp == data->cp)
 	    data = data->next;
 	  break;
@@ -438,6 +429,7 @@
 	{
 	  iov[iov_index++].iov_len = data->cp - data->sp;
 	  size -= (data->cp - data->sp);
+	  b->length -= (data->cp - data->sp);
 	  data->sp = data->cp;
 	}
     }
@@ -566,3 +558,85 @@
 
   return buffer_flush_vty (b, fd, size, erase, no_more);
 }
+
+/* This function (unlike other buffer_flush* functions above) is designed
+   to work with non-blocking sockets.  It does not attempt to write out
+   all of the queued data, just a "big" chunk: at most MAX_CHUNKS
+   buffer_data blocks, and no further blocks once MAX_FLUSH bytes have
+   been gathered.  It returns 0 if it was able to empty out the buffers
+   completely, or 1 if more flushing is required later.  A failed writev
+   also returns 1 and leaves the queued data untouched. */
+int
+buffer_flush_available(struct buffer *b, int fd)
+{
+
+/* These are just reasonable values to make sure a significant amount of
+   data is written.  There's no need to go crazy and try to write it all
+   in one shot. */
+#ifdef IOV_MAX
+#define MAX_CHUNKS ((IOV_MAX >= 16) ? 16 : IOV_MAX)
+#else
+#define MAX_CHUNKS 16
+#endif
+#define MAX_FLUSH 131072
+
+  struct buffer_data *d;
+  struct buffer_data *next;
+  ssize_t written;
+  struct iovec iov[MAX_CHUNKS];
+  int iovcnt = 0;
+  size_t nbyte = 0;
+
+  /* Nothing queued: report "empty" without calling writev, which POSIX
+     permits to fail with EINVAL when iovcnt is 0. */
+  if (b->head == NULL)
+    return 0;
+
+  /* Gather the unsent part (sp..cp) of each queued block. */
+  for (d = b->head; d && (iovcnt < MAX_CHUNKS) && (nbyte < MAX_FLUSH);
+       d = d->next, iovcnt++)
+    {
+      iov[iovcnt].iov_base = d->data+d->sp;
+      nbyte += (iov[iovcnt].iov_len = d->cp-d->sp);
+    }
+
+  if ((written = writev(fd,iov,iovcnt)) < 0)
+    {
+      if ((errno != EAGAIN) && (errno != EINTR))
+        zlog_warn("buffer_flush_available write error on fd %d: %s",
+		  fd,strerror(errno));
+      return 1;
+    }
+
+  /* Free printed buffer data. */
+  for (d = b->head; (written > 0) && d; d = next)
+    {
+      size_t pending = d->cp - d->sp;
+
+      /* Partial write of this block: record how far we got and stop. */
+      if ((size_t) written < pending)
+        {
+	  d->sp += written;
+	  b->length -= written;
+	  return 1;
+	}
+
+      /* Block fully written: unlink it from the head of the list. */
+      written -= pending;
+      next = d->next;
+      if (next)
+	next->prev = NULL;
+      else
+	b->tail = next;
+      b->head = next;
+
+      b->length -= pending;
+      buffer_data_free (d);
+      b->alloc--;
+    }
+
+  return (b->head != NULL);
+
+#undef MAX_CHUNKS
+#undef MAX_FLUSH
+}
diff --git a/lib/buffer.h b/lib/buffer.h
index 9d453b1..2acd571 100644
--- a/lib/buffer.h
+++ b/lib/buffer.h
@@ -33,7 +33,7 @@
   /* Current allocated data. */
   unsigned long alloc;
 
-  /* Total length of buffer. */
+  /* Size of each buffer_data chunk. */
   unsigned long size;
 
   /* For allocation. */
@@ -47,18 +47,17 @@
 /* Data container. */
 struct buffer_data
 {
-  struct buffer *parent;
   struct buffer_data *next;
   struct buffer_data *prev;
 
-  /* Acctual data stream. */
-  unsigned char *data;
-
   /* Current pointer. */
   unsigned long cp;
 
   /* Start pointer. */
   unsigned long sp;
+
+  /* Actual data stream (variable length); must remain the last member. */
+  unsigned char data[];  /* real dimension is buffer->size */
 };
 
 /* Buffer prototypes. */
@@ -74,4 +73,11 @@
 int buffer_flush_window (struct buffer *, int, int, int, int, int);
 int buffer_empty (struct buffer *);
 
+/* buffer_flush_available attempts to flush the queued data to the given
+   file descriptor.  It returns 0 if the buffers are now empty (after
+   flushing), or 1 if more data remains on the buffer queue (must be flushed
+   later).  This function (unlike the other buffer_flush* functions) is
+   designed to work with non-blocking file descriptors. */
+int buffer_flush_available(struct buffer *, int fd);
+
 #endif /* _ZEBRA_BUFFER_H */
diff --git a/lib/vty.c b/lib/vty.c
index e37c99f..e6bcb61 100644
--- a/lib/vty.c
+++ b/lib/vty.c
@@ -45,10 +45,14 @@
   VTY_TIMEOUT_RESET,
 #ifdef VTYSH
   VTYSH_SERV,
-  VTYSH_READ
+  VTYSH_READ,
+  VTYSH_WRITE
 #endif /* VTYSH */
 };
 
+/* Minimum size of output buffers; to be rounded up to multiple of pagesize. */
+#define VTY_OBUF_SIZE	4096
+
 static void vty_event (enum event, int, struct vty *);
 
 /* Extern host structure from command.c */
@@ -127,10 +131,7 @@
 	p = buf;
 
       /* Pointer p must point out buffer. */
-      if (vty_shell_serv (vty))
-	write (vty->fd, (u_char *) p, len);
-      else
-	buffer_write (vty->obuf, (u_char *) p, len);
+      buffer_write (vty->obuf, (u_char *) p, len);
 
       /* If p is not different with buf, it is allocated buffer.  */
       if (p != buf)
@@ -264,8 +265,9 @@
 vty_new ()
 {
   struct vty *new = XCALLOC (MTYPE_VTY, sizeof (struct vty));
+  int pgsz = getpagesize();
 
-  new->obuf = (struct buffer *) buffer_new (100);
+  new->obuf = (struct buffer *) buffer_new ((((VTY_OBUF_SIZE-1)/pgsz)+1)*pgsz);
   new->buf = XCALLOC (MTYPE_VTY, VTY_BUFSIZ);
   new->max = VTY_BUFSIZ;
   new->sb_buffer = NULL;
@@ -2014,12 +2016,25 @@
 #endif /* VTYSH_DEBUG */
 
   header[3] = ret;
-  write (vty->fd, header, 4);
+  buffer_write(vty->obuf, header, 4);
 
+  if (!vty->t_write && buffer_flush_available(vty->obuf, vty->fd))
+    vty_event (VTYSH_WRITE, vty->fd, vty);
   vty_event (VTYSH_READ, sock, vty);
 
   return 0;
 }
+
+static int
+vtysh_write (struct thread *thread)
+{
+  struct vty *vty = THREAD_ARG (thread);
+  vty->t_write = NULL;  /* The scheduled write event has now fired. */
+  if (buffer_flush_available(vty->obuf, vty->fd))
+    vty_event (VTYSH_WRITE, vty->fd, vty);  /* Data remains: re-arm. */
+  return 0;  /* Declared int, so every path must return a value. */
+}
+
 #endif /* VTYSH */
 
 /* Determine address family to bind. */
@@ -2364,7 +2379,10 @@
       thread_add_read (master, vtysh_accept, vty, sock);
       break;
     case VTYSH_READ:
-      thread_add_read (master, vtysh_read, vty, sock);
+      vty->t_read = thread_add_read (master, vtysh_read, vty, sock);
+      break;
+    case VTYSH_WRITE:
+      vty->t_write = thread_add_write (master, vtysh_write, vty, sock);
       break;
 #endif /* VTYSH */
     case VTY_READ: