VOL-393 VOL-395 This update implements a replicated filesystem allowing
fluentd logs to be accessible from all cluster servers. The replicated
filesystem also limits the aggregate size of the logs to limit the
amount of disk space that they consume. This update also
creates a barrier filesystem for consul ensuring that the consul data
can't be affected by a disk full condition. Finally, some serious
changes were made to the fluentd clustering implementation which was
very sub-optimal and wouldn't start correctly 9 times out of 10.
Added improved debugging log collection that checks if the log file is
still growing before it kills the collection process. Added consul
release 0.9.2 to the production containers list. Removed fluent/fluentd
from containers.cfg. Added configuration options to the installer to set
the size of the barrier files.

Change-Id: I89389d76b081b0e6c3961e62d24277b1d2cf3cfb
diff --git a/compose/docker-compose-fluentd-agg-cluster.yml b/compose/docker-compose-fluentd-agg-cluster.yml
index 1e74e79..3c6bafe 100644
--- a/compose/docker-compose-fluentd-agg-cluster.yml
+++ b/compose/docker-compose-fluentd-agg-cluster.yml
@@ -13,30 +13,34 @@
 #
 
   fluentdactv:
-    image: fluent/fluentd
+    image: cord/fluentd
     deploy:
       replicas: 1
       restart_policy:
         condition: on-failure
+    environment:
+        FLUENTD_CONF: fluent-agg.conf
     networks:
       - voltha-net
     ports:
       - "24224"
     volumes:
-      - "/var/log/voltha:/fluentd/log"
+      - "/var/log/voltha/logging_volume:/fluentd/log"
 
   fluentdstby:
-    image: fluent/fluentd
+    image: cord/fluentd
     deploy:
       replicas: 1
       restart_policy:
         condition: on-failure
+    environment:
+        FLUENTD_CONF: fluent-agg.conf
     networks:
       - voltha-net
     ports:
       - "24224"
     volumes:
-      - "/var/log/voltha:/fluentd/log"
+      - "/var/log/voltha/logging_volume:/fluentd/log"
 
 networks:
   voltha-net:
diff --git a/compose/docker-compose-fluentd-cluster.yml b/compose/docker-compose-fluentd-cluster.yml
index 257dbd4..f8250a2 100644
--- a/compose/docker-compose-fluentd-cluster.yml
+++ b/compose/docker-compose-fluentd-cluster.yml
@@ -21,10 +21,11 @@
         condition: on-failure
     environment:
         SERVICE_24224_NAME: "fluentd-intake"
+        FLUENTD_CONF: fluent.conf
     networks:
       - voltha-net
     ports:
-    - "24224:24224"
+    - "24224"
 
 
 networks:
diff --git a/docker/Dockerfile.fluentd b/docker/Dockerfile.fluentd
index 560e005..0ca694f 100644
--- a/docker/Dockerfile.fluentd
+++ b/docker/Dockerfile.fluentd
@@ -18,3 +18,4 @@
 #DockerFile to Create Fluentd Forwards inside cord-voltha
 FROM fluent/fluentd
 COPY fluentd_config/fluent.conf /fluentd/etc/
+COPY fluentd_config/fluent-agg.conf /fluentd/etc/
diff --git a/fluentd_config/fluent-agg.conf b/fluentd_config/fluent-agg.conf
new file mode 100644
index 0000000..c201df8
--- /dev/null
+++ b/fluentd_config/fluent-agg.conf
@@ -0,0 +1,16 @@
+# Configuration for the Fluentd Log Aggregators: accepts logs forwarded over TCP port 24224 and writes them to files under /fluentd/log.
+# TCP input
+<source>
+  @type forward
+  port 24224
+</source>
+<match **>
+  @type file
+
+  path /fluentd/log/voltha
+  buffer_path /fluentd/log/*.log
+  append true
+  time_slice_wait 10m
+  time_format %Y%m%dT%H%M%S%z
+  time_slice_format %Y%m%d%H
+</match>
diff --git a/fluentd_config/fluent.conf b/fluentd_config/fluent.conf
index 0beab19..79e6590 100644
--- a/fluentd_config/fluent.conf
+++ b/fluentd_config/fluent.conf
@@ -6,6 +6,7 @@
 </source>
 <match **>
   @type forward
+  heartbeat_type tcp
 
   # primary host
   <server>
diff --git a/install/ansible/group_vars/all b/install/ansible/group_vars/all
index 6a3164e..9ab3722 100644
--- a/install/ansible/group_vars/all
+++ b/install/ansible/group_vars/all
@@ -5,9 +5,12 @@
 docker_push_registry: "vinstall1001:5000"
 cord_home: /home/volthainstall/cord
 target_voltha_dir: /cord/incubator/voltha
-replicated_fs_dir: /var/cache
+barrier_fs_dir: /var/cache
 docker_py_version: "1.7.0"
 netifaces_version: "0.10.4"
 target_voltha_home: /home/voltha
 docker_daemon_json: daemon.json
 docker_daemon_json_dest: /etc/docker
+registry_volume_size: 5
+consul_volume_size: 5
+logger_volume_size: 20
diff --git a/install/ansible/roles/cluster-host/tasks/main.yml b/install/ansible/roles/cluster-host/tasks/main.yml
index 17bcd43..834d34a 100644
--- a/install/ansible/roles/cluster-host/tasks/main.yml
+++ b/install/ansible/roles/cluster-host/tasks/main.yml
@@ -77,6 +77,16 @@
     links: yes
   tags: [cluster_host]
 
+#- name: upstart barrier filesystem loop mount script is installed
+#  copy:
+#    src: "/home/vinstall/losetup.conf"
+#    dest: /etc/init
+#    owner: root
+#    group: root
+#    mode: 0644
+#  when: target == "cluster"
+#  tags: [cluster_host]
+
 #- name: pre-emptive strike to avoid errors during package installation
 #  apt:
 #    name: "{{ item }}"
@@ -121,41 +131,75 @@
   when: target == "cluster"
   tags: [cluster_host]
 
-- name: Replicated filesystem file is created
-  command: "dd if=/dev/zero of={{ replicated_fs_dir }}/.cluster-fs-file bs=100M count=100"
+- name: Logging barrier file is created
+  command: "dd if=/dev/zero of={{ barrier_fs_dir }}/.logger-barrier-file cbs=100M bs=1G count={{ logger_volume_size }}"
   when: target == "cluster"
   tags: [cluster_host]
 
-- name: The loop device is set up for file system creation
-  command: "losetup -f {{ replicated_fs_dir }}/.cluster-fs-file"
+- name: Registry barrier file is created
+  command: "dd if=/dev/zero of={{ barrier_fs_dir }}/.registry-barrier-file cbs=100M bs=1G count={{ registry_volume_size }}"
   when: target == "cluster"
   tags: [cluster_host]
 
-- name: The xfs filesystem is created on the loop device
+- name: Consul barrier file is created
+  command: "dd if=/dev/zero of={{ barrier_fs_dir }}/.consul-barrier-file cbs=100M bs=1G count={{ consul_volume_size }}"
+  when: target == "cluster"
+  tags: [cluster_host]
+
+- name: The logging barrier file is set up as a loop device
+  command: "losetup /dev/loop0 {{ barrier_fs_dir }}/.logger-barrier-file"
+  when: target == "cluster"
+  tags: [cluster_host]
+
+- name: The registry barrier file is set up as a loop device
+  command: "losetup /dev/loop1 {{ barrier_fs_dir }}/.registry-barrier-file"
+  when: target == "cluster"
+  tags: [cluster_host]
+
+- name: The consul barrier file is set up as a loop device
+  command: "losetup /dev/loop2 {{ barrier_fs_dir }}/.consul-barrier-file"
+  when: target == "cluster"
+  tags: [cluster_host]
+
+- name: The xfs filesystem is created on the replicated barrier file systems
   filesystem:
     fstype: xfs
-    dev: /dev/loop0
+    dev: "{{ item }}"
     opts: -i size=512
+  with_items:
+    - /dev/loop0
+    - /dev/loop1
   when: target == "cluster"
   tags: [cluster_host]
 
-- name: The loop device that's no longer needed is removed
+- name: The ext4 filesystem is created on the consul barrier volume
+  filesystem:
+    fstype: ext4
+    dev: /dev/loop2
+  when: target == "cluster"
+  tags: [cluster_host]
+
+- name: The loop devices that are no longer needed are removed
   command: "losetup -D"
   when: target == "cluster"
   tags: [cluster_host]
 
-- name: The registry fileystem file is owned by voltha
+- name: The barrier fileystem files are owned by voltha
   file:
-    path: "{{ replicated_fs_dir }}/.cluster-fs-file"
+    path: "{{ barrier_fs_dir }}/{{ item }}"
     mode: 0755
     owner: voltha
     group: voltha
+  with_items:
+    - ".registry-barrier-file"
+    - ".logger-barrier-file"
+    - ".consul-barrier-file"
   when: target == "cluster"
   tags: [cluster_host]
 
-- name: A brick for a glusterfs mountpoint is created
+- name: A mountpoint for the glusterfs registry brick is created
   file:
-    path: "{{ replicated_fs_dir }}/brick1"
+    path: "{{ barrier_fs_dir }}/reg_brick1"
     state: directory
     mode: 0755
     owner: voltha
@@ -163,19 +207,49 @@
   when: target == "cluster"
   tags: [cluster_host]
 
-- name: The replicated filesystem is mounted on boot
+- name: A mountpoint for the glusterfs logging brick is created
+  file:
+    path: "{{ barrier_fs_dir }}/log_brick1"
+    state: directory
+    mode: 0755
+    owner: voltha
+    group: voltha
+  when: target == "cluster"
+  tags: [cluster_host]
+
+- name: The replicated registry filesystem is mounted on boot
   mount:
-    path: "{{ replicated_fs_dir }}/brick1"
-    src: "{{ replicated_fs_dir }}/.cluster-fs-file"
+    path: "{{ barrier_fs_dir }}/reg_brick1"
+    src: "{{ barrier_fs_dir }}/.registry-barrier-file"
     fstype: xfs
     opts: loop
     state: mounted
   when: target == "cluster"
   tags: [cluster_host]
 
-- name: A directory for the glusterfs volume is created
+- name: The replicated logger filesystem is mounted on boot
+  mount:
+    path: "{{ barrier_fs_dir }}/log_brick1"
+    src: "{{ barrier_fs_dir }}/.logger-barrier-file"
+    fstype: xfs
+    opts: loop
+    state: mounted
+  when: target == "cluster"
+  tags: [cluster_host]
+
+- name: A directory for the registry glusterfs volume is created
   file:
-    path: "{{ replicated_fs_dir }}/brick1/registry_volume"
+    path: "{{ barrier_fs_dir }}/reg_brick1/registry_volume"
+    state: directory
+    mode: 0755
+    owner: voltha
+    group: voltha
+  when: target == "cluster"
+  tags: [cluster_host]
+
+- name: A directory for the logging glusterfs volume is created
+  file:
+    path: "{{ barrier_fs_dir }}/log_brick1/logging_volume"
     state: directory
     mode: 0755
     owner: voltha
@@ -197,6 +271,16 @@
   when: target == "cluster"
   tags: [cluster_host]
 
+- name: The consul data filesystem is mounted on boot
+  mount:
+    path: "{{ target_voltha_dir }}/consul/data"
+    src: "{{ barrier_fs_dir }}/.consul-barrier-file"
+    fstype: ext4
+    opts: loop
+    state: mounted
+  when: target == "cluster"
+  tags: [cluster_host]
+
 - name: The glusterfs service is started
   service:
     name: glusterfs-server
@@ -205,7 +289,7 @@
   when: target == "cluster"
   tags: [cluster_host]
 
-- name: The replicated filesystem is mounted on boot
+- name: The replicated registry filesystem is mounted on boot
   mount:
     path: "{{ target_voltha_dir }}/registry_data"
     src: "{{ inventory_hostname }}:/registry_volume"
@@ -214,3 +298,13 @@
     state: present
   when: target == "cluster"
   tags: [cluster_host]
+
+- name: The replicated logging filesystem is mounted on boot
+  mount:
+    path: "/var/log/voltha"
+    src: "{{ inventory_hostname }}:/logging_volume"
+    fstype: glusterfs
+    opts:  "defaults,_netdev,noauto,x-systemd.automount"
+    state: present
+  when: target == "cluster"
+  tags: [cluster_host]
diff --git a/install/ansible/roles/common/defaults/main.yml b/install/ansible/roles/common/defaults/main.yml
index 8012978..c3dd982 100644
--- a/install/ansible/roles/common/defaults/main.yml
+++ b/install/ansible/roles/common/defaults/main.yml
@@ -20,6 +20,8 @@
   - glusterfs-server
   - glusterfs-client
   - attr
+  - openntpd
+  - ntpdate
 #  - python-libpcap
 
 obsolete_services:
diff --git a/install/ansible/roles/glusterfs/tasks/main.yml b/install/ansible/roles/glusterfs/tasks/main.yml
index e744d75..b8e17da 100644
--- a/install/ansible/roles/glusterfs/tasks/main.yml
+++ b/install/ansible/roles/glusterfs/tasks/main.yml
@@ -1,6 +1,6 @@
-- name: The glusterfs volume is created
+- name: The glusterfs registry volume is created
   gluster_volume:
-    bricks: "{{ replicated_fs_dir }}/brick1"
+    bricks: "{{ barrier_fs_dir }}/reg_brick1"
     force: true
     cluster: "{{groups.cluster | join(\",\")}}"
     replicas: 3
@@ -8,7 +8,21 @@
     state: present
   run_once: true
 
-- name: Start the gluster volume
+- name: The glusterfs logging volume is created
   gluster_volume:
-    name: registry_volume
+    bricks: "{{ barrier_fs_dir }}/log_brick1"
+    force: true
+    cluster: "{{groups.cluster | join(\",\")}}"
+    disperses: 3
+    name: logging_volume
+    state: present
+  run_once: true
+
+- name: Start the gluster volumes
+  gluster_volume:
+    name: "{{ item }}"
     state: started
+  with_items:
+    - registry_volume
+    - logging_volume
+
diff --git a/install/ansible/roles/installer/tasks/main.yml b/install/ansible/roles/installer/tasks/main.yml
index 0b1666e..5db778b 100644
--- a/install/ansible/roles/installer/tasks/main.yml
+++ b/install/ansible/roles/installer/tasks/main.yml
@@ -27,6 +27,7 @@
     - install/get-logs.sh
     - install/get-host-logs.sh
     - install/ansible
+    - install/losetup.conf
     - compose
     - consul_config/basic.json
   tags: [installer]
diff --git a/install/containers.cfg b/install/containers.cfg
index be0409e..c92eba1 100644
--- a/install/containers.cfg
+++ b/install/containers.cfg
@@ -8,8 +8,7 @@
   - cord/chameleon:latest
   - cord/voltha:latest
   - cord/voltha-base:latest
-  - fluent/fluentd:latest
-  - consul:latest
+  - consul:0.9.2
   - wurstmeister/kafka:latest
   - zookeeper:latest
   - gliderlabs/registrator:master
diff --git a/install/get-host-logs.sh b/install/get-host-logs.sh
index 3e0e443..6c627cb 100755
--- a/install/get-host-logs.sh
+++ b/install/get-host-logs.sh
@@ -54,7 +54,7 @@
 patience=5
 while [ "${#lNames[*]}" -ne 0  ]
 do
-	echo "*** Waiting on log collection to complete. Outstanding jobs: ${#lNames[*]} (${lNames[@]})"
+	echo "*** Waiting on log collection to complete (patience = ${patience}). Outstanding jobs: ${#lNames[*]} (${lNames[@]})"
 	sleep 10
 	# Check which collectors are done are remove them from the list
 	jobs > /dev/null # Don't delete this useless line or the next one will eroniously report a PID
diff --git a/install/get-logs.sh b/install/get-logs.sh
index 8843497..2733b88 100644
--- a/install/get-logs.sh
+++ b/install/get-logs.sh
@@ -123,7 +123,7 @@
 
 popd
 pushd ${volthaDir}/registry_data/registry_volume
-tar cjvf ${volthaDir}/logs.tar.bz2 log_tmp/*
+tar cjvf ${volthaDir}/logs.tar`date "+%Y%m%d-%H:%M:%S"`.bz2 log_tmp/*
 rm -fr log_tmp
 popd
 
diff --git a/install/install.cfg b/install/install.cfg
index 80bbc67..964800f 100644
--- a/install/install.cfg
+++ b/install/install.cfg
@@ -3,3 +3,10 @@
 #
 # Configure the user name to initilly log into those hosts as.
 # iUser="vagrant"
+# The maximum storage allocated for the logs in GB
+# logLimit=20
+# The maximum storage allocated for the local registry in GB
+# regLimit=5
+# The space reserved for Consul's storage in GB
+# consulLimit=5
+
diff --git a/install/installer.sh b/install/installer.sh
index 77c25a5..4279c38 100755
--- a/install/installer.sh
+++ b/install/installer.sh
@@ -26,6 +26,18 @@
 	exit
 fi
 
+# Configure barrier file sizes but only if a value was provided in the config file
+
+if [ -v logLimit ]; then
+	sed -i -e "/logger_volume_size/s/.*/logger_volume_size: ${logLimit}/" ansible/group_vars/all
+fi
+if [ -v regLimit ]; then
+	sed -i -e "/registry_volume_size/s/.*/registry_volume_size: ${regLimit}/" ansible/group_vars/all
+fi
+if [ -v consulLimit ]; then
+	sed -i -e "/consul_volume_size/s/.*/consul_volume_size: ${consulLimit}/" ansible/group_vars/all
+fi
+
 # Create the key directory
 mkdir .keys