VOL-393 VOL-395 This update implements a replicated filesystem allowing
fluentd logs to be accessible from all cluster servers. The replicated
filesystem also limits the aggregate size of the logs to limit the
amount of disk space that they consume. This update also
creates a barrier filesystem for consul ensuring that the consul data
can't be affected by a disk full condition. Finally, some serious
changes were made to the fluentd clustering implementation which was
very sub-optimal and wouldn't start correctly 9 times out of 10.
Added improved debugging log collection that checks if the log file is
still growing before it kills the collection process. Added consul
release 0.9.2 to the production containers list. Removed fluent/fluentd
from containers.cfg. Added configuration options to the installer to set
the size of the barrier files.
Change-Id: I89389d76b081b0e6c3961e62d24277b1d2cf3cfb
diff --git a/compose/docker-compose-fluentd-agg-cluster.yml b/compose/docker-compose-fluentd-agg-cluster.yml
index 1e74e79..3c6bafe 100644
--- a/compose/docker-compose-fluentd-agg-cluster.yml
+++ b/compose/docker-compose-fluentd-agg-cluster.yml
@@ -13,30 +13,34 @@
#
fluentdactv:
- image: fluent/fluentd
+ image: cord/fluentd
deploy:
replicas: 1
restart_policy:
condition: on-failure
+ environment:
+ FLUENTD_CONF: fluent-agg.conf
networks:
- voltha-net
ports:
- "24224"
volumes:
- - "/var/log/voltha:/fluentd/log"
+ - "/var/log/voltha/logging_volume:/fluentd/log"
fluentdstby:
- image: fluent/fluentd
+ image: cord/fluentd
deploy:
replicas: 1
restart_policy:
condition: on-failure
+ environment:
+ FLUENTD_CONF: fluent-agg.conf
networks:
- voltha-net
ports:
- "24224"
volumes:
- - "/var/log/voltha:/fluentd/log"
+ - "/var/log/voltha/logging_volume:/fluentd/log"
networks:
voltha-net:
diff --git a/compose/docker-compose-fluentd-cluster.yml b/compose/docker-compose-fluentd-cluster.yml
index 257dbd4..f8250a2 100644
--- a/compose/docker-compose-fluentd-cluster.yml
+++ b/compose/docker-compose-fluentd-cluster.yml
@@ -21,10 +21,11 @@
condition: on-failure
environment:
SERVICE_24224_NAME: "fluentd-intake"
+ FLUENTD_CONF: fluent.conf
networks:
- voltha-net
ports:
- - "24224:24224"
+ - "24224"
networks:
diff --git a/docker/Dockerfile.fluentd b/docker/Dockerfile.fluentd
index 560e005..0ca694f 100644
--- a/docker/Dockerfile.fluentd
+++ b/docker/Dockerfile.fluentd
@@ -18,3 +18,4 @@
#DockerFile to Create Fluentd Forwards inside cord-voltha
FROM fluent/fluentd
COPY fluentd_config/fluent.conf /fluentd/etc/
+COPY fluentd_config/fluent-agg.conf /fluentd/etc/
diff --git a/fluentd_config/fluent-agg.conf b/fluentd_config/fluent-agg.conf
new file mode 100644
index 0000000..c201df8
--- /dev/null
+++ b/fluentd_config/fluent-agg.conf
@@ -0,0 +1,16 @@
+# Configuration for the Fluentd Forwarders, it collects logs from voltha components and sends to Fluentd Log Aggregators.
+# TCP input
+<source>
+ @type forward
+ port 24224
+</source>
+<match **>
+ @type file
+
+ path /fluentd/log/voltha
+ buffer_path /fluentd/log/*.log
+ append true
+ time_slice_wait 10m
+ time_format %Y%m%dT%H%M%S%z
+ time_slice_format %Y%m%d%H
+</match>
diff --git a/fluentd_config/fluent.conf b/fluentd_config/fluent.conf
index 0beab19..79e6590 100644
--- a/fluentd_config/fluent.conf
+++ b/fluentd_config/fluent.conf
@@ -6,6 +6,7 @@
</source>
<match **>
@type forward
+ heartbeat_type tcp
# primary host
<server>
diff --git a/install/ansible/group_vars/all b/install/ansible/group_vars/all
index 6a3164e..9ab3722 100644
--- a/install/ansible/group_vars/all
+++ b/install/ansible/group_vars/all
@@ -5,9 +5,12 @@
docker_push_registry: "vinstall1001:5000"
cord_home: /home/volthainstall/cord
target_voltha_dir: /cord/incubator/voltha
-replicated_fs_dir: /var/cache
+barrier_fs_dir: /var/cache
docker_py_version: "1.7.0"
netifaces_version: "0.10.4"
target_voltha_home: /home/voltha
docker_daemon_json: daemon.json
docker_daemon_json_dest: /etc/docker
+registry_volume_size: 5
+consul_volume_size: 5
+logger_volume_size: 20
diff --git a/install/ansible/roles/cluster-host/tasks/main.yml b/install/ansible/roles/cluster-host/tasks/main.yml
index 17bcd43..834d34a 100644
--- a/install/ansible/roles/cluster-host/tasks/main.yml
+++ b/install/ansible/roles/cluster-host/tasks/main.yml
@@ -77,6 +77,16 @@
links: yes
tags: [cluster_host]
+#- name: upstart barrier filesystem loop mount script is installed
+# copy:
+# src: "/home/vinstall/losetup.conf"
+# dest: /etc/init
+# owner: root
+# group: root
+# mode: 0644
+# when: target == "cluster"
+# tags: [cluster_host]
+
#- name: pre-emptive strike to avoid errors during package installation
# apt:
# name: "{{ item }}"
@@ -121,41 +131,75 @@
when: target == "cluster"
tags: [cluster_host]
-- name: Replicated filesystem file is created
- command: "dd if=/dev/zero of={{ replicated_fs_dir }}/.cluster-fs-file bs=100M count=100"
+- name: Logging barrier file is created
+ command: "dd if=/dev/zero of={{ barrier_fs_dir }}/.logger-barrier-file cbs=100M bs=1G count={{ logger_volume_size }}"
when: target == "cluster"
tags: [cluster_host]
-- name: The loop device is set up for file system creation
- command: "losetup -f {{ replicated_fs_dir }}/.cluster-fs-file"
+- name: Registry barrier file is created
+ command: "dd if=/dev/zero of={{ barrier_fs_dir }}/.registry-barrier-file cbs=100M bs=1G count={{ registry_volume_size }}"
when: target == "cluster"
tags: [cluster_host]
-- name: The xfs filesystem is created on the loop device
+- name: Consul barrier file is created
+ command: "dd if=/dev/zero of={{ barrier_fs_dir }}/.consul-barrier-file cbs=100M bs=1G count={{ consul_volume_size }}"
+ when: target == "cluster"
+ tags: [cluster_host]
+
+- name: The logging barrier file is set up as a loop device
+ command: "losetup /dev/loop0 {{ barrier_fs_dir }}/.logger-barrier-file"
+ when: target == "cluster"
+ tags: [cluster_host]
+
+- name: The registry barrier file is set up as a loop device
+ command: "losetup /dev/loop1 {{ barrier_fs_dir }}/.registry-barrier-file"
+ when: target == "cluster"
+ tags: [cluster_host]
+
+- name: The consul barrier file is set up as a loop device
+ command: "losetup /dev/loop2 {{ barrier_fs_dir }}/.consul-barrier-file"
+ when: target == "cluster"
+ tags: [cluster_host]
+
+- name: The xfs filesystem is created on the replicated barrier file systems
filesystem:
fstype: xfs
- dev: /dev/loop0
+ dev: "{{ item }}"
opts: -i size=512
+ with_items:
+ - /dev/loop0
+ - /dev/loop1
when: target == "cluster"
tags: [cluster_host]
-- name: The loop device that's no longer needed is removed
+- name: The ext4 filesystem is created on the consul barrier volume
+ filesystem:
+ fstype: ext4
+ dev: /dev/loop2
+ when: target == "cluster"
+ tags: [cluster_host]
+
+- name: The loop devices that are no longer needed are removed
command: "losetup -D"
when: target == "cluster"
tags: [cluster_host]
-- name: The registry fileystem file is owned by voltha
+- name: The barrier fileystem files are owned by voltha
file:
- path: "{{ replicated_fs_dir }}/.cluster-fs-file"
+ path: "{{ barrier_fs_dir }}/{{ item }}"
mode: 0755
owner: voltha
group: voltha
+ with_items:
+ - ".registry-barrier-file"
+ - ".logger-barrier-file"
+ - ".consul-barrier-file"
when: target == "cluster"
tags: [cluster_host]
-- name: A brick for a glusterfs mountpoint is created
+- name: A mountpoint for the glusterfs registry brick is created
file:
- path: "{{ replicated_fs_dir }}/brick1"
+ path: "{{ barrier_fs_dir }}/reg_brick1"
state: directory
mode: 0755
owner: voltha
@@ -163,19 +207,49 @@
when: target == "cluster"
tags: [cluster_host]
-- name: The replicated filesystem is mounted on boot
+- name: A mountpoint for the glusterfs logging brick is created
+ file:
+ path: "{{ barrier_fs_dir }}/log_brick1"
+ state: directory
+ mode: 0755
+ owner: voltha
+ group: voltha
+ when: target == "cluster"
+ tags: [cluster_host]
+
+- name: The replicated registry filesystem is mounted on boot
mount:
- path: "{{ replicated_fs_dir }}/brick1"
- src: "{{ replicated_fs_dir }}/.cluster-fs-file"
+ path: "{{ barrier_fs_dir }}/reg_brick1"
+ src: "{{ barrier_fs_dir }}/.registry-barrier-file"
fstype: xfs
opts: loop
state: mounted
when: target == "cluster"
tags: [cluster_host]
-- name: A directory for the glusterfs volume is created
+- name: The replicated logger filesystem is mounted on boot
+ mount:
+ path: "{{ barrier_fs_dir }}/log_brick1"
+ src: "{{ barrier_fs_dir }}/.logger-barrier-file"
+ fstype: xfs
+ opts: loop
+ state: mounted
+ when: target == "cluster"
+ tags: [cluster_host]
+
+- name: A directory for the registry glusterfs volume is created
file:
- path: "{{ replicated_fs_dir }}/brick1/registry_volume"
+ path: "{{ barrier_fs_dir }}/reg_brick1/registry_volume"
+ state: directory
+ mode: 0755
+ owner: voltha
+ group: voltha
+ when: target == "cluster"
+ tags: [cluster_host]
+
+- name: A directory for the logging glusterfs volume is created
+ file:
+ path: "{{ barrier_fs_dir }}/log_brick1/logging_volume"
state: directory
mode: 0755
owner: voltha
@@ -197,6 +271,16 @@
when: target == "cluster"
tags: [cluster_host]
+- name: The consul data filesystem is mounted on boot
+ mount:
+ path: "{{ target_voltha_dir }}/consul/data"
+ src: "{{ barrier_fs_dir }}/.consul-barrier-file"
+ fstype: ext4
+ opts: loop
+ state: mounted
+ when: target == "cluster"
+ tags: [cluster_host]
+
- name: The glusterfs service is started
service:
name: glusterfs-server
@@ -205,7 +289,7 @@
when: target == "cluster"
tags: [cluster_host]
-- name: The replicated filesystem is mounted on boot
+- name: The replicated registry filesystem is mounted on boot
mount:
path: "{{ target_voltha_dir }}/registry_data"
src: "{{ inventory_hostname }}:/registry_volume"
@@ -214,3 +298,13 @@
state: present
when: target == "cluster"
tags: [cluster_host]
+
+- name: The replicated logging filesystem is mounted on boot
+ mount:
+ path: "/var/log/voltha"
+ src: "{{ inventory_hostname }}:/logging_volume"
+ fstype: glusterfs
+ opts: "defaults,_netdev,noauto,x-systemd.automount"
+ state: present
+ when: target == "cluster"
+ tags: [cluster_host]
diff --git a/install/ansible/roles/common/defaults/main.yml b/install/ansible/roles/common/defaults/main.yml
index 8012978..c3dd982 100644
--- a/install/ansible/roles/common/defaults/main.yml
+++ b/install/ansible/roles/common/defaults/main.yml
@@ -20,6 +20,8 @@
- glusterfs-server
- glusterfs-client
- attr
+ - openntpd
+ - ntpdate
# - python-libpcap
obsolete_services:
diff --git a/install/ansible/roles/glusterfs/tasks/main.yml b/install/ansible/roles/glusterfs/tasks/main.yml
index e744d75..b8e17da 100644
--- a/install/ansible/roles/glusterfs/tasks/main.yml
+++ b/install/ansible/roles/glusterfs/tasks/main.yml
@@ -1,6 +1,6 @@
-- name: The glusterfs volume is created
+- name: The glusterfs registry volume is created
gluster_volume:
- bricks: "{{ replicated_fs_dir }}/brick1"
+ bricks: "{{ barrier_fs_dir }}/reg_brick1"
force: true
cluster: "{{groups.cluster | join(\",\")}}"
replicas: 3
@@ -8,7 +8,21 @@
state: present
run_once: true
-- name: Start the gluster volume
+- name: The glusterfs logging volume is created
gluster_volume:
- name: registry_volume
+ bricks: "{{ barrier_fs_dir }}/log_brick1"
+ force: true
+ cluster: "{{groups.cluster | join(\",\")}}"
+ disperses: 3
+ name: logging_volume
+ state: present
+ run_once: true
+
+- name: Start the gluster volumes
+ gluster_volume:
+ name: "{{ item }}"
state: started
+ with_items:
+ - registry_volume
+ - logging_volume
+
diff --git a/install/ansible/roles/installer/tasks/main.yml b/install/ansible/roles/installer/tasks/main.yml
index 0b1666e..5db778b 100644
--- a/install/ansible/roles/installer/tasks/main.yml
+++ b/install/ansible/roles/installer/tasks/main.yml
@@ -27,6 +27,7 @@
- install/get-logs.sh
- install/get-host-logs.sh
- install/ansible
+ - install/losetup.conf
- compose
- consul_config/basic.json
tags: [installer]
diff --git a/install/containers.cfg b/install/containers.cfg
index be0409e..c92eba1 100644
--- a/install/containers.cfg
+++ b/install/containers.cfg
@@ -8,8 +8,7 @@
- cord/chameleon:latest
- cord/voltha:latest
- cord/voltha-base:latest
- - fluent/fluentd:latest
- - consul:latest
+ - consul:0.9.2
- wurstmeister/kafka:latest
- zookeeper:latest
- gliderlabs/registrator:master
diff --git a/install/get-host-logs.sh b/install/get-host-logs.sh
index 3e0e443..6c627cb 100755
--- a/install/get-host-logs.sh
+++ b/install/get-host-logs.sh
@@ -54,7 +54,7 @@
patience=5
while [ "${#lNames[*]}" -ne 0 ]
do
- echo "*** Waiting on log collection to complete. Outstanding jobs: ${#lNames[*]} (${lNames[@]})"
+ echo "*** Waiting on log collection to complete (patience = ${patience}). Outstanding jobs: ${#lNames[*]} (${lNames[@]})"
sleep 10
# Check which collectors are done are remove them from the list
jobs > /dev/null # Don't delete this useless line or the next one will eroniously report a PID
diff --git a/install/get-logs.sh b/install/get-logs.sh
index 8843497..2733b88 100644
--- a/install/get-logs.sh
+++ b/install/get-logs.sh
@@ -123,7 +123,7 @@
popd
pushd ${volthaDir}/registry_data/registry_volume
-tar cjvf ${volthaDir}/logs.tar.bz2 log_tmp/*
+tar cjvf ${volthaDir}/logs.tar`date "+%Y%m%d-%H:%M:%S"`.bz2 log_tmp/*
rm -fr log_tmp
popd
diff --git a/install/install.cfg b/install/install.cfg
index 80bbc67..964800f 100644
--- a/install/install.cfg
+++ b/install/install.cfg
@@ -3,3 +3,10 @@
#
# Configure the user name to initilly log into those hosts as.
# iUser="vagrant"
+# The maximum storage allocated for the logs in GB
+# logLimit=20
+# The maximum storage allocated for the local registry in GB
+# regLimit=5
+# The space reserved for Consul's storage
+# consulLimit=5
+
diff --git a/install/installer.sh b/install/installer.sh
index 77c25a5..4279c38 100755
--- a/install/installer.sh
+++ b/install/installer.sh
@@ -26,6 +26,18 @@
exit
fi
+# Configure barrier file sizes but only if a value was provided in the config file
+
+if [ -v logLimit ]; then
+ sed -i -e "/logger_volume_size/s/.*/logger_volume_size: ${logLimit}/" ansible/group_vars/all
+fi
+if [ -v regLimit ]; then
+ sed -i -e "/registry_volume_size/s/.*/registry_volume_size: ${regLimit}/" ansible/group_vars/all
+fi
+if [ -v consulLimit ]; then
+ sed -i -e "/consul_volume_size/s/.*/consul_volume_size: ${consulLimit}/" ansible/group_vars/all
+fi
+
# Create the key directory
mkdir .keys