Skip to content

Commit b979233

Browse files
author
Jonathan Chauncey
committed
fix(telegraf) Document agent configuration
fixes #48
1 parent fca9ea7 commit b979233

2 files changed

Lines changed: 88 additions & 73 deletions

File tree

telegraf/README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,22 @@ You must do 2 things if you want to receive host level metrics from telegraf.
1111
* - Set the volume mounts for both `/sys` and `/proc`
1212
* - Set the environment variables `HOST_PROC` and `HOST_SYS` to the paths where `/proc` and `/sys`, respectively, are mounted in the container. Example entries can be found in the [manifest](manifests/deis-monitor-telegraf-daemon.yaml).
1313

14+
## Environment Variables
15+
The configuration is driven by environment variables, which are rendered into the `config.toml` file passed to telegraf when it starts. The following table lists each environment variable, the default value used when it is not set, and a description.
16+
17+
| Name | Default | Description |
18+
|-----------|---------------|---------------|
19+
| AGENT_INTERVAL | 10s | Default data collection interval for all inputs |
20+
| AGENT_ROUND_INTERVAL | true | Rounds the collection interval to 'interval', i.e., if interval="10s" then always collect on :00, :10, :20, etc. |
21+
| AGENT_BUFFER_LIMIT | 10000 | Telegraf will cache metric_buffer_limit metrics for each output, and will flush this buffer on a successful write. |
22+
| AGENT_COLLECTION_JITTER | 0s | Collection jitter is used to jitter the collection by a random amount. Each plugin will sleep for a random time within jitter before collecting. This can be used to avoid many plugins querying things like sysfs at the same time, which can have a measurable effect on the system. |
23+
| AGENT_FLUSH_INTERVAL | 10s | Default data flushing interval for all outputs. You should not set this below interval. The maximum effective flush interval will be flush_interval + flush_jitter. |
24+
| AGENT_FLUSH_JITTER | 0s | Jitter the flush interval by a random amount. This is primarily to avoid large write spikes for users running a large number of telegraf instances, i.e., a jitter of 5s and flush_interval 10s means flushes will happen every 10-15s. |
25+
| AGENT_DEBUG | false | Run telegraf in debug mode. |
26+
| AGENT_QUIET | false | Run telegraf in quiet mode. |
27+
| AGENT_HOSTNAME | NodeName | Override default hostname |
28+
29+
1430
## Development
1531
A Makefile is provided with the project that can build the image, push it to a registry, and deploy it to a kubernetes cluster.
1632

telegraf/rootfs/config.toml.tpl

Lines changed: 72 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -3,96 +3,96 @@
33

44
# Set Agent Configuration
55
[agent]
6-
interval = "{{ default "10s" .AGENT_INTERVAL}}"
7-
round_interval = {{ default "true" .AGENT_ROUND_INTERVAL }}
6+
interval = {{ default "10s" .AGENT_INTERVAL | quote }}
7+
round_interval = {{ default true .AGENT_ROUND_INTERVAL }}
88
metric_buffer_limit = {{ default "10000" .AGENT_BUFFER_LIMIT }}
9-
collection_jitter = "{{ default "0s" .AGENT_COLLECTION_JITTER }}"
10-
flush_interval = "{{ default "10s" .AGENT_FLUSH_INTERVAL }}"
11-
flush_jitter = "{{ default "0s" .AGENT_FLUSH_JITTER }}"
12-
debug = {{ default "false" .AGENT_DEBUG }}
13-
quiet = {{ default "false" .AGENT_QUIET }}
14-
{{ if .AGENT_HOSTNAME }}hostname = "{{ .AGENT_HOSTNAME }}"{{ end }}
9+
collection_jitter = {{ default "0s" .AGENT_COLLECTION_JITTER | quote }}
10+
flush_interval = {{ default "10s" .AGENT_FLUSH_INTERVAL | quote }}
11+
flush_jitter = {{ default "0s" .AGENT_FLUSH_JITTER | quote }}
12+
debug = {{ default false .AGENT_DEBUG }}
13+
quiet = {{ default false .AGENT_QUIET }}
14+
{{ if .AGENT_HOSTNAME }}hostname = {{ .AGENT_HOSTNAME | quote }} {{ end }}
1515

1616
# Set output configuration
1717
{{ if .AMON_INSTANCE }}
1818
[[outputs.amon]]
19-
server_key = "{{ .AMON_SERVER_KEY }}"
20-
amon_instance = "{{ .AMON_INSTANCE }}"
21-
timeout = "{{ default "5s" .AMON_TIMEOUT }}"
19+
server_key = {{ .AMON_SERVER_KEY | quote }}
20+
amon_instance = {{ .AMON_INSTANCE | quote }}
21+
timeout = {{ default "5s" .AMON_TIMEOUT | quote }}
2222
{{ end }}
2323

2424
{{ if .AMQP_URL }}
2525
[[outputs.amqp]]
26-
url = "{{ .AMQP_URL }}"
27-
exchange = "{{ default "telegraf" .AMQP_EXCHANGE }}"
28-
routing_tag = "{{ default "host" .AMQP_ROUTING_TAG }}"
29-
{{ if .AMQP_SSL_CA }} ssl_ca = "{{ .AMQP_SSL_CA }}" {{ end }}
30-
{{ if .AMQP_SSL_CERT }} ssl_cert = "{{ .AMQP_SSL_CERT }}" {{ end }}
31-
{{ if .AMQP_SSL_KEY }} ssl_key = "{{ .AMQP_SSL_KEY }}" {{ end }}
32-
retention_policy = "{{ default "default" .AMQP_RETENTION_POLICY }}"
33-
database = = "{{ default "telegraf" .AMQP_DATABASE }}"
34-
precision = "{{ default "s" .AMQP_PRECISION }}"
26+
url = {{ .AMQP_URL | quote }}
27+
exchange = {{ default "telegraf" .AMQP_EXCHANGE | quote }}
28+
routing_tag = {{ default "host" .AMQP_ROUTING_TAG | quote }}
29+
{{ if .AMQP_SSL_CA }} ssl_ca = {{ .AMQP_SSL_CA | quote }} {{ end }}
30+
{{ if .AMQP_SSL_CERT }} ssl_cert = {{ .AMQP_SSL_CERT | quote }} {{ end }}
31+
{{ if .AMQP_SSL_KEY }} ssl_key = {{ .AMQP_SSL_KEY | quote }} {{ end }}
32+
retention_policy = {{ default "default" .AMQP_RETENTION_POLICY | quote }}
33+
database = {{ default "telegraf" .AMQP_DATABASE | quote }}
34+
precision = {{ default "s" .AMQP_PRECISION | quote }}
3535
{{ end }}
3636

3737
{{ if .DATADOG_API_KEY }}
3838
[[outputs.datadog]]
39-
apikey = "{{ .DATADOG_API_KEY }}"
39+
apikey = {{ .DATADOG_API_KEY | quote }}
4040
{{ end }}
4141

4242
{{ if .GRAPHITE_SERVERS }}
4343
[[outputs.graphite]]
4444
servers = [{{ .GRAPHITE_SERVERS }}]
45-
prefix = "{{ default "\"\"" .GRAPHITE_PREFIX }}"
45+
prefix = {{ default "" .GRAPHITE_PREFIX | quote }}
4646
timeout = {{ default 2 .GRAPHITE_TIMEOUT }}
4747
{{ end }}
4848

4949
{{ if .INFLUXDB_URLS}}
5050
[[outputs.influxdb]]
51-
urls = ["{{ .INFLUXDB_URLS}}"]
52-
database = "{{default "telegraf" .INFLUXDB_DATABASE }}"
53-
precision = "{{ default "s" .INFLUXDB_PRECISION }}"
54-
timeout = "{{ default "5s" .INFLUXDB_TIMEOUT }}"
55-
{{ if .INFLUXDB_USERNAME}} username = {{ .INFLUXDB_USERNAME }} {{ end }}
56-
{{ if .INFLUXDB_PASSWORD}} password = {{ .INFLUXDB_PASSWORD }} {{ end }}
57-
{{ if .INFLUXDB_USER_AGENT}} user_agent = {{ .INFLUXDB_USER_AGENT }} {{ end }}
58-
{{ if .INFLUXDB_UDP_PAYLOAD}} udp_payload = {{ .INFLUXDB_UDP_PAYLOAD }} {{ end }}
51+
urls = [{{ .INFLUXDB_URLS | quote }}]
52+
database = {{default "telegraf" .INFLUXDB_DATABASE | quote }}
53+
precision = {{ default "s" .INFLUXDB_PRECISION | quote }}
54+
timeout = {{ default "5s" .INFLUXDB_TIMEOUT | quote }}
55+
{{ if .INFLUXDB_USERNAME}} username = {{ .INFLUXDB_USERNAME | quote }} {{ end }}
56+
{{ if .INFLUXDB_PASSWORD}} password = {{ .INFLUXDB_PASSWORD | quote }} {{ end }}
57+
{{ if .INFLUXDB_USER_AGENT}} user_agent = {{ .INFLUXDB_USER_AGENT | quote }} {{ end }}
58+
{{ if .INFLUXDB_UDP_PAYLOAD}} udp_payload = {{ .INFLUXDB_UDP_PAYLOAD }} {{ end }}{{/* udp_payload is an integer byte count in Telegraf, so it must be rendered unquoted */}}
5959
{{ end }}
6060

6161
{{ if .KAFKA_BROKERS}}
6262
[[outputs.kafka]]
63-
brokers = [{{ .KAFKA_BROKERS }}]
64-
topic = "{{ default "telegraf" .KAFKA_TOPIC }}"
65-
routing_tag = "{{ .KAFKA_ROUTING_TAG }}"
66-
certificate = "{{ .KAFKA_CERTIFICATE }}"
67-
key = "{{ .KAFKA_KEY }}"
68-
ca = "{{ .KAFKA_CA }}"
69-
verify_ssl = {{ .KAFKA_VERIFY_SSL }}
63+
brokers = [{{ .KAFKA_BROKERS | quote }}]
64+
topic = {{ default "telegraf" .KAFKA_TOPIC | quote }}
65+
routing_tag = {{ .KAFKA_ROUTING_TAG | quote }}
66+
{{ if .KAFKA_CERTIFICATE }} certificate = {{ .KAFKA_CERTIFICATE | quote }} {{ end }}
67+
{{ if .KAFKA_KEY }} key = {{ .KAFKA_KEY | quote }} {{ end }}
68+
{{ if .KAFKA_CA }} ca = {{ .KAFKA_CA | quote }} {{ end }}
69+
{{ if .KAFKA_VERIFY_SSL }} verify_ssl = {{ .KAFKA_VERIFY_SSL }} {{ end }}
7070
{{ end }}
7171

7272
{{ if .LIBRATO_API_TOKEN }}
7373
[[outputs.librato]]
74-
api_user = "{{ .LIBRATO_API_USER }}"
75-
api_token = "{{ .LIBRATO_API_TOKEN }}"
76-
source_tag = "{{ .LIBRATO_SOURCE_TAG }}"
74+
api_user = {{ .LIBRATO_API_USER | quote }}
75+
api_token = {{ .LIBRATO_API_TOKEN | quote }}
76+
source_tag = {{ .LIBRATO_SOURCE_TAG | quote }}
7777
{{ end }}
7878

7979
{{ if .NSQ_SERVER }}
8080
[[outputs.nsq]]
81-
server = "{{ .NSQ_SERVER }}"
82-
topic = "{{ .NSQ_TOPIC }}"
81+
server = {{ .NSQ_SERVER | quote }}
82+
topic = {{ .NSQ_TOPIC | quote }}
8383
{{ end }}
8484

8585
{{ if .OPEN_TSDB_HOST }}[[outputs.opentsdb]]{{/* table header was missing, so the keys below attached to the previously rendered table */}}
86-
prefix = "{{ .OPEN_TSDB_PREFIX }}"
87-
host = "{{ .OPEN_TSDB_HOST }}"
86+
prefix = {{ .OPEN_TSDB_PREFIX | quote }}
87+
host = {{ .OPEN_TSDB_HOST | quote }}
8888
port = {{ .OPEN_TSDB_PORT }}
8989
debug = {{ default .OPEN_TSDB_DEUBG .OPEN_TSDB_DEBUG }}{{/* OPEN_TSDB_DEUBG is the legacy misspelling, kept only as a fallback */}}
9090
{{ end }}
9191

9292
{{ if .RIEMANN_URL }}
9393
[[outputs.riemann]]
94-
url = "{{ .RIEMANN_URL }}"
95-
transport = "{{ .RIEMANN_TRANSPORT }}"
94+
url = {{ .RIEMANN_URL | quote }}
95+
transport = {{ .RIEMANN_TRANSPORT | quote }}
9696
{{ end }}
9797

9898
# Set Input Configuration
@@ -103,7 +103,6 @@
103103
[[inputs.cpu]]
104104
percpu = true
105105
totalcpu = true
106-
drop = ["time_*"]
107106

108107
{{ if .ENABLE_ZFS }}[[inputs.zfs]]{{ end }}
109108

@@ -131,7 +130,7 @@
131130

132131
{{ if .DOCKER_ENDPOINT }}
133132
[[inputs.docker]]
134-
endpoint = "{{ .DOCKER_ENDPOINT }}"
133+
endpoint = {{ .DOCKER_ENDPOINT | quote }}
135134
container_names = [{{ if .DOCKER_CONTAINER_NAMES }}{{ .DOCKER_CONTAINER_NAMES }}{{ end }}]
136135
{{ end }}
137136

@@ -149,18 +148,18 @@
149148

150149
{{ if .HTTP_JSON_SERVERS }}
151150
[[inputs.httpjson]]
152-
name = "{{.HTTP_JSON_NAME}}"
153-
servers = [{{.HTTP_JSON_SERVERS}}]
154-
method = "{{.HTTP_JSON_METHOD}}"
155-
tag_keys = [{{.HTTP_JSON_TAG_KEYS}}]
151+
name = {{ .HTTP_JSON_NAME | quote }}
152+
servers = [{{ .HTTP_JSON_SERVERS }}]
153+
method = {{ .HTTP_JSON_METHOD | quote }}
154+
tag_keys = [{{ .HTTP_JSON_TAG_KEYS }}]
156155
[json.parameters]
157-
event_type = "{{.HTTP_JSON_EVENT_TYPE}}"
158-
threshold = "{{.HTTP_JSON_THRESHOLD}}"
156+
event_type = {{ .HTTP_JSON_EVENT_TYPE | quote }}
157+
threshold = {{ .HTTP_JSON_THRESHOLD | quote }}
159158
{{end}}
160159

161160
{{ if .INFLUXDB_URLS }}
162161
[[inputs.influxdb]]
163-
urls = ["{{ .INFLUXDB_URLS }}/debug/vars"]
162+
urls = ["{{ .INFLUXDB_URLS }}/debug/vars"]{{/* literal quotes wrap the URL plus the /debug/vars suffix, so the quote filter cannot be used here */}}
164163
{{ end }}
165164
166165
{{ if .MEMCACHED_SERVERS }}
@@ -190,22 +189,22 @@
190189
191190
{{ if .POSTGRESQL_ADDRESS }}
192191
[[inputs.postgresql]]
193-
address = "{{ .POSTGRESQL_ADDRESS }}"
192+
address = {{ .POSTGRESQL_ADDRESS | quote }}
194193
{{ if .POSTGRESQL_DATABASES }} databases = [{{ .POSTGRESQL_DATABASES }}] {{ end }}
195194
{{ end }}
196195
197196
{{ if .PROMETHEUS_URLS }}
198197
[[inputs.prometheus]]
199198
urls = [{{ .PROMETHEUS_URLS }}]
200-
bearer_token = "{{ .PROMETHEUS_BEARER_TOKEN }}"
199+
bearer_token = {{ .PROMETHEUS_BEARER_TOKEN | quote }}
201200
{{ end }}
202201
203202
{{ if .RABBITMQ_URL }}
204203
[[inputs.rabbitmq]]
205-
url = "{{ .RABBITMQ_URL }}"
206-
{{ if .RABBITMQ_NAME}} name = "{{ .RABBITMQ_NAME }}" {{ end }}
207-
{{ if .RABBITMQ_USERNAME }} username = "{{ .RABBITMQ_USERNAME }}" {{ end }}
208-
{{ if .RABBITMQ_PASSWORD }} password = "{{ .RABBITMQ_PASSWORD }}" {{ end }}
204+
url = {{ .RABBITMQ_URL | quote }}
205+
{{ if .RABBITMQ_NAME}} name = {{ .RABBITMQ_NAME | quote }} {{ end }}
206+
{{ if .RABBITMQ_USERNAME }} username = {{ .RABBITMQ_USERNAME | quote }} {{ end }}
207+
{{ if .RABBITMQ_PASSWORD }} password = {{ .RABBITMQ_PASSWORD | quote }} {{ end }}
209208
{{ if .RABBITMQ_NODES }} nodes = [{{ .RABBITMQ_NODES}}] {{ end }}
210209
{{ end }}
211210
@@ -229,22 +228,22 @@
229228
[[inputs.kafka_consumer]]
230229
topics = [{{ .KAFKA_CONSUMER_TOPICS }}]
231230
zookeeper_peers = [{{ .KAFKA_CONSUMER_ZOOKEEPER_PEERS }}]
232-
consumer_group = "{{ .KAFKA_CONSUMER_GROUP }}"
231+
consumer_group = {{ .KAFKA_CONSUMER_GROUP | quote }}
233232
point_buffer = {{ .KAFKA_CONSUMER_POINT_BUFFER }}
234-
offset = "{{ .KAFKA_CONSUMER_OFFSET }}"
233+
offset = {{ .KAFKA_CONSUMER_OFFSET | quote }}
235234
{{ end }}
236235
237236
{{ if .STATSD_SERVICE_ADDRESS }}
238237
[[inputs.statsd]]
239-
service_address = "{{ .STATSD_SERVICE_ADDRESS }}"
240-
delete_gauges = {{ default "false" .STATSD_DELETE_GAUGES }}
241-
delete_counters {{ default "false" .STATSD_DELETE_COUNTERS }}
242-
delete_sets = {{ default "false" .STATSD_DELETE_SETS }}
243-
delete_timings = {{ default "true" .STATSD_DELETE_TIMINGS}}
244-
percentiles = [{{ default "90" .STATSD_PERCENTILES }}]
245-
convert_names = {{ default "true" .STATSD_CONVERT_NAMES }}
238+
service_address = {{ .STATSD_SERVICE_ADDRESS | quote }}
239+
delete_gauges = {{ default false .STATSD_DELETE_GAUGES }}
240+
delete_counters = {{ default false .STATSD_DELETE_COUNTERS }}
241+
delete_sets = {{ default false .STATSD_DELETE_SETS }}
242+
delete_timings = {{ default true .STATSD_DELETE_TIMINGS}}
243+
percentiles = [{{ default 90 .STATSD_PERCENTILES }}]
244+
convert_names = {{ default true .STATSD_CONVERT_NAMES }}
246245
{{ if .STATSD_TEMPLATES}} templates = [{{ .STATSD_TEMPLATES }}] {{ end }}
247-
allowed_pending_messages = {{ default "10000" .STATSD_ALLOWED_PENDING_MESSAGES }}
248-
percentile_limit = {{ default "1000" .STATSD_PERCENTILE_LIMIT }}
249-
udp_packet_size = {{ default "1500" .STATSD_UDP_PACKET_SIZE }}
246+
allowed_pending_messages = {{ default 10000 .STATSD_ALLOWED_PENDING_MESSAGES }}
247+
percentile_limit = {{ default 1000 .STATSD_PERCENTILE_LIMIT }}
248+
udp_packet_size = {{ default 1500 .STATSD_UDP_PACKET_SIZE }}
250249
{{ end }}

0 commit comments

Comments
 (0)