Skip to content

Commit 12a8b0b

Browse files
zhangeamon
authored and committed
2 parents a5eebbd + 1c9125f commit 12a8b0b

7 files changed

Lines changed: 1212 additions & 75 deletions

File tree

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
{
2+
"uid": "pod_cpu",
3+
"title": "pod cpu usage over 80%",
4+
"condition": "C",
5+
"data": [
6+
{
7+
"refId": "A",
8+
"relativeTimeRange": {
9+
"from": 600,
10+
"to": 0
11+
},
12+
"datasourceUid": "prometheus_on_drycc",
13+
"model": {
14+
"editorMode": "code",
15+
"expr": "(sum(rate(container_cpu_usage_seconds_total{image!=\"\"}[2m])) by (pod,namespace)/sum(kube_pod_container_resource_limits{resource=\"cpu\"}) by (pod,namespace) * 100) \u003e 80",
16+
"instant": true,
17+
"intervalMs": 1000,
18+
"legendFormat": "__auto",
19+
"maxDataPoints": 43200,
20+
"range": false,
21+
"refId": "A"
22+
}
23+
},
24+
{
25+
"refId": "C",
26+
"relativeTimeRange": {
27+
"from": 0,
28+
"to": 0
29+
},
30+
"datasourceUid": "__expr__",
31+
"model": {
32+
"conditions": [
33+
{
34+
"evaluator": {
35+
"params": [
36+
0
37+
],
38+
"type": "gt"
39+
},
40+
"operator": {
41+
"type": "and"
42+
},
43+
"query": {
44+
"params": [
45+
"C"
46+
]
47+
},
48+
"reducer": {
49+
"params": [],
50+
"type": "last"
51+
},
52+
"type": "query"
53+
}
54+
],
55+
"datasource": {
56+
"type": "__expr__",
57+
"uid": "__expr__"
58+
},
59+
"expression": "A",
60+
"intervalMs": 1000,
61+
"maxDataPoints": 43200,
62+
"refId": "C",
63+
"type": "threshold"
64+
}
65+
}
66+
],
67+
"noDataState": "OK",
68+
"execErrState": "KeepLast",
69+
"for": "2m",
70+
"ruleGroup": "middle",
71+
"annotations": {
72+
"description": "namespace: {{ $labels.namespace }}, pod: {{ $labels.pod }}: CPU usage is {{ $value }}% of its limit.",
73+
"summary": "Pod CPU usage has exceeded 80% of its limit."
74+
},
75+
"isPaused": false,
76+
"notification_settings": {
77+
"receiver": "grafana-default-email"
78+
},
79+
"folderUID": "drycc"
80+
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
{
2+
"uid": "pod_memory",
3+
"title": "pod memory usage over 80%",
4+
"condition": "C",
5+
"data": [
6+
{
7+
"refId": "A",
8+
"relativeTimeRange": {
9+
"from": 600,
10+
"to": 0
11+
},
12+
"datasourceUid": "prometheus_on_drycc",
13+
"model": {
14+
"editorMode": "code",
15+
"expr": "(sum(container_memory_working_set_bytes{pod!=\"\",container!=\"\"}) by (pod, namespace)) / (sum(container_spec_memory_limit_bytes{pod!=\"\"}) by (pod, namespace)) \u003e 0.8 and (sum(container_memory_working_set_bytes{pod!=\"\",container!=\"\"}) by (pod, namespace)) / (sum (container_spec_memory_limit_bytes{pod!=\"\"}) by (pod, namespace)) \u003c 2",
16+
"instant": true,
17+
"intervalMs": 1000,
18+
"legendFormat": "__auto",
19+
"maxDataPoints": 43200,
20+
"range": false,
21+
"refId": "A"
22+
}
23+
},
24+
{
25+
"refId": "C",
26+
"relativeTimeRange": {
27+
"from": 0,
28+
"to": 0
29+
},
30+
"datasourceUid": "__expr__",
31+
"model": {
32+
"conditions": [
33+
{
34+
"evaluator": {
35+
"params": [
36+
0
37+
],
38+
"type": "gt"
39+
},
40+
"operator": {
41+
"type": "and"
42+
},
43+
"query": {
44+
"params": [
45+
"C"
46+
]
47+
},
48+
"reducer": {
49+
"params": [],
50+
"type": "last"
51+
},
52+
"type": "query"
53+
}
54+
],
55+
"datasource": {
56+
"type": "__expr__",
57+
"uid": "__expr__"
58+
},
59+
"expression": "A",
60+
"intervalMs": 1000,
61+
"maxDataPoints": 43200,
62+
"refId": "C",
63+
"type": "threshold"
64+
}
65+
}
66+
],
67+
"noDataState": "OK",
68+
"execErrState": "KeepLast",
69+
"for": "2m",
70+
"ruleGroup": "middle",
71+
"keep_firing_for": "2m",
72+
"annotations": {
73+
"description": "namespace: {{ $labels.namespace }}, pod: {{ $labels.pod }}: memory usage to limit ratio is {{ $value }} (threshold 0.8).",
74+
"summary": "Pod memory usage has exceeded 80% of its limit."
75+
},
76+
"isPaused": false,
77+
"notification_settings": {
78+
"receiver": "grafana-default-email"
79+
},
80+
"folderUID": "drycc"
81+
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
{
2+
"uid": "pod_restart",
3+
"title": "pod restart",
4+
"annotations": {
5+
"description": "namespace: {{ $labels.namespace }}, pod: {{ $labels.pod }} restarted.",
6+
"summary": "Pod restart alert"
7+
},
8+
"condition": "C",
9+
"data": [
10+
{
11+
"datasourceUid": "prometheus_on_drycc",
12+
"model": {
13+
"editorMode": "code",
14+
"expr": "increase(kube_pod_container_status_restarts_total{pod!~\".*job.*\"}[30m]) > 0",
15+
"instant": true,
16+
"intervalMs": 1000,
17+
"legendFormat": "__auto",
18+
"maxDataPoints": 43200,
19+
"range": false,
20+
"refId": "A"
21+
},
22+
"queryType": "",
23+
"refId": "A",
24+
"relativeTimeRange": {
25+
"from": 600,
26+
"to": 0
27+
}
28+
},
29+
{
30+
"datasourceUid": "__expr__",
31+
"model": {
32+
"conditions": [
33+
{
34+
"evaluator": {
35+
"params": [
36+
0
37+
],
38+
"type": "gt"
39+
},
40+
"operator": {
41+
"type": "and"
42+
},
43+
"query": {
44+
"params": [
45+
"C"
46+
]
47+
},
48+
"reducer": {
49+
"params": [],
50+
"type": "last"
51+
},
52+
"type": "query"
53+
}
54+
],
55+
"datasource": {
56+
"type": "__expr__",
57+
"uid": "__expr__"
58+
},
59+
"expression": "A",
60+
"intervalMs": 1000,
61+
"maxDataPoints": 43200,
62+
"refId": "C",
63+
"type": "threshold"
64+
},
65+
"queryType": "",
66+
"refId": "C",
67+
"relativeTimeRange": {
68+
"from": 0,
69+
"to": 0
70+
}
71+
}
72+
],
73+
"execErrState": "OK",
74+
"folderUID": "drycc",
75+
"id": 1,
76+
"isPaused": false,
77+
"keep_firing_for": "2m",
78+
"noDataState": "OK",
79+
"notification_settings": {
80+
"receiver": "grafana-default-email"
81+
},
82+
"orgID": 1,
83+
"record": null,
84+
"ruleGroup": "middle",
85+
"for": "2m"
86+
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
{
2+
"uid": "pod_start",
3+
"title": "pod start fail",
4+
"condition": "C",
5+
"data": [
6+
{
7+
"refId": "A",
8+
"relativeTimeRange": {
9+
"from": 600,
10+
"to": 0
11+
},
12+
"datasourceUid": "prometheus_on_drycc",
13+
"model": {
14+
"editorMode": "code",
15+
"expr": "kube_pod_status_phase{pod!~\".*job.*\", phase=~\"Failed|Unknown\"} == 1",
16+
"instant": true,
17+
"intervalMs": 1000,
18+
"legendFormat": "__auto",
19+
"maxDataPoints": 43200,
20+
"range": false,
21+
"refId": "A"
22+
}
23+
},
24+
{
25+
"refId": "C",
26+
"relativeTimeRange": {
27+
"from": 0,
28+
"to": 0
29+
},
30+
"datasourceUid": "__expr__",
31+
"model": {
32+
"conditions": [
33+
{
34+
"evaluator": {
35+
"params": [
36+
0
37+
],
38+
"type": "gt"
39+
},
40+
"operator": {
41+
"type": "and"
42+
},
43+
"query": {
44+
"params": [
45+
"C"
46+
]
47+
},
48+
"reducer": {
49+
"params": [],
50+
"type": "last"
51+
},
52+
"type": "query"
53+
}
54+
],
55+
"datasource": {
56+
"type": "__expr__",
57+
"uid": "__expr__"
58+
},
59+
"expression": "A",
60+
"intervalMs": 1000,
61+
"maxDataPoints": 43200,
62+
"refId": "C",
63+
"type": "threshold"
64+
}
65+
}
66+
],
67+
"noDataState": "OK",
68+
"execErrState": "KeepLast",
69+
"for": "2m",
70+
"ruleGroup": "middle",
71+
"annotations": {
72+
"description": "namespace: {{ $labels.namespace }}, pod: {{ $labels.pod }} failed to start.",
73+
"summary": "Pod failed to start"
74+
},
75+
"isPaused": false,
76+
"notification_settings": {
77+
"receiver": "grafana-default-email"
78+
},
79+
"folderUID": "drycc"
80+
}

0 commit comments

Comments
 (0)