Easy Uptime Heartbeat Http Server Node Package
Uptime Host Availability - Hands-on Elastic Heartbeat Setup and Watcher Alerting Combination
Heartbeat is a lightweight daemon that you install on a remote server to periodically check the status of your services and determine whether they are available. Unlike Metricbeat, which only tells you if your servers are up or down, Heartbeat tells you whether your services are reachable.
Heartbeat is useful when you need to verify that you're meeting your service level agreements for service uptime. It's also useful for other scenarios, such as security use cases when you need to verify that no one from the outside can access services on your private enterprise server.
You can configure Heartbeat to ping all DNS-resolvable IP addresses for a specified hostname. That way, you can check all services that are load-balanced to see if they are available.
When you configure Heartbeat, you specify monitors that identify the hostnames that you want to check. Each monitor runs based on the schedule that you specify. For example, you can configure one monitor to run every 10 minutes, and a different monitor to run between the hours of 9:00 and 17:00.
Heartbeat currently supports monitors for checking hosts or endpoints via:
- ICMP (v4 and v6) Echo Requests. Use the `icmp` monitor when you simply want to check whether a service is available. This monitor requires root access.
- TCP. Use the `tcp` monitor to connect via TCP. You can optionally configure this monitor to verify the endpoint by sending and/or receiving a custom payload.
- HTTP. Use the `http` monitor to connect via HTTP. You can optionally configure this monitor to verify that the service returns the expected response, such as a specific status code, response header, or content.
Below I will show you how the magic works.
1. Heartbeat installation:
I will skip the Heartbeat installation here. You can refer to the Heartbeat installation page in the Elastic documentation.
2. Heartbeat monitor: look closely at the examples and you will see that I have set a few custom fields such as env, tags, team, etc. These fields are the magical part of the monitoring; we will come back to them in a later step.
#HTTP example
#Check if url response 200
---
- type: http
  schedule: '@every 60s'
  urls: [
    "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"
  ]
  check.response.status: 200
  # Custom fields attached to every event of this monitor; the watcher
  # shown later filters on env and copies site/team/issue into the alert.
  fields:
    env: Production
    tags: image
    site: UK
    team: TeamA
    issue: page is unreachable
  # The watcher reads these fields at the top level of the event
  # (env, team, site, issue), not under "fields".
  fields_under_root: true
---
#TCP example
#Check if Port is open
#You can setup multiple hosts and ports
---
- type: tcp
  schedule: '@every 60s'
  hosts: [
    "host1",
    "host2"
  ]
  ports: [4369, 5672, 15672, 25672]
  ipv4: true
  mode: any
  timeout: 16s
  # Custom fields attached to every event of this monitor; the watcher
  # shown later filters on env and copies tags/site into the alert.
  fields:
    env: Production
    tags: RabbitMQ
    site: USA
  # The watcher reads these fields at the top level of the event.
  fields_under_root: true
After Heartbeat is up and running, you can check the details in the Uptime view and look at the data structure of the Heartbeat documents.
3. Set up a watcher to generate alerts for notification. The key point is to use a top_hits aggregation to bring in more details, and then use the fields you brought in to build the alerts.
# Http monitor alert rule example
# - The nested top_hits aggregation returns one full source document per
#   url.domain bucket, which gives the transform script the details it
#   needs to build a rich alert.
# - The index action writes the alerts to another index; you can use
#   email/webhook/pagerduty actions instead.
PUT _watcher/watch/heartbeat-http-monitor-test
{
  "trigger": {
    "schedule": {
      "interval": "1m"
    }
  },
  "input": {
    "search": {
      "request": {
        "search_type": "query_then_fetch",
        "indices": [
          "<heartbeat-*>"
        ],
        "body": {
          "size": 1,
          "query": {
            "bool": {
              "must": [
                {
                  "term": {
                    "monitor.status": {
                      "value": "down"
                    }
                  }
                },
                {
                  "term": {
                    "monitor.type": {
                      "value": "http"
                    }
                  }
                },
                {
                  "term": {
                    "env": {
                      "value": "Production"
                    }
                  }
                }
              ],
              "filter": {
                "range": {
                  "@timestamp": {
                    "lte": "now",
                    "gte": "now-5m"
                  }
                }
              }
            }
          },
          "aggs": {
            "host": {
              "terms": {
                "field": "url.domain",
                "size": 10
              },
              "aggs": {
                "top_hits": {
                  "top_hits": {
                    "size": 1,
                    "_source": {
                      "includes": [
                        "error",
                        "http",
                        "agent.hostname",
                        "monitor",
                        "sd",
                        "issue",
                        "team",
                        "url",
                        "site",
                        "env"
                      ]
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  "condition": {
    "script": {
      "source": "return ctx.payload.hits.total > params.threshold",
      "lang": "painless",
      "params": {
        "threshold": 0
      }
    }
  },
  "actions": {
    "index_payload": {
      "transform": {
        "script": {
          "source": """
            // Build one alert document per url.domain bucket.
            def docs = [];
            def hits = ctx.payload.aggregations.host.buckets;
            def response_code = 0;
            for (hit in hits)
            {
              // Single document returned by the top_hits sub-aggregation.
              def x = hit.top_hits.hits.hits.0._source;
              def error_type = x.error.type;
              def error_message = x.error.message;
              def port = x.url.port;
              def url = x.url.full;
              def beat_host = x.agent.hostname;
              def protocol = x.monitor.type;
              def ip = x.monitor.ip;
              def host = x.url.domain;
              def status = x.monitor.status;
              def host_id = x.monitor.id;
              def issue = x.issue;
              def supportowner;
              // NOTE(review): as written, the issue configured on the monitor
              // is overwritten whenever monitor.response_code is present -
              // confirm this is intended.
              if (x.monitor.response_code != null) {
                if (issue == null) {
                  response_code = x.monitor.response_code;
                }
                issue = 'http test fail on target';
              }
              // Fall back to a default owner when the monitor sets no team.
              if (x.team != null) {
                supportowner = x.team;
              } else {
                supportowner = "TEAM-global";
              }
              def document = [
                '_id': host + '!' + protocol,
                'id': host + '!' + protocol,
                '@timestamp': ctx.execution_time,
                'watcher_interval': '1m',
                'cat1': host,
                'cat2': 'application',
                'cat3': 'server',
                'tags': 'watcher',
                'site': x.site,
                'env': x.env,
                'processing_state': 'unprocessed',
                'alert_source': 'watcher',
                'issue': issue,
                'state': 'critical',
                'supportowner': supportowner,
                'ip': x.monitor.ip,
                'error_type': error_type,
                'description': error_message,
                'port': port,
                'url': url,
                'probe': beat_host,
                'protocol': protocol,
                'host': host,
                'status': status
              ];
              docs.add(document);
            }
            return ['_doc': docs];
          """,
          "lang": "painless"
        }
      },
      "index": {
        "index": "heartbeat-test"
      }
    }
  }
}
# TCP (port) monitor alert rule example
# Unlike the HTTP rule, this one builds the alerts directly from the
# search hits instead of from a top_hits aggregation.
PUT _watcher/watch/heartbeat-port-monitor-test
{
  "trigger": {
    "schedule": {
      "interval": "1m"
    }
  },
  "input": {
    "search": {
      "request": {
        "search_type": "query_then_fetch",
        "indices": [
          "<heartbeat-*>"
        ],
        "body": {
          "query": {
            "bool": {
              "must": [
                {
                  "term": {
                    "monitor.status": {
                      "value": "down"
                    }
                  }
                },
                {
                  "term": {
                    "monitor.type": {
                      "value": "tcp"
                    }
                  }
                },
                {
                  "term": {
                    "env": {
                      "value": "Production"
                    }
                  }
                }
              ],
              "filter": {
                "range": {
                  "@timestamp": {
                    "gte": "now-5m",
                    "lte": "now"
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  "condition": {
    "script": {
      "source": "return ctx.payload.hits.total > params.threshold",
      "lang": "painless",
      "params": {
        "threshold": 0
      }
    }
  },
  "actions": {
    "index_payload": {
      "transform": {
        "script": {
          "source": """
            // Build one alert document per matching search hit.
            def docs = [];
            def hits = ctx.payload.hits.hits;
            def supportowner;
            for (hit in hits) {
              def x = hit._source;
              // Fall back to a default owner when the monitor sets no team.
              if (x.team != null) {
                supportowner = x.team;
              } else {
                supportowner = "Team-global";
              }
              def document = [
                'id': 'hearbeat-monitoring' + '!!' + x['tags'][0] + '-' + x['url']['domain'] + '-' + x['url']['port'],
                'watcher_interval': '1m',
                '@timestamp': ctx.execution_time,
                'cat1': x['url']['domain'],
                'cat2': x['tags'][0],
                'cat3': 'hearbeat-monitoring',
                'tags': 'watcher',
                'type': 'watcher',
                'site': x.site,
                'env': x.env,
                'processing_state': 'unprocessed',
                'alert_source': 'watcher',
                'state': 'critical',
                'support_owner': supportowner,
                'description': x['error']['message'],
                'host': x['url']['domain'],
                'heartbeat_type': x['monitor']['type'],
                'heartbeat_status': x['monitor']['status'],
                'heartbeat_port': x['url']['port'],
                'issue': 'port:' + x['url']['port'] + ' check failed on ' + x['tags'][0] + '!service'
              ];
              docs.add(document);
            }
            return ['_doc': docs];
          """,
          "lang": "painless"
        }
      },
      "index": {
        "index": "heartbeat-test"
      }
    }
  }
}
4. In the end, you will have beautiful alerts with all the details you need. Your on-call engineers will be very happy to have such rich alerts. And the amazing thing is that you can minimize the time it takes to find out what the issue is.
#http alert
---
{
"issue" : "page is unreachable",
"ip" : null,
"description" : "lookup www.testabcdomain.com.uk on 10.123.0.11:53: no such host",
"env" : "Production",
"processing_state" : "unprocessed",
"url" : "https://www.testabcdomain.com.uk/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png",
"tags" : "watcher",
"probe" : "server1",
"cat2" : "application",
"site" : "Asia",
"protocol" : "http",
"@timestamp" : "2020-03-03T15:35:12.132341Z",
"watcher_interval" : "1m",
"cat3" : "server",
"cat1" : "www.testabcdomain.com.uk",
"port" : 443,
"supportowner" : "TeamC",
"error_type" : "io",
"alert_source" : "watcher",
"host" : "www.testabcdomain.com.uk",
"id" : "www.testabcdomain.com.uk!http",
"state" : "critical",
"status" : "down"
}
---
#TCP alert
---
{
"issue" : "port:5672 check failed on RabbitMQ!service",
"heartbeat_port" : 5672,
"description" : "lookup host1 on 10.62.4.12:53: server misbehaving",
"type" : "watcher",
"env" : "Production",
"processing_state" : "unprocessed",
"tags" : "watcher",
"cat2" : "RabbitMQ",
"site" : "USA",
"support_owner" : "Team-global",
"watcher_interval" : "1m",
"@timestamp" : "2020-03-03T15:37:21.221559Z",
"cat3" : "hearbeat-monitoring",
"cat1" : "host1",
"alert_source" : "watcher",
"host" : "host1",
"id" : "hearbeat-monitoring!!RabbitMQ-host1-5672",
"state" : "critical",
"heartbeat_status" : "down",
"heartbeat_type" : "tcp"
}
Follow us on Twitter 🐦 and Facebook 👥 and Instagram 📷 and join our Facebook and Linkedin Groups 💬.
To join our community Slack team chat 🗣️ read our weekly Faun topics 🗞️, and connect with the community 📣 click here⬇
If this post was helpful, please click the clap 👏 button below a few times to show your support for the author! ⬇
Source: https://faun.pub/uptime-host-availability-hand-on-elastic-heartbeat-setup-and-watcher-alerting-combination-1002f183a6b0
0 Response to "Easy Uptime Heartbeat Http Server Node Package"
Post a Comment