Skip to content
Snippets Groups Projects
Commit 34ed8edb authored by Dennis Ahrens's avatar Dennis Ahrens
Browse files

Add mk_jolokia for JVM monitoring

parent 89d629c1
No related branches found
No related tags found
No related merge requests found
...@@ -77,6 +77,13 @@ Official checkmk plugin. Not really working right now. ...@@ -77,6 +77,13 @@ Official checkmk plugin. Not really working right now.
Installs the [mk_apt](https://checkmk.com/cms_check_apt.html) agent plugin Installs the [mk_apt](https://checkmk.com/cms_check_apt.html) agent plugin
### `checkmk.debian.mk_jolokia`
Installs the `mk_jolokia.py` plugin on your host.
This is capable of monitoring a JVM - e.g. inside tomcat as it relies on the [jolokia](https://jolokia.org/) war.
The plugin can be configured using a file located at `/etc/check_mk/jolokia.cfg`.
### `checkmk.debian.mk_haproxy` ### `checkmk.debian.mk_haproxy`
Installs the haproxy agent plugin. Installs the haproxy agent plugin.
......
#!/usr/bin/env python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# | ____ _ _ __ __ _ __ |
# | / ___| |__ ___ ___| | __ | \/ | |/ / |
# | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
# | | |___| | | | __/ (__| < | | | | . \ |
# | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
# | |
# | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation in version 2. check_mk is distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
# out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more de-
# tails. You should have received a copy of the GNU General Public
# License along with GNU Make; see the file COPYING. If not, write
# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301 USA.
import os
import socket
import sys
import urllib2
try:
try:
import simplejson as json
except ImportError:
import json
except ImportError, import_error:
sys.stdout.write(
"<<<jolokia_info>>>\n"
"Error: mk_jolokia requires either the json or simplejson library."
" Please either use a Python version that contains the json library or install the"
" simplejson library on the monitored system.\n")
sys.exit(1)
try:
import requests
from requests.auth import HTTPDigestAuth
from requests.packages import urllib3
except ImportError, import_error:
sys.stdout.write("<<<jolokia_info>>>\n"
"Error: mk_jolokia requires the requests library."
" Please install it on the monitored system.\n")
sys.exit(1)
VERBOSE = sys.argv.count('--verbose') + sys.argv.count('-v') + 2 * sys.argv.count('-vv')
DEBUG = sys.argv.count('--debug')
MBEAN_SECTIONS = {
'jvm_threading': ("java.lang:type=Threading",),
}
MBEAN_SECTIONS_SPECIFIC = {
'tomcat': {
'jvm_threading':
("*:name=*,type=ThreadPool/maxThreads,currentThreadCount,currentThreadsBusy/",),
},
}
QUERY_SPECS_LEGACY = [
("java.lang:type=Memory", "NonHeapMemoryUsage/used", "NonHeapMemoryUsage", [], False),
("java.lang:type=Memory", "NonHeapMemoryUsage/max", "NonHeapMemoryMax", [], False),
("java.lang:type=Memory", "HeapMemoryUsage/used", "HeapMemoryUsage", [], False),
("java.lang:type=Memory", "HeapMemoryUsage/max", "HeapMemoryMax", [], False),
("java.lang:type=Runtime", "Uptime", "Uptime", [], False),
("java.lang:type=GarbageCollector,name=*", "CollectionCount", "", [], False),
("java.lang:type=GarbageCollector,name=*", "CollectionTime", "", [], False),
("java.lang:name=CMS%20Perm%20Gen,type=MemoryPool", "Usage/used", "PermGenUsage", [], False),
("java.lang:name=CMS%20Perm%20Gen,type=MemoryPool", "Usage/max", "PermGenMax", [], False),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics", "OffHeapHits",
"", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics", "OnDiskHits",
"", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics",
"InMemoryHitPercentage", "", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics", "CacheMisses",
"", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics",
"OnDiskHitPercentage", "", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics",
"MemoryStoreObjectCount", "", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics",
"DiskStoreObjectCount", "", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics",
"CacheMissPercentage", "", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics",
"CacheHitPercentage", "", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics",
"OffHeapHitPercentage", "", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics",
"InMemoryMisses", "", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics",
"OffHeapStoreObjectCount", "", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics",
"WriterQueueLength", "", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics",
"WriterMaxQueueSize", "", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics", "OffHeapMisses",
"", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics", "InMemoryHits",
"", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics",
"AssociatedCacheName", "", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics", "ObjectCount",
"", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics", "OnDiskMisses",
"", [], True),
("net.sf.ehcache:CacheManager=CacheManagerApplication*,*,type=CacheStatistics", "CacheHits", "",
[], True),
]
QUERY_SPECS_SPECIFIC_LEGACY = {
"weblogic": [
("*:*", "CompletedRequestCount", None, ["ServerRuntime"], False),
("*:*", "QueueLength", None, ["ServerRuntime"], False),
("*:*", "StandbyThreadCount", None, ["ServerRuntime"], False),
("*:*", "PendingUserRequestCount", None, ["ServerRuntime"], False),
("*:Name=ThreadPoolRuntime,*", "ExecuteThreadTotalCount", None, ["ServerRuntime"], False),
("*:*", "ExecuteThreadIdleCount", None, ["ServerRuntime"], False),
("*:*", "HoggingThreadCount", None, ["ServerRuntime"], False),
("*:Type=WebAppComponentRuntime,*", "OpenSessionsCurrentCount", None,
["ServerRuntime", "ApplicationRuntime"], False),
],
"tomcat": [
("*:type=Manager,*", "activeSessions,maxActiveSessions", None, ["path", "context"], False),
("*:j2eeType=Servlet,name=default,*", "stateName", None, ["WebModule"], False),
# Check not yet working
("*:j2eeType=Servlet,name=default,*", "requestCount", None, ["WebModule"], False),
# too wide location for addressing the right info
# ( "*:j2eeType=Servlet,*", "requestCount", None, [ "WebModule" ] , False),
],
"jboss": [("*:type=Manager,*", "activeSessions,maxActiveSessions", None, ["path",
"context"], False),],
}
AVAILABLE_PRODUCTS = sorted(set(QUERY_SPECS_SPECIFIC_LEGACY.keys() +
MBEAN_SECTIONS_SPECIFIC.keys()))
# Default global configuration: key, value [, help]
DEFAULT_CONFIG_TUPLES = (
("protocol", "http", "Protocol to use (http/https)."),
("server", "localhost", "Host name or IP address of the Jolokia server."),
("port", 8080, "TCP Port of the Jolokia server."),
("suburi", "jolokia", "Path-component of the URI to query."),
("user", "monitoring", "Username to use for connecting."),
("password", None, "Password to use for connecting."),
("mode", "digest", "Authentication mode. Can be \"basic\", \"digest\" or \"https\"."),
("instance", None, "Name of the instance in the monitoring. Defaults to port."),
("verify", None),
("client_cert", None, "Path to client cert for https authentication."),
("client_key", None, "Client cert secret for https authentication."),
("service_url", None),
("service_user", None),
("service_password", None),
("product", None, "Product description. Available: %s. If not provided," \
" we try to detect the product from the jolokia info section." % \
", ".join(AVAILABLE_PRODUCTS)),
("timeout", 1.0, "Connection/read timeout for requests."),
("custom_vars", []),
# List of instances to monitor. Each instance is a dict where
# the global configuration values can be overridden.
("instances", [{}]),
)
class SkipInstance(RuntimeError):
pass
class SkipMBean(RuntimeError):
pass
def get_default_config_dict():
return dict(tup[:2] for tup in DEFAULT_CONFIG_TUPLES)
def write_section(name, iterable):
sys.stdout.write('<<<%s:sep(0)>>>\n' % name)
for line in iterable:
sys.stdout.write(chr(0).join(map(str, line)) + '\n')
def cached(function):
cache = {}
def cached_function(*args):
key = repr(args)
try:
return cache[key]
except KeyError:
return cache.setdefault(key, function(*args))
return cached_function
class JolokiaInstance(object):
@staticmethod
def _sanitize_config(config):
instance = config.get("instance")
err_msg = "%s in configuration"
if instance:
err_msg += " for %s" % instance
required_keys = set(("protocol", "server", "port", "suburi", "timeout"))
auth_mode = config.get("mode")
if auth_mode in ("digest", "basic", "basic_preemtive"):
required_keys |= set(("user", "password"))
elif auth_mode == "https":
required_keys |= set(("client_cert", "client_key"))
if config.get("service_url") is not None and config.get("service_user") is not None:
required_keys.add("service_password")
missing_keys = required_keys - set(config.keys())
if missing_keys:
raise ValueError(err_msg % ("Missing key(s): %s" % ", ".join(sorted(missing_keys))))
if not instance:
instance = str(config["port"])
config["instance"] = instance.replace(" ", "_")
# port must be (or look like) an integer, timeout like float
for key, type_ in (("port", int), ("timeout", float)):
val = config[key]
try:
config[key] = type_(val)
except ValueError:
raise ValueError(err_msg % ("Invalid %s %r" % (key, val)))
if config.get("server") == "use fqdn":
config["server"] = socket.getfqdn()
# if "verify" was not set to bool/string
if config.get("verify") is None:
# handle legacy "cert_path"
cert_path = config.get("cert_path")
if cert_path not in ("_default", None):
# The '_default' was the default value
# up to cmk version 1.5.0p8. It broke things.
config["verify"] = cert_path
else:
# this is default, but be explicit
config["verify"] = True
return config
def __init__(self, config):
super(JolokiaInstance, self).__init__()
self._config = self._sanitize_config(config)
self.name = self._config["instance"]
self.product = self._config.get("product")
self.custom_vars = self._config.get("custom_vars", [])
self.base_url = self._get_base_url()
self.target = self._get_target()
self._session = self._initialize_http_session()
def _get_base_url(self):
return "%s://%s:%d/%s/" % (
self._config["protocol"].strip('/'),
self._config["server"].strip('/'),
self._config["port"],
self._config["suburi"],
)
def _get_target(self):
url = self._config.get("service_url")
if url is None:
return {}
user = self._config.get("service_user")
if user is None:
return {"url": url}
return {
"url": url,
"user": user,
"password": self._config["service_password"],
}
def _initialize_http_session(self):
session = requests.Session()
# Watch out: we must provide the verify keyword to every individual request call!
# Else it will be overwritten by the REQUESTS_CA_BUNDLE env variable
session.verify = self._config["verify"]
if session.verify is False:
urllib3.disable_warnings(category=urllib3.exceptions.InsecureRequestWarning)
session.timeout = self._config["timeout"]
auth_method = self._config.get("mode")
if auth_method is None:
return session
# initialize authentication
if auth_method == "https":
session.cert = (
self._config["client_cert"],
self._config["client_key"],
)
elif auth_method == 'digest':
session.auth = HTTPDigestAuth(
self._config["user"],
self._config["password"],
)
elif auth_method in ("basic", "basic_preemptive"):
session.auth = (
self._config["user"],
self._config["password"],
)
else:
raise NotImplementedError("Authentication method %r" % auth_method)
return session
def get_post_data(self, path, function, use_target):
segments = path.strip("/").split("/")
# we may have one to three segments:
data = dict(zip(("mbean", "attribute", "path"), segments))
data["type"] = function
if use_target and self.target:
data["target"] = self.target
return data
def post(self, data):
post_data = json.dumps(data)
if VERBOSE:
sys.stderr.write("\nDEBUG: POST data: %r\n" % post_data)
try:
# Watch out: we must provide the verify keyword to every individual request call!
# Else it will be overwritten by the REQUESTS_CA_BUNDLE env variable
raw_response = self._session.post(self.base_url,
data=post_data,
verify=self._session.verify)
except () if DEBUG else Exception, exc:
sys.stderr.write("ERROR: %s\n" % exc)
raise SkipMBean(exc)
return validate_response(raw_response)
def validate_response(raw):
'''return loaded response or raise exception'''
if VERBOSE > 1:
sys.stderr.write("DEBUG: %r:\n"
"DEBUG: headers: %r\n"
"DEBUG: content: %r\n\n" % (raw, raw.headers, raw.content))
# check the status of the http server
if not 200 <= raw.status_code < 300:
sys.stderr.write("ERROR: HTTP STATUS: %d\n" % raw.status_code)
# Unauthorized, Forbidden, Bad Gateway
if raw.status_code in (401, 403, 502):
raise SkipInstance("HTTP STATUS", raw.status_code)
raise SkipMBean("HTTP STATUS", raw.status_code)
response = raw.json()
# check the status of the jolokia response
if response.get("status") != 200:
errmsg = response.get("error", "unkown error")
sys.stderr.write("ERROR: JAVA: %s\n" % errmsg)
raise SkipMBean("JAVA", errmsg)
if "value" not in response:
sys.stderr.write("ERROR: missing 'value': %r\n" % response)
raise SkipMBean("ERROR", "missing 'value'")
if VERBOSE:
sys.stderr.write("\nDEBUG: RESPONSE: %r\n" % response)
return response
def fetch_var(inst, function, path, use_target=False):
data = inst.get_post_data(path, function, use_target=use_target)
obj = inst.post(data)
return obj['value']
# convert single values into lists of items in
# case value is a 1-levelled or 2-levelled dict
def make_item_list(path, value, itemspec):
if not isinstance(value, dict):
if isinstance(value, str):
value = value.replace(r'\/', '/')
return [(path, value)]
result = []
for key, subvalue in value.items():
# Handle filtering via itemspec
miss = False
while itemspec and '=' in itemspec[0]:
if itemspec[0] not in key:
miss = True
break
itemspec = itemspec[1:]
if miss:
continue
item = extract_item(key, itemspec)
if not item:
item = (key,)
result += make_item_list(path + item, subvalue, [])
return result
# Example:
# key = 'Catalina:host=localhost,path=\\/,type=Manager'
# itemsepc = [ "path" ]
# --> "/"
def extract_item(key, itemspec):
if not itemspec:
return ()
path = key.split(":", 1)[-1]
components = path.split(",")
comp_dict = dict(c.split('=') for c in components if c.count('=') == 1)
item = ()
for pathkey in itemspec:
if pathkey in comp_dict:
right = comp_dict[pathkey]
if '/' in right:
right = '/' + right.split('/')[-1]
item = item + (right,)
return item
def fetch_metric(inst, path, title, itemspec, inst_add=None):
values = fetch_var(inst, "read", path, use_target=True)
item_list = make_item_list((), values, itemspec)
for subinstance, value in item_list:
if not subinstance and not title:
sys.stderr.write("INTERNAL ERROR: %s\n" % value)
continue
if "threadStatus" in subinstance or "threadParam" in subinstance:
continue
if len(subinstance) > 1:
item = ",".join((inst.name,) + subinstance[:-1])
elif inst_add is not None:
item = ",".join((inst.name, inst_add))
else:
item = inst.name
if title:
if subinstance:
tit = title + "." + subinstance[-1]
else:
tit = title
else:
tit = subinstance[-1]
yield (item.replace(" ", "_"), tit, value)
@cached
def _get_queries(do_search, inst, itemspec, title, path, mbean):
if not do_search:
return [(mbean + "/" + path, title, itemspec)]
try:
value = fetch_var(inst, "search", mbean)
except () if DEBUG else SkipMBean:
return []
try:
paths = make_item_list((), value, "")[0][1]
except IndexError:
return []
return [("%s/%s" % (urllib2.quote(mbean_exp), path), path, itemspec) for mbean_exp in paths]
def _process_queries(inst, queries):
for mbean_path, title, itemspec in queries:
try:
for item, out_title, value in fetch_metric(inst, mbean_path, title, itemspec):
yield item, out_title, value
except (IOError, socket.timeout):
raise SkipInstance()
except SkipMBean:
continue
except () if DEBUG else Exception:
continue
def query_instance(inst):
write_section('jolokia_info', generate_jolokia_info(inst))
# now (after jolokia_info) we're sure about the product
specs_specific = QUERY_SPECS_SPECIFIC_LEGACY.get(inst.product, [])
write_section('jolokia_metrics', generate_values(inst, specs_specific))
write_section('jolokia_metrics', generate_values(inst, QUERY_SPECS_LEGACY))
sections_specific = MBEAN_SECTIONS_SPECIFIC.get(inst.product, {})
for section_name, mbeans in sections_specific.iteritems():
write_section('jolokia_%s' % section_name, generate_json(inst, mbeans))
for section_name, mbeans in MBEAN_SECTIONS.iteritems():
write_section('jolokia_%s' % section_name, generate_json(inst, mbeans))
write_section('jolokia_generic', generate_values(inst, inst.custom_vars))
def generate_jolokia_info(inst):
# Determine type of server
try:
data = fetch_var(inst, "version", "")
except (SkipInstance, SkipMBean), exc:
yield inst.name, "ERROR", str(exc)
raise SkipInstance(exc)
info = data.get('info', {})
version = info.get('version', "unknown")
product = info.get('product', "unknown")
if inst.product is not None:
product = inst.product
else:
inst.product = product
agentversion = data.get('agent', "unknown")
yield inst.name, product, version, agentversion
def generate_values(inst, var_list):
for var in var_list:
mbean, path, title, itemspec, do_search = var[:5]
value_type = var[5] if len(var) >= 6 else None
queries = _get_queries(do_search, inst, itemspec, title, path, mbean)
for item, title, value in _process_queries(inst, queries):
if value_type:
yield item, title, value, value_type
else:
yield item, title, value
def generate_json(inst, mbeans):
for mbean in mbeans:
try:
data = inst.get_post_data(mbean, "read", use_target=True)
obj = inst.post(data)
yield inst.name, mbean, json.dumps(obj['value'])
except (IOError, socket.timeout):
raise SkipInstance()
except SkipMBean if DEBUG else Exception:
pass
def yield_configured_instances(custom_config=None):
if custom_config is None:
custom_config = get_default_config_dict()
conffile = os.path.join(os.getenv("MK_CONFDIR", "/etc/check_mk"), "jolokia.cfg")
if os.path.exists(conffile):
execfile(conffile, {}, custom_config)
# Generate list of instances to monitor. If the user has defined
# instances in his configuration, we will use this (a list of dicts).
individual_configs = custom_config.pop("instances", [{}])
for cfg in individual_configs:
keys = set(cfg.keys() + custom_config.keys())
conf_dict = dict((k, cfg.get(k, custom_config.get(k))) for k in keys)
if VERBOSE:
sys.stderr.write("DEBUG: configuration: %r\n" % conf_dict)
yield conf_dict
def main(configs_iterable=None):
if configs_iterable is None:
configs_iterable = yield_configured_instances()
for config in configs_iterable:
instance = JolokiaInstance(config)
try:
query_instance(instance)
except SkipInstance:
pass
if __name__ == "__main__":
main()
hsh_checkmk_install_jolokia_plugin:
file.managed:
- name: /usr/lib/check_mk_agent/plugins/mk_jolokia
- source: salt://checkmk/checkmk-files/mk_jolokia.py
- user: root
- group: root
- mode: 755
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment