#! /usr/local/bin/python
# Overview
# --------
#
# this is a munin plugin that lists the number of pages (including ACL-protected pages) in all wikis of a MoinMoin wikifarm
#
# Installation
# ------------
#
# Put this in your munin plugins directory. You probably want to set the following block in plugin-conf.d/munin-node so that it runs under the right user for your wiki:
#
# [moinmoin_*]
# user www
#
# Implementation notes
# --------------------
#
# it is quite koumbit-specific:
# 1. the wikifarm config is hardcoded
# 2. it relies on the "wikilist.py" file to contain the list of wiki -> url patterns
# 3. it assumes the url patterns are simple enough that they are decodable into an url
#
# also note that it reuses code from MoinMoin/wikimacro.py's SystemInfo macro
#
# finally, i tried using XMLRPC instead of native functions to fetch the data, but it ended up being slower. For the record, here is what the getPageList() call would have looked like:
# xmlrpclib.ServerProxy("http://wiki.koumbit.net/?action=xmlrpc2").getAllPages()
#
# the quick benchmark i did yielded those results for the getAllPages() vs getPageList() calls:
# xmlrpc: 2.35 real 0.12 user 0.04 sys
# native: 1.44 real 1.07 user 0.35 sys
#
# so the plugin is spending more time in the CPU (all time, actually), but it's doing it faster. it is highly possible that the CPU time spared in XMLRPC is in fact used by the server
#
# However, running tests showed that the moinmoin pagelist cache helps long-term performance a lot. The "manual" process didn't use the cache and always took a constant, long time to render the results whereas the XMLRPC method improves over time. So we switched to that.
#
# Note that the XMLRPC method doesn't find ACL-protected pages *but* finds
# system pages, and will therefore lead to a different page count
#
# (C) Copyleft 2007, The Anarcat
# Licensed under the GPLv2 or any later version

import sys, operator, os

# Both the working directory and the import path are pointed at the farm
# configuration directory before farmconfig (and, in config(), the per-wiki
# modules) are imported.
# NOTE(review): presumably the wiki configs use paths relative to this
# directory, hence the chdir -- confirm before removing it.
os.chdir('/export/wiki/config')
sys.path.insert(0, '/export/wiki/config')

from MoinMoin import wikiutil
from MoinMoin.Page import Page
from farmconfig import wikis
from re import sub
import farmconfig
from MoinMoin.request import RequestCLI


def _formatInReadableUnits(size):
    """Format a byte count as a human-readable unicode string.

    The value is divided by 1024 each time it is still larger than 9999,
    stepping from Byte to KiB, MiB and at most GiB, and is then rendered
    with one decimal followed by the unit.
    """
    size = float(size)
    unit = u' Byte'
    if size > 9999:
        unit = u' KiB'
        size /= 1024
    if size > 9999:
        unit = u' MiB'
        size /= 1024
    if size > 9999:
        unit = u' GiB'
        size /= 1024
    return u"%.1f %s" % (size, unit)


def _getDirectorySize(path):
    """Return the summed size in bytes of every file found below path.

    Returns -1 if an EnvironmentError is raised while walking the tree or
    while reading a file size.
    """
    try:
        dirsize = 0
        for root, dirs, files in os.walk(path):
            dirsize += sum([os.path.getsize(os.path.join(root, name))
                            for name in files])
    except EnvironmentError:
        dirsize = -1
    return dirsize


def _patternToUrl(pattern):
    """Turn a farmconfig url regexp into the XMLRPC endpoint url of a wiki."""
    # XXX, hack: transform the regexp into a canonical url
    # we need canonical urls in the config for this to be clean
    # look for (foo|bar) and replace with foo, e.g.:
    # (wikifarm.koumbit.net/rococo|(www.)?rocococamp.info)
    url = sub(r'\(([^\|\?]*)(\|.*\))+', r'\1', pattern)
    # remove common regexp patterns and slap a protocol to make this a real url
    url = sub(r'[\^\$]|(\.\*)', '', url)
    url = 'http://' + url
    if not url.endswith('/'):
        url += '/'
    return url + "?action=xmlrpc2"


def main():
    """Print one munin "<wiki>.value <count>" line for each wiki of the farm.

    The count is the length of the list returned by the XMLRPC getAllPages()
    call of each wiki. When a wiki cannot be queried, the error is reported
    on stderr and the munin "unknown" value (U) is printed for that wiki, so
    that one broken wiki does not hide the values of the others.
    """
    # imported here because they are only needed when fetching values
    import socket
    import xmlrpclib

    for wiki in wikis:
        # each farmconfig entry starts with the wiki name and its url regexp
        name = wiki[0]
        url = _patternToUrl(wiki[1])

        try:
            pagelist = xmlrpclib.ServerProxy(url).getAllPages()
        except (xmlrpclib.Error, socket.error, EnvironmentError):
            # sys.exc_info() is used instead of binding the exception so the
            # same syntax is valid on every Python 2 release
            sys.stderr.write("cannot get page list from %s: %s\n"
                             % (url, sys.exc_info()[1]))
            print(name + '.value U')
            continue

        print(name + '.value ' + str(len(pagelist)))

        # Previous native implementation, kept for reference (it is the
        # reason the MoinMoin imports and the helpers above are still here):
        #   request = RequestCLI(url)
        #   pagelist = request.rootpage.getPageList(user='')
        #   systemPages = [page for page in pagelist
        #                  if wikiutil.isSystemPage(request, page)]
        #   print(name + '.value ' + str(len(pagelist)-len(systemPages)))
        #   totalsize = reduce(operator.add, [Page(request, name).size() for name in pagelist])
        #   print('Accumulated page sizes' + _formatInReadableUnits(totalsize))


def config():
    """Print the munin graph configuration and one label line per wiki.

    The label of a wiki is the sitename attribute of the Config class found
    in the module named after the wiki.
    """
    # graph_info describes what getAllPages() counts: system pages are
    # included, pages hidden by ACLs are not
    print("""graph_title Wiki size
graph_vlabel Number of pages
graph_args --base 1000 -l 0
graph_scale no
graph_category Wiki
graph_info The number of pages includes system pages but excludes ACL-protected pages.""")
    for wiki in wikis:
        name = wiki[0]
        mod = __import__(name).Config
        print(name + '.label ' + mod.sitename)


if __name__ == "__main__":
    # munin calls the plugin with "config" to get the graph description and
    # without argument to get the values
    if len(sys.argv) > 1 and sys.argv[1] == 'config':
        config()
    else:
        main()