#!/usr/bin/python
#
#    Parse usage data from Round Robin Databases collected by SSK
#    Copyright (C) 2012  Anish Mangal <anish@activitycentral.com>
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#   --
#   =Changelog=
#   * 20120229: Add comments to the code
#
#   =TODO=
#   * Create a project/git repo on git.sl.o
#

import os
import re
from collections import defaultdict

import rrdtool

class StatsParser:
    def __init__(self, path):
        self._path = path

        # The dict which will store activity usage statistics
        # it is a dict of dict of dicts and the organization is as
        # follows:
        # [activity_db_name.rrd]>[db_path_without_filename]>[different_params]
        #   eg: activity_db_name.rrd = activity.org.laptop.Terminal.rrd
        #       db_path_without_filename = ./1f/1ff8e2509daff34490b47438db1d75cf0b88c20e/
        #       different_params = 'active', 'uptime' etc...
        #
        # Currently the activity 'active' stats are being stored. The
        # same method can be replicated to store different params.
        self._activity_stats = defaultdict(lambda: defaultdict(dict))

        # The dict which will store system.rrd stats. It is stored in
        # the exact same format as self._activity_stats
        self._system_stats = defaultdict(lambda: defaultdict(dict))

        # The dict which will store journal.rrd stats. It is stored in
        # the exact same format as self._activity_stats
        self._journal_stats = defaultdict(lambda: defaultdict(dict))

        # The pattern in which activity databases are stored are
        # activity.<activity_uid>.rrd
        self._activity_re = re.compile('^activity\..*\.rrd$')

        # System database
        self._system_re = re.compile('system.rrd')

        # Journal database
        self._journal_re = re.compile('journal.rrd')

        # Processed data will be output to this csv file
        self._output_csv = file('output.csv', 'w')

    def get_path(self):
        return self._path

    def process_stats(self):
        self._parse_stats()

        # Iterating over the activity stats dict on a
        # per-activity-basis
        for activity, activity_data in self._activity_stats.iteritems():
            # Will store the total uptime of each individual activity
            total_active = 0

            # Iterating over different user databases for the same
            # activity
            for user, data in activity_data.iteritems():
                # Compute the per-user activity-usage percentage:
                #
                #   'active' value for the activity / 'uptime' value for that user
                #             OR
                #   [activity/user/'active'] / [system/user/'uptime']
                #
                # Also check if the uptime value is '0', that would
                # make the percentage computation invalid
                if float(self._system_stats['system.rrd'][user]['uptime']) > 0:
                    percent =\
                            100.0 * float(data['active'])/ \
                            float(self._system_stats['system.rrd'][user]['uptime'])
                else:
                    pass

                # Increment the cumulative 'active' value for the
                # activity
                total_active += data['active']
            # Store the cumulative 'active' value of all users combined
            # in our activity stats dict
            self._activity_stats[activity]['total'] = total_active

            # Put the output in the csv file and print on terminal
            self._output_csv.write('\"%s\", \"%d\"\n' % \
                    (re.split('\.', activity)[-2], total_active))
            print '%s\t\t%d' % \
                    (re.split('\.', activity)[-2], total_active)
        self._output_csv.close()

        # The following piece of code is a stub. It can be expanded
        # following the way it is done for activities to get the
        # required data
        for journal, journal_data in self._journal_stats.iteritems():
            total_active = 0
            for user, data in journal_data.iteritems():
                total_active += data['active']

        # The following piece of code is a stub. It can be expanded
        # following the way it is done for activities to get the
        # required data
        for system, system_data in self._system_stats.iteritems():
            total_uptime = 0
            for user, data in system_data.iteritems():
                total_uptime += data['uptime']

    def _parse_stats(self):
        # We are just collecting per user stats here:
        # Activities: active
        # Journal: active
        # System: uptime
        for root, dirnames, filenames in os.walk(self._path):
            if len(filenames) > 0:
                for filename in filenames:
                    if self._activity_re.match(filename) is not None:
                        self._activity_stats[filename][root]['active'] =\
                            self._get_stats(os.path.join(root,
                                filename), 'active')
                    if self._system_re.match(filename) is not None:
                        self._system_stats[filename][root]['uptime'] =\
                            self._get_stats(os.path.join(root,
                                filename), 'uptime')
                    if self._journal_re.match(filename) is not None:
                        self._journal_stats[filename][root]['active'] =\
                            self._get_stats(os.path.join(root,
                                filename), 'active')
                    # More loops may be added here to get the required
                    # values in similar fashion.

        #DEBUG
        #print self._system_stats
        #print self._journal_stats
        #print self._activity_stats

    # This will return sum of the values in the list of values obtained
    # from the rrd
    def _find_cumulative_value(self, key_values_list):
        if sum(key_values_list) == 0:
            return 0
        else:
            # Since the list of values is like a saw-tooth wave with
            # only the highest value that is significant, we ignore the
            # rest of the values.
            # (In effect, we are finding local maximas here).
            #
            # NOTE: This logic might be needed to be improved to
            # account for noise/wrong values.
            for i in range(1, len(key_values_list)):
                if key_values_list[i] > key_values_list[i - 1]:
                    key_values_list[i - 1] = 0

            # Return the sum of the local maximas as obtained from
            # above
            return sum(key_values_list)

    # This function will get the fetch the stats for the selected path
    # and the parameter (key)
    def _get_stats(self, path, key):
        assert os.path.exists(path) == True

        # This will return a tuple of tuples and lists in the following
        # form:
        # ((start, stop, step),
        #  (key1, key2, ... keyN)
        #  [(key1_val1, key2_val1, ... keyN_val1),
        #   (key1_val2, key2_val2, ... keyN_val2)
        #    ...
        #   (key1_valM, key2_valM, ... keyN_valM)]
        # )
        raw_rrd_data = rrdtool.fetch(path, '--start', 'now-7d',\
                '--end', 'now', 'AVERAGE')
        key_index = None

        # Find out which keyid = key from our raw data above
        for i in range(len(raw_rrd_data[1])):
            if raw_rrd_data[1][i] == key:
                key_index = i

        if key_index == None:
            print 'ERROR, %s key not found, exiting' % key
            return -1

        # A simple list that contains all the values obtained from the
        # rrd database
        key_values_list = []
        i = 0
        for values_tuple in raw_rrd_data[2]:
            data = values_tuple[key_index]
            if data == None:
                key_values_list.append(0)
            else:
                key_values_list.append(int(data))

        return self._find_cumulative_value(key_values_list)

if __name__ == '__main__':
    # Entry point: parse the default sugar-stats RRD tree; output.csv
    # is written to the current working directory.
    stats_parser = StatsParser('/var/lib/sugar-stats/rrd/')
    # Parenthesized single-argument print: identical output on
    # Python 2, valid syntax on Python 3.
    print('Parsing stats at %s' % stats_parser.get_path())
    stats_parser.process_stats()

