Add cgroupstats
This patch is inspired by the discussion at
http://lkml.org/lkml/2007/4/11/187 and implements per cgroup statistics
as suggested by Andrew Morton in http://lkml.org/lkml/2007/4/11/263. The
patch is on top of 2.6.21-mm1 with Paul's cgroups v9 patches (forward
ported)
This patch implements per cgroup statistics infrastructure and re-uses
code from the taskstats interface. A new set of cgroup operations is
registered with commands and attributes. It should be very easy to
*extend* per cgroup statistics, by adding members to the cgroupstats
structure.
The current model for cgroupstats is pull-based; a push model (to post
statistics on interesting events) should be very easy to add. Currently
user space requests statistics by passing the cgroup file descriptor.
Statistics about the state of all the tasks in the cgroup are returned
to user space.
TODOs/NOTE:
This patch provides an infrastructure for implementing cgroup statistics.
Based on the needs of each controller, we can incrementally add more statistics,
event-based support for notification of statistics, and accumulation of
taskstats into cgroup statistics in the future.
Sample output
# ./cgroupstats -C /cgroup/a
sleeping 2, blocked 0, running 1, stopped 0, uninterruptible 0
# ./cgroupstats -C /cgroup/
sleeping 154, blocked 0, running 0, stopped 0, uninterruptible 0
If the approach looks good, I'll enhance and post the user space utility for
the same.
Feedback, comments, test results are always welcome!
[akpm@linux-foundation.org: build fix]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Jay Lan <jlan@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 7d4d7f9..9f360f6 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -22,6 +22,10 @@
#include <linux/delayacct.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
+#include <linux/cgroupstats.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/file.h>
#include <net/genetlink.h>
#include <asm/atomic.h>
@@ -49,6 +53,11 @@
[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
+/*
+ * Attribute policy for the cgroupstats GET command: the only attribute
+ * described here, CGROUPSTATS_CMD_ATTR_FD, is a u32. cgroupstats_user_cmd()
+ * reads it with nla_get_u32() and uses it as a file descriptor.
+ */
+static struct nla_policy
+cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] __read_mostly = {
+ [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 },
+};
+
struct listener {
struct list_head list;
pid_t pid;
@@ -372,6 +381,51 @@
return NULL;
}
+/*
+ * cgroupstats_user_cmd - genetlink handler for CGROUPSTATS_CMD_GET.
+ *
+ * Reads the u32 file descriptor from CGROUPSTATS_CMD_ATTR_FD, resolves it
+ * to a struct file and replies to info->snd_pid with a CGROUPSTATS_CMD_NEW
+ * message carrying one CGROUPSTATS_TYPE_CGROUP_STATS attribute, filled in
+ * by cgroupstats_build() from the file's dentry.
+ *
+ * Returns -EINVAL if the fd attribute is missing, 0 (without sending a
+ * reply) if the descriptor does not resolve to an open file, -EMSGSIZE if
+ * the reply has no room for the statistics attribute, otherwise the error
+ * from prepare_reply()/cgroupstats_build() or the result of send_reply().
+ */
+static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	int rc;
+	/*
+	 * Stays NULL until prepare_reply() stores a reply buffer, so the
+	 * error path only ever frees a buffer that actually exists.
+	 */
+	struct sk_buff *rep_skb = NULL;
+	struct cgroupstats *stats;
+	struct nlattr *na;
+	size_t size;
+	u32 fd;
+	struct file *file;
+	int fput_needed;
+
+	na = info->attrs[CGROUPSTATS_CMD_ATTR_FD];
+	if (!na)
+		return -EINVAL;
+
+	fd = nla_get_u32(na);
+	file = fget_light(fd, &fput_needed);
+	if (!file)
+		return 0;	/* nothing acquired yet: no cleanup needed */
+
+	size = nla_total_size(sizeof(struct cgroupstats));
+	rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb, size);
+	if (rc < 0)
+		goto err;
+
+	na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS,
+			 sizeof(struct cgroupstats));
+	if (!na) {
+		/* Do not hand a NULL attribute to nla_data()/memset(). */
+		rc = -EMSGSIZE;
+		goto err;
+	}
+	stats = nla_data(na);
+	memset(stats, 0, sizeof(*stats));
+
+	rc = cgroupstats_build(stats, file->f_dentry);
+	if (rc < 0)
+		goto err;
+
+	/* Drop the file reference before handing the reply off. */
+	fput_light(file, fput_needed);
+	return send_reply(rep_skb, info->snd_pid);
+
+err:
+	fput_light(file, fput_needed);
+	if (rep_skb)
+		nlmsg_free(rep_skb);
+	return rc;
+}
+
static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
{
int rc = 0;
@@ -522,6 +576,12 @@
.policy = taskstats_cmd_get_policy,
};
+/*
+ * Generic netlink operation for cgroup statistics: CGROUPSTATS_CMD_GET
+ * requests are dispatched to cgroupstats_user_cmd() and use
+ * cgroupstats_cmd_get_policy as their attribute policy. Registered on the
+ * taskstats family alongside taskstats_ops.
+ */
+static struct genl_ops cgroupstats_ops = {
+ .cmd = CGROUPSTATS_CMD_GET,
+ .doit = cgroupstats_user_cmd,
+ .policy = cgroupstats_cmd_get_policy,
+};
+
/* Needed early in initialization */
void __init taskstats_init_early(void)
{
@@ -546,8 +606,15 @@
if (rc < 0)
goto err;
+ rc = genl_register_ops(&family, &cgroupstats_ops);
+ if (rc < 0)
+ goto err_cgroup_ops;
+
family_registered = 1;
+ printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
return 0;
+err_cgroup_ops:
+ genl_unregister_ops(&family, &taskstats_ops);
err:
genl_unregister_family(&family);
return rc;