Mathieu Desnoyers | 97e1c18 | 2008-07-18 12:16:16 -0400 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2008 Mathieu Desnoyers |
| 3 | * |
| 4 | * This program is free software; you can redistribute it and/or modify |
| 5 | * it under the terms of the GNU General Public License as published by |
| 6 | * the Free Software Foundation; either version 2 of the License, or |
| 7 | * (at your option) any later version. |
| 8 | * |
| 9 | * This program is distributed in the hope that it will be useful, |
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | * GNU General Public License for more details. |
| 13 | * |
| 14 | * You should have received a copy of the GNU General Public License |
| 15 | * along with this program; if not, write to the Free Software |
| 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 17 | */ |
| 18 | #include <linux/module.h> |
| 19 | #include <linux/mutex.h> |
| 20 | #include <linux/types.h> |
| 21 | #include <linux/jhash.h> |
| 22 | #include <linux/list.h> |
| 23 | #include <linux/rcupdate.h> |
| 24 | #include <linux/tracepoint.h> |
| 25 | #include <linux/err.h> |
| 26 | #include <linux/slab.h> |
| 27 | |
| 28 | extern struct tracepoint __start___tracepoints[]; |
| 29 | extern struct tracepoint __stop___tracepoints[]; |
| 30 | |
| 31 | /* Set to 1 to enable tracepoint debug output */ |
| 32 | static const int tracepoint_debug; |
| 33 | |
| 34 | /* |
| 35 | * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the |
| 36 | * builtin and module tracepoints and the hash table. |
| 37 | */ |
| 38 | static DEFINE_MUTEX(tracepoints_mutex); |
| 39 | |
| 40 | /* |
| 41 | * Tracepoint hash table, containing the active tracepoints. |
| 42 | * Protected by tracepoints_mutex. |
| 43 | */ |
| 44 | #define TRACEPOINT_HASH_BITS 6 |
| 45 | #define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) |
| 46 | |
| 47 | /* |
| 48 | * Note about RCU : |
| 49 | * It is used to to delay the free of multiple probes array until a quiescent |
| 50 | * state is reached. |
| 51 | * Tracepoint entries modifications are protected by the tracepoints_mutex. |
| 52 | */ |
| 53 | struct tracepoint_entry { |
| 54 | struct hlist_node hlist; |
| 55 | void **funcs; |
| 56 | int refcount; /* Number of times armed. 0 if disarmed. */ |
| 57 | struct rcu_head rcu; |
| 58 | void *oldptr; |
| 59 | unsigned char rcu_pending:1; |
| 60 | char name[0]; |
| 61 | }; |
| 62 | |
| 63 | static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; |
| 64 | |
| 65 | static void free_old_closure(struct rcu_head *head) |
| 66 | { |
| 67 | struct tracepoint_entry *entry = container_of(head, |
| 68 | struct tracepoint_entry, rcu); |
| 69 | kfree(entry->oldptr); |
| 70 | /* Make sure we free the data before setting the pending flag to 0 */ |
| 71 | smp_wmb(); |
| 72 | entry->rcu_pending = 0; |
| 73 | } |
| 74 | |
| 75 | static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old) |
| 76 | { |
| 77 | if (!old) |
| 78 | return; |
| 79 | entry->oldptr = old; |
| 80 | entry->rcu_pending = 1; |
| 81 | /* write rcu_pending before calling the RCU callback */ |
| 82 | smp_wmb(); |
Mathieu Desnoyers | ca2db6c | 2008-09-30 01:49:39 -0400 | [diff] [blame] | 83 | call_rcu_sched(&entry->rcu, free_old_closure); |
Mathieu Desnoyers | 97e1c18 | 2008-07-18 12:16:16 -0400 | [diff] [blame] | 84 | } |
| 85 | |
| 86 | static void debug_print_probes(struct tracepoint_entry *entry) |
| 87 | { |
| 88 | int i; |
| 89 | |
| 90 | if (!tracepoint_debug) |
| 91 | return; |
| 92 | |
| 93 | for (i = 0; entry->funcs[i]; i++) |
| 94 | printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]); |
| 95 | } |
| 96 | |
| 97 | static void * |
| 98 | tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe) |
| 99 | { |
| 100 | int nr_probes = 0; |
| 101 | void **old, **new; |
| 102 | |
| 103 | WARN_ON(!probe); |
| 104 | |
| 105 | debug_print_probes(entry); |
| 106 | old = entry->funcs; |
| 107 | if (old) { |
| 108 | /* (N -> N+1), (N != 0, 1) probes */ |
| 109 | for (nr_probes = 0; old[nr_probes]; nr_probes++) |
| 110 | if (old[nr_probes] == probe) |
| 111 | return ERR_PTR(-EEXIST); |
| 112 | } |
| 113 | /* + 2 : one for new probe, one for NULL func */ |
| 114 | new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL); |
| 115 | if (new == NULL) |
| 116 | return ERR_PTR(-ENOMEM); |
| 117 | if (old) |
| 118 | memcpy(new, old, nr_probes * sizeof(void *)); |
| 119 | new[nr_probes] = probe; |
| 120 | entry->refcount = nr_probes + 1; |
| 121 | entry->funcs = new; |
| 122 | debug_print_probes(entry); |
| 123 | return old; |
| 124 | } |
| 125 | |
| 126 | static void * |
| 127 | tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe) |
| 128 | { |
| 129 | int nr_probes = 0, nr_del = 0, i; |
| 130 | void **old, **new; |
| 131 | |
| 132 | old = entry->funcs; |
| 133 | |
| 134 | debug_print_probes(entry); |
| 135 | /* (N -> M), (N > 1, M >= 0) probes */ |
| 136 | for (nr_probes = 0; old[nr_probes]; nr_probes++) { |
| 137 | if ((!probe || old[nr_probes] == probe)) |
| 138 | nr_del++; |
| 139 | } |
| 140 | |
| 141 | if (nr_probes - nr_del == 0) { |
| 142 | /* N -> 0, (N > 1) */ |
| 143 | entry->funcs = NULL; |
| 144 | entry->refcount = 0; |
| 145 | debug_print_probes(entry); |
| 146 | return old; |
| 147 | } else { |
| 148 | int j = 0; |
| 149 | /* N -> M, (N > 1, M > 0) */ |
| 150 | /* + 1 for NULL */ |
| 151 | new = kzalloc((nr_probes - nr_del + 1) |
| 152 | * sizeof(void *), GFP_KERNEL); |
| 153 | if (new == NULL) |
| 154 | return ERR_PTR(-ENOMEM); |
| 155 | for (i = 0; old[i]; i++) |
| 156 | if ((probe && old[i] != probe)) |
| 157 | new[j++] = old[i]; |
| 158 | entry->refcount = nr_probes - nr_del; |
| 159 | entry->funcs = new; |
| 160 | } |
| 161 | debug_print_probes(entry); |
| 162 | return old; |
| 163 | } |
| 164 | |
| 165 | /* |
| 166 | * Get tracepoint if the tracepoint is present in the tracepoint hash table. |
| 167 | * Must be called with tracepoints_mutex held. |
| 168 | * Returns NULL if not present. |
| 169 | */ |
| 170 | static struct tracepoint_entry *get_tracepoint(const char *name) |
| 171 | { |
| 172 | struct hlist_head *head; |
| 173 | struct hlist_node *node; |
| 174 | struct tracepoint_entry *e; |
| 175 | u32 hash = jhash(name, strlen(name), 0); |
| 176 | |
Mathieu Desnoyers | 9795302 | 2008-07-24 16:37:23 -0400 | [diff] [blame] | 177 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; |
Mathieu Desnoyers | 97e1c18 | 2008-07-18 12:16:16 -0400 | [diff] [blame] | 178 | hlist_for_each_entry(e, node, head, hlist) { |
| 179 | if (!strcmp(name, e->name)) |
| 180 | return e; |
| 181 | } |
| 182 | return NULL; |
| 183 | } |
| 184 | |
| 185 | /* |
| 186 | * Add the tracepoint to the tracepoint hash table. Must be called with |
| 187 | * tracepoints_mutex held. |
| 188 | */ |
| 189 | static struct tracepoint_entry *add_tracepoint(const char *name) |
| 190 | { |
| 191 | struct hlist_head *head; |
| 192 | struct hlist_node *node; |
| 193 | struct tracepoint_entry *e; |
| 194 | size_t name_len = strlen(name) + 1; |
| 195 | u32 hash = jhash(name, name_len-1, 0); |
| 196 | |
Mathieu Desnoyers | 9795302 | 2008-07-24 16:37:23 -0400 | [diff] [blame] | 197 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; |
Mathieu Desnoyers | 97e1c18 | 2008-07-18 12:16:16 -0400 | [diff] [blame] | 198 | hlist_for_each_entry(e, node, head, hlist) { |
| 199 | if (!strcmp(name, e->name)) { |
| 200 | printk(KERN_NOTICE |
| 201 | "tracepoint %s busy\n", name); |
| 202 | return ERR_PTR(-EEXIST); /* Already there */ |
| 203 | } |
| 204 | } |
| 205 | /* |
| 206 | * Using kmalloc here to allocate a variable length element. Could |
| 207 | * cause some memory fragmentation if overused. |
| 208 | */ |
| 209 | e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL); |
| 210 | if (!e) |
| 211 | return ERR_PTR(-ENOMEM); |
| 212 | memcpy(&e->name[0], name, name_len); |
| 213 | e->funcs = NULL; |
| 214 | e->refcount = 0; |
| 215 | e->rcu_pending = 0; |
| 216 | hlist_add_head(&e->hlist, head); |
| 217 | return e; |
| 218 | } |
| 219 | |
| 220 | /* |
| 221 | * Remove the tracepoint from the tracepoint hash table. Must be called with |
| 222 | * mutex_lock held. |
| 223 | */ |
| 224 | static int remove_tracepoint(const char *name) |
| 225 | { |
| 226 | struct hlist_head *head; |
| 227 | struct hlist_node *node; |
| 228 | struct tracepoint_entry *e; |
| 229 | int found = 0; |
| 230 | size_t len = strlen(name) + 1; |
| 231 | u32 hash = jhash(name, len-1, 0); |
| 232 | |
Mathieu Desnoyers | 9795302 | 2008-07-24 16:37:23 -0400 | [diff] [blame] | 233 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; |
Mathieu Desnoyers | 97e1c18 | 2008-07-18 12:16:16 -0400 | [diff] [blame] | 234 | hlist_for_each_entry(e, node, head, hlist) { |
| 235 | if (!strcmp(name, e->name)) { |
| 236 | found = 1; |
| 237 | break; |
| 238 | } |
| 239 | } |
| 240 | if (!found) |
| 241 | return -ENOENT; |
| 242 | if (e->refcount) |
| 243 | return -EBUSY; |
| 244 | hlist_del(&e->hlist); |
Mathieu Desnoyers | ca2db6c | 2008-09-30 01:49:39 -0400 | [diff] [blame] | 245 | /* Make sure the call_rcu_sched has been executed */ |
Mathieu Desnoyers | 97e1c18 | 2008-07-18 12:16:16 -0400 | [diff] [blame] | 246 | if (e->rcu_pending) |
Mathieu Desnoyers | ca2db6c | 2008-09-30 01:49:39 -0400 | [diff] [blame] | 247 | rcu_barrier_sched(); |
Mathieu Desnoyers | 97e1c18 | 2008-07-18 12:16:16 -0400 | [diff] [blame] | 248 | kfree(e); |
| 249 | return 0; |
| 250 | } |
| 251 | |
| 252 | /* |
| 253 | * Sets the probe callback corresponding to one tracepoint. |
| 254 | */ |
| 255 | static void set_tracepoint(struct tracepoint_entry **entry, |
| 256 | struct tracepoint *elem, int active) |
| 257 | { |
| 258 | WARN_ON(strcmp((*entry)->name, elem->name) != 0); |
| 259 | |
| 260 | /* |
| 261 | * rcu_assign_pointer has a smp_wmb() which makes sure that the new |
| 262 | * probe callbacks array is consistent before setting a pointer to it. |
| 263 | * This array is referenced by __DO_TRACE from |
| 264 | * include/linux/tracepoints.h. A matching smp_read_barrier_depends() |
| 265 | * is used. |
| 266 | */ |
| 267 | rcu_assign_pointer(elem->funcs, (*entry)->funcs); |
| 268 | elem->state = active; |
| 269 | } |
| 270 | |
| 271 | /* |
| 272 | * Disable a tracepoint and its probe callback. |
| 273 | * Note: only waiting an RCU period after setting elem->call to the empty |
| 274 | * function insures that the original callback is not used anymore. This insured |
| 275 | * by preempt_disable around the call site. |
| 276 | */ |
| 277 | static void disable_tracepoint(struct tracepoint *elem) |
| 278 | { |
| 279 | elem->state = 0; |
| 280 | } |
| 281 | |
| 282 | /** |
| 283 | * tracepoint_update_probe_range - Update a probe range |
| 284 | * @begin: beginning of the range |
| 285 | * @end: end of the range |
| 286 | * |
| 287 | * Updates the probe callback corresponding to a range of tracepoints. |
| 288 | */ |
| 289 | void tracepoint_update_probe_range(struct tracepoint *begin, |
| 290 | struct tracepoint *end) |
| 291 | { |
| 292 | struct tracepoint *iter; |
| 293 | struct tracepoint_entry *mark_entry; |
| 294 | |
| 295 | mutex_lock(&tracepoints_mutex); |
| 296 | for (iter = begin; iter < end; iter++) { |
| 297 | mark_entry = get_tracepoint(iter->name); |
| 298 | if (mark_entry) { |
| 299 | set_tracepoint(&mark_entry, iter, |
| 300 | !!mark_entry->refcount); |
| 301 | } else { |
| 302 | disable_tracepoint(iter); |
| 303 | } |
| 304 | } |
| 305 | mutex_unlock(&tracepoints_mutex); |
| 306 | } |
| 307 | |
| 308 | /* |
| 309 | * Update probes, removing the faulty probes. |
| 310 | */ |
| 311 | static void tracepoint_update_probes(void) |
| 312 | { |
| 313 | /* Core kernel tracepoints */ |
| 314 | tracepoint_update_probe_range(__start___tracepoints, |
| 315 | __stop___tracepoints); |
| 316 | /* tracepoints in modules. */ |
| 317 | module_update_tracepoints(); |
| 318 | } |
| 319 | |
| 320 | /** |
| 321 | * tracepoint_probe_register - Connect a probe to a tracepoint |
| 322 | * @name: tracepoint name |
| 323 | * @probe: probe handler |
| 324 | * |
| 325 | * Returns 0 if ok, error value on error. |
| 326 | * The probe address must at least be aligned on the architecture pointer size. |
| 327 | */ |
| 328 | int tracepoint_probe_register(const char *name, void *probe) |
| 329 | { |
| 330 | struct tracepoint_entry *entry; |
| 331 | int ret = 0; |
| 332 | void *old; |
| 333 | |
| 334 | mutex_lock(&tracepoints_mutex); |
| 335 | entry = get_tracepoint(name); |
| 336 | if (!entry) { |
| 337 | entry = add_tracepoint(name); |
| 338 | if (IS_ERR(entry)) { |
| 339 | ret = PTR_ERR(entry); |
| 340 | goto end; |
| 341 | } |
| 342 | } |
| 343 | /* |
Mathieu Desnoyers | ca2db6c | 2008-09-30 01:49:39 -0400 | [diff] [blame] | 344 | * If we detect that a call_rcu_sched is pending for this tracepoint, |
Mathieu Desnoyers | 97e1c18 | 2008-07-18 12:16:16 -0400 | [diff] [blame] | 345 | * make sure it's executed now. |
| 346 | */ |
| 347 | if (entry->rcu_pending) |
Mathieu Desnoyers | ca2db6c | 2008-09-30 01:49:39 -0400 | [diff] [blame] | 348 | rcu_barrier_sched(); |
Mathieu Desnoyers | 97e1c18 | 2008-07-18 12:16:16 -0400 | [diff] [blame] | 349 | old = tracepoint_entry_add_probe(entry, probe); |
| 350 | if (IS_ERR(old)) { |
| 351 | ret = PTR_ERR(old); |
| 352 | goto end; |
| 353 | } |
| 354 | mutex_unlock(&tracepoints_mutex); |
| 355 | tracepoint_update_probes(); /* may update entry */ |
| 356 | mutex_lock(&tracepoints_mutex); |
| 357 | entry = get_tracepoint(name); |
| 358 | WARN_ON(!entry); |
Mathieu Desnoyers | 9a1e969 | 2008-09-30 01:51:12 -0400 | [diff] [blame] | 359 | if (entry->rcu_pending) |
| 360 | rcu_barrier_sched(); |
Mathieu Desnoyers | 97e1c18 | 2008-07-18 12:16:16 -0400 | [diff] [blame] | 361 | tracepoint_entry_free_old(entry, old); |
| 362 | end: |
| 363 | mutex_unlock(&tracepoints_mutex); |
| 364 | return ret; |
| 365 | } |
| 366 | EXPORT_SYMBOL_GPL(tracepoint_probe_register); |
| 367 | |
| 368 | /** |
| 369 | * tracepoint_probe_unregister - Disconnect a probe from a tracepoint |
| 370 | * @name: tracepoint name |
| 371 | * @probe: probe function pointer |
| 372 | * |
| 373 | * We do not need to call a synchronize_sched to make sure the probes have |
| 374 | * finished running before doing a module unload, because the module unload |
| 375 | * itself uses stop_machine(), which insures that every preempt disabled section |
| 376 | * have finished. |
| 377 | */ |
| 378 | int tracepoint_probe_unregister(const char *name, void *probe) |
| 379 | { |
| 380 | struct tracepoint_entry *entry; |
| 381 | void *old; |
| 382 | int ret = -ENOENT; |
| 383 | |
| 384 | mutex_lock(&tracepoints_mutex); |
| 385 | entry = get_tracepoint(name); |
| 386 | if (!entry) |
| 387 | goto end; |
| 388 | if (entry->rcu_pending) |
Mathieu Desnoyers | ca2db6c | 2008-09-30 01:49:39 -0400 | [diff] [blame] | 389 | rcu_barrier_sched(); |
Mathieu Desnoyers | 97e1c18 | 2008-07-18 12:16:16 -0400 | [diff] [blame] | 390 | old = tracepoint_entry_remove_probe(entry, probe); |
| 391 | mutex_unlock(&tracepoints_mutex); |
| 392 | tracepoint_update_probes(); /* may update entry */ |
| 393 | mutex_lock(&tracepoints_mutex); |
| 394 | entry = get_tracepoint(name); |
| 395 | if (!entry) |
| 396 | goto end; |
Mathieu Desnoyers | 9a1e969 | 2008-09-30 01:51:12 -0400 | [diff] [blame] | 397 | if (entry->rcu_pending) |
| 398 | rcu_barrier_sched(); |
Mathieu Desnoyers | 97e1c18 | 2008-07-18 12:16:16 -0400 | [diff] [blame] | 399 | tracepoint_entry_free_old(entry, old); |
| 400 | remove_tracepoint(name); /* Ignore busy error message */ |
| 401 | ret = 0; |
| 402 | end: |
| 403 | mutex_unlock(&tracepoints_mutex); |
| 404 | return ret; |
| 405 | } |
| 406 | EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); |
| 407 | |
| 408 | /** |
| 409 | * tracepoint_get_iter_range - Get a next tracepoint iterator given a range. |
| 410 | * @tracepoint: current tracepoints (in), next tracepoint (out) |
| 411 | * @begin: beginning of the range |
| 412 | * @end: end of the range |
| 413 | * |
| 414 | * Returns whether a next tracepoint has been found (1) or not (0). |
| 415 | * Will return the first tracepoint in the range if the input tracepoint is |
| 416 | * NULL. |
| 417 | */ |
| 418 | int tracepoint_get_iter_range(struct tracepoint **tracepoint, |
| 419 | struct tracepoint *begin, struct tracepoint *end) |
| 420 | { |
| 421 | if (!*tracepoint && begin != end) { |
| 422 | *tracepoint = begin; |
| 423 | return 1; |
| 424 | } |
| 425 | if (*tracepoint >= begin && *tracepoint < end) |
| 426 | return 1; |
| 427 | return 0; |
| 428 | } |
| 429 | EXPORT_SYMBOL_GPL(tracepoint_get_iter_range); |
| 430 | |
| 431 | static void tracepoint_get_iter(struct tracepoint_iter *iter) |
| 432 | { |
| 433 | int found = 0; |
| 434 | |
| 435 | /* Core kernel tracepoints */ |
| 436 | if (!iter->module) { |
| 437 | found = tracepoint_get_iter_range(&iter->tracepoint, |
| 438 | __start___tracepoints, __stop___tracepoints); |
| 439 | if (found) |
| 440 | goto end; |
| 441 | } |
| 442 | /* tracepoints in modules. */ |
| 443 | found = module_get_iter_tracepoints(iter); |
| 444 | end: |
| 445 | if (!found) |
| 446 | tracepoint_iter_reset(iter); |
| 447 | } |
| 448 | |
/*
 * Begin an iteration: position @iter on the tracepoint it currently
 * designates, or on the first available one if it has been reset.
 */
void tracepoint_iter_start(struct tracepoint_iter *iter)
{
	tracepoint_get_iter(iter);
}
EXPORT_SYMBOL_GPL(tracepoint_iter_start);
| 454 | |
| 455 | void tracepoint_iter_next(struct tracepoint_iter *iter) |
| 456 | { |
| 457 | iter->tracepoint++; |
| 458 | /* |
| 459 | * iter->tracepoint may be invalid because we blindly incremented it. |
| 460 | * Make sure it is valid by marshalling on the tracepoints, getting the |
| 461 | * tracepoints from following modules if necessary. |
| 462 | */ |
| 463 | tracepoint_get_iter(iter); |
| 464 | } |
| 465 | EXPORT_SYMBOL_GPL(tracepoint_iter_next); |
| 466 | |
/* End an iteration. Currently there is nothing to release. */
void tracepoint_iter_stop(struct tracepoint_iter *iter)
{
}
EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
| 471 | |
| 472 | void tracepoint_iter_reset(struct tracepoint_iter *iter) |
| 473 | { |
| 474 | iter->module = NULL; |
| 475 | iter->tracepoint = NULL; |
| 476 | } |
| 477 | EXPORT_SYMBOL_GPL(tracepoint_iter_reset); |