Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
new file mode 100644
index 0000000..8082807
--- /dev/null
+++ b/net/xfrm/xfrm_policy.c
@@ -0,0 +1,1367 @@
+/*
+ * xfrm_policy.c
+ *
+ * Changes:
+ * Mitsuru KANDA @USAGI
+ * Kazunori MIYAZAWA @USAGI
+ * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * IPv6 support
+ * Kazunori MIYAZAWA @USAGI
+ * YOSHIFUJI Hideaki
+ * Split up af-specific portion
+ * Derek Atkins <derek@ihtfp.com> Add the post_input processor
+ *
+ */
+
+#include <asm/bug.h>
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/kmod.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
+
+DECLARE_MUTEX(xfrm_cfg_sem);
+EXPORT_SYMBOL(xfrm_cfg_sem);
+
+static DEFINE_RWLOCK(xfrm_policy_lock);
+
+struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
+EXPORT_SYMBOL(xfrm_policy_list);
+
+static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
+static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
+
+static kmem_cache_t *xfrm_dst_cache;
+
+static struct work_struct xfrm_policy_gc_work;
+static struct list_head xfrm_policy_gc_list =
+ LIST_HEAD_INIT(xfrm_policy_gc_list);
+static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
+
+static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
+static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
+
+int xfrm_register_type(struct xfrm_type *type, unsigned short family)
+{
+ struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+ struct xfrm_type_map *typemap;
+ int err = 0;
+
+ if (unlikely(afinfo == NULL))
+ return -EAFNOSUPPORT;
+ typemap = afinfo->type_map;
+
+ write_lock(&typemap->lock);
+ if (likely(typemap->map[type->proto] == NULL))
+ typemap->map[type->proto] = type;
+ else
+ err = -EEXIST;
+ write_unlock(&typemap->lock);
+ xfrm_policy_put_afinfo(afinfo);
+ return err;
+}
+EXPORT_SYMBOL(xfrm_register_type);
+
+int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
+{
+ struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+ struct xfrm_type_map *typemap;
+ int err = 0;
+
+ if (unlikely(afinfo == NULL))
+ return -EAFNOSUPPORT;
+ typemap = afinfo->type_map;
+
+ write_lock(&typemap->lock);
+ if (unlikely(typemap->map[type->proto] != type))
+ err = -ENOENT;
+ else
+ typemap->map[type->proto] = NULL;
+ write_unlock(&typemap->lock);
+ xfrm_policy_put_afinfo(afinfo);
+ return err;
+}
+EXPORT_SYMBOL(xfrm_unregister_type);
+
+struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
+{
+ struct xfrm_policy_afinfo *afinfo;
+ struct xfrm_type_map *typemap;
+ struct xfrm_type *type;
+ int modload_attempted = 0;
+
+retry:
+ afinfo = xfrm_policy_get_afinfo(family);
+ if (unlikely(afinfo == NULL))
+ return NULL;
+ typemap = afinfo->type_map;
+
+ read_lock(&typemap->lock);
+ type = typemap->map[proto];
+ if (unlikely(type && !try_module_get(type->owner)))
+ type = NULL;
+ read_unlock(&typemap->lock);
+ if (!type && !modload_attempted) {
+ xfrm_policy_put_afinfo(afinfo);
+ request_module("xfrm-type-%d-%d",
+ (int) family, (int) proto);
+ modload_attempted = 1;
+ goto retry;
+ }
+
+ xfrm_policy_put_afinfo(afinfo);
+ return type;
+}
+EXPORT_SYMBOL(xfrm_get_type);
+
+int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl,
+ unsigned short family)
+{
+ struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+ int err = 0;
+
+ if (unlikely(afinfo == NULL))
+ return -EAFNOSUPPORT;
+
+ if (likely(afinfo->dst_lookup != NULL))
+ err = afinfo->dst_lookup(dst, fl);
+ else
+ err = -EINVAL;
+ xfrm_policy_put_afinfo(afinfo);
+ return err;
+}
+EXPORT_SYMBOL(xfrm_dst_lookup);
+
+void xfrm_put_type(struct xfrm_type *type)
+{
+ module_put(type->owner);
+}
+
+static inline unsigned long make_jiffies(long secs)
+{
+ if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
+ return MAX_SCHEDULE_TIMEOUT-1;
+ else
+ return secs*HZ;
+}
+
+static void xfrm_policy_timer(unsigned long data)
+{
+ struct xfrm_policy *xp = (struct xfrm_policy*)data;
+ unsigned long now = (unsigned long)xtime.tv_sec;
+ long next = LONG_MAX;
+ int warn = 0;
+ int dir;
+
+ read_lock(&xp->lock);
+
+ if (xp->dead)
+ goto out;
+
+ dir = xp->index & 7;
+
+ if (xp->lft.hard_add_expires_seconds) {
+ long tmo = xp->lft.hard_add_expires_seconds +
+ xp->curlft.add_time - now;
+ if (tmo <= 0)
+ goto expired;
+ if (tmo < next)
+ next = tmo;
+ }
+ if (xp->lft.hard_use_expires_seconds) {
+ long tmo = xp->lft.hard_use_expires_seconds +
+ (xp->curlft.use_time ? : xp->curlft.add_time) - now;
+ if (tmo <= 0)
+ goto expired;
+ if (tmo < next)
+ next = tmo;
+ }
+ if (xp->lft.soft_add_expires_seconds) {
+ long tmo = xp->lft.soft_add_expires_seconds +
+ xp->curlft.add_time - now;
+ if (tmo <= 0) {
+ warn = 1;
+ tmo = XFRM_KM_TIMEOUT;
+ }
+ if (tmo < next)
+ next = tmo;
+ }
+ if (xp->lft.soft_use_expires_seconds) {
+ long tmo = xp->lft.soft_use_expires_seconds +
+ (xp->curlft.use_time ? : xp->curlft.add_time) - now;
+ if (tmo <= 0) {
+ warn = 1;
+ tmo = XFRM_KM_TIMEOUT;
+ }
+ if (tmo < next)
+ next = tmo;
+ }
+
+ if (warn)
+ km_policy_expired(xp, dir, 0);
+ if (next != LONG_MAX &&
+ !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
+ xfrm_pol_hold(xp);
+
+out:
+ read_unlock(&xp->lock);
+ xfrm_pol_put(xp);
+ return;
+
+expired:
+ read_unlock(&xp->lock);
+ km_policy_expired(xp, dir, 1);
+ xfrm_policy_delete(xp, dir);
+ xfrm_pol_put(xp);
+}
+
+
+/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
+ * SPD calls.
+ */
+
+struct xfrm_policy *xfrm_policy_alloc(int gfp)
+{
+ struct xfrm_policy *policy;
+
+ policy = kmalloc(sizeof(struct xfrm_policy), gfp);
+
+ if (policy) {
+ memset(policy, 0, sizeof(struct xfrm_policy));
+ atomic_set(&policy->refcnt, 1);
+ rwlock_init(&policy->lock);
+ init_timer(&policy->timer);
+ policy->timer.data = (unsigned long)policy;
+ policy->timer.function = xfrm_policy_timer;
+ }
+ return policy;
+}
+EXPORT_SYMBOL(xfrm_policy_alloc);
+
+/* Destroy xfrm_policy: descendant resources must be released to this moment. */
+
+void __xfrm_policy_destroy(struct xfrm_policy *policy)
+{
+ if (!policy->dead)
+ BUG();
+
+ if (policy->bundles)
+ BUG();
+
+ if (del_timer(&policy->timer))
+ BUG();
+
+ kfree(policy);
+}
+EXPORT_SYMBOL(__xfrm_policy_destroy);
+
+static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
+{
+ struct dst_entry *dst;
+
+ while ((dst = policy->bundles) != NULL) {
+ policy->bundles = dst->next;
+ dst_free(dst);
+ }
+
+ if (del_timer(&policy->timer))
+ atomic_dec(&policy->refcnt);
+
+ if (atomic_read(&policy->refcnt) > 1)
+ flow_cache_flush();
+
+ xfrm_pol_put(policy);
+}
+
+static void xfrm_policy_gc_task(void *data)
+{
+ struct xfrm_policy *policy;
+ struct list_head *entry, *tmp;
+ struct list_head gc_list = LIST_HEAD_INIT(gc_list);
+
+ spin_lock_bh(&xfrm_policy_gc_lock);
+ list_splice_init(&xfrm_policy_gc_list, &gc_list);
+ spin_unlock_bh(&xfrm_policy_gc_lock);
+
+ list_for_each_safe(entry, tmp, &gc_list) {
+ policy = list_entry(entry, struct xfrm_policy, list);
+ xfrm_policy_gc_kill(policy);
+ }
+}
+
+/* Rule must be locked. Release descentant resources, announce
+ * entry dead. The rule must be unlinked from lists to the moment.
+ */
+
+static void xfrm_policy_kill(struct xfrm_policy *policy)
+{
+ int dead;
+
+ write_lock_bh(&policy->lock);
+ dead = policy->dead;
+ policy->dead = 1;
+ write_unlock_bh(&policy->lock);
+
+ if (unlikely(dead)) {
+ WARN_ON(1);
+ return;
+ }
+
+ spin_lock(&xfrm_policy_gc_lock);
+ list_add(&policy->list, &xfrm_policy_gc_list);
+ spin_unlock(&xfrm_policy_gc_lock);
+
+ schedule_work(&xfrm_policy_gc_work);
+}
+
+/* Generate new index... KAME seems to generate them ordered by cost
+ * of an absolute inpredictability of ordering of rules. This will not pass. */
+static u32 xfrm_gen_index(int dir)
+{
+ u32 idx;
+ struct xfrm_policy *p;
+ static u32 idx_generator;
+
+ for (;;) {
+ idx = (idx_generator | dir);
+ idx_generator += 8;
+ if (idx == 0)
+ idx = 8;
+ for (p = xfrm_policy_list[dir]; p; p = p->next) {
+ if (p->index == idx)
+ break;
+ }
+ if (!p)
+ return idx;
+ }
+}
+
+int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+{
+ struct xfrm_policy *pol, **p;
+ struct xfrm_policy *delpol = NULL;
+ struct xfrm_policy **newpos = NULL;
+
+ write_lock_bh(&xfrm_policy_lock);
+ for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
+ if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
+ if (excl) {
+ write_unlock_bh(&xfrm_policy_lock);
+ return -EEXIST;
+ }
+ *p = pol->next;
+ delpol = pol;
+ if (policy->priority > pol->priority)
+ continue;
+ } else if (policy->priority >= pol->priority) {
+ p = &pol->next;
+ continue;
+ }
+ if (!newpos)
+ newpos = p;
+ if (delpol)
+ break;
+ p = &pol->next;
+ }
+ if (newpos)
+ p = newpos;
+ xfrm_pol_hold(policy);
+ policy->next = *p;
+ *p = policy;
+ atomic_inc(&flow_cache_genid);
+ policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
+ policy->curlft.add_time = (unsigned long)xtime.tv_sec;
+ policy->curlft.use_time = 0;
+ if (!mod_timer(&policy->timer, jiffies + HZ))
+ xfrm_pol_hold(policy);
+ write_unlock_bh(&xfrm_policy_lock);
+
+ if (delpol) {
+ xfrm_policy_kill(delpol);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(xfrm_policy_insert);
+
+struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
+ int delete)
+{
+ struct xfrm_policy *pol, **p;
+
+ write_lock_bh(&xfrm_policy_lock);
+ for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
+ if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) {
+ xfrm_pol_hold(pol);
+ if (delete)
+ *p = pol->next;
+ break;
+ }
+ }
+ write_unlock_bh(&xfrm_policy_lock);
+
+ if (pol && delete) {
+ atomic_inc(&flow_cache_genid);
+ xfrm_policy_kill(pol);
+ }
+ return pol;
+}
+EXPORT_SYMBOL(xfrm_policy_bysel);
+
+struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
+{
+ struct xfrm_policy *pol, **p;
+
+ write_lock_bh(&xfrm_policy_lock);
+ for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) {
+ if (pol->index == id) {
+ xfrm_pol_hold(pol);
+ if (delete)
+ *p = pol->next;
+ break;
+ }
+ }
+ write_unlock_bh(&xfrm_policy_lock);
+
+ if (pol && delete) {
+ atomic_inc(&flow_cache_genid);
+ xfrm_policy_kill(pol);
+ }
+ return pol;
+}
+EXPORT_SYMBOL(xfrm_policy_byid);
+
+void xfrm_policy_flush(void)
+{
+ struct xfrm_policy *xp;
+ int dir;
+
+ write_lock_bh(&xfrm_policy_lock);
+ for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
+ while ((xp = xfrm_policy_list[dir]) != NULL) {
+ xfrm_policy_list[dir] = xp->next;
+ write_unlock_bh(&xfrm_policy_lock);
+
+ xfrm_policy_kill(xp);
+
+ write_lock_bh(&xfrm_policy_lock);
+ }
+ }
+ atomic_inc(&flow_cache_genid);
+ write_unlock_bh(&xfrm_policy_lock);
+}
+EXPORT_SYMBOL(xfrm_policy_flush);
+
+int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
+ void *data)
+{
+ struct xfrm_policy *xp;
+ int dir;
+ int count = 0;
+ int error = 0;
+
+ read_lock_bh(&xfrm_policy_lock);
+ for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
+ for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
+ count++;
+ }
+
+ if (count == 0) {
+ error = -ENOENT;
+ goto out;
+ }
+
+ for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
+ for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
+ error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
+ if (error)
+ goto out;
+ }
+ }
+
+out:
+ read_unlock_bh(&xfrm_policy_lock);
+ return error;
+}
+EXPORT_SYMBOL(xfrm_policy_walk);
+
+/* Find policy to apply to this flow. */
+
+static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
+ void **objp, atomic_t **obj_refp)
+{
+ struct xfrm_policy *pol;
+
+ read_lock_bh(&xfrm_policy_lock);
+ for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
+ struct xfrm_selector *sel = &pol->selector;
+ int match;
+
+ if (pol->family != family)
+ continue;
+
+ match = xfrm_selector_match(sel, fl, family);
+ if (match) {
+ xfrm_pol_hold(pol);
+ break;
+ }
+ }
+ read_unlock_bh(&xfrm_policy_lock);
+ if ((*objp = (void *) pol) != NULL)
+ *obj_refp = &pol->refcnt;
+}
+
+static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
+{
+ struct xfrm_policy *pol;
+
+ read_lock_bh(&xfrm_policy_lock);
+ if ((pol = sk->sk_policy[dir]) != NULL) {
+ int match = xfrm_selector_match(&pol->selector, fl,
+ sk->sk_family);
+ if (match)
+ xfrm_pol_hold(pol);
+ else
+ pol = NULL;
+ }
+ read_unlock_bh(&xfrm_policy_lock);
+ return pol;
+}
+
+static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
+{
+ pol->next = xfrm_policy_list[dir];
+ xfrm_policy_list[dir] = pol;
+ xfrm_pol_hold(pol);
+}
+
+static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
+ int dir)
+{
+ struct xfrm_policy **polp;
+
+ for (polp = &xfrm_policy_list[dir];
+ *polp != NULL; polp = &(*polp)->next) {
+ if (*polp == pol) {
+ *polp = pol->next;
+ return pol;
+ }
+ }
+ return NULL;
+}
+
+void xfrm_policy_delete(struct xfrm_policy *pol, int dir)
+{
+ write_lock_bh(&xfrm_policy_lock);
+ pol = __xfrm_policy_unlink(pol, dir);
+ write_unlock_bh(&xfrm_policy_lock);
+ if (pol) {
+ if (dir < XFRM_POLICY_MAX)
+ atomic_inc(&flow_cache_genid);
+ xfrm_policy_kill(pol);
+ }
+}
+
+int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
+{
+ struct xfrm_policy *old_pol;
+
+ write_lock_bh(&xfrm_policy_lock);
+ old_pol = sk->sk_policy[dir];
+ sk->sk_policy[dir] = pol;
+ if (pol) {
+ pol->curlft.add_time = (unsigned long)xtime.tv_sec;
+ pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
+ __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
+ }
+ if (old_pol)
+ __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
+ write_unlock_bh(&xfrm_policy_lock);
+
+ if (old_pol) {
+ xfrm_policy_kill(old_pol);
+ }
+ return 0;
+}
+
+static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
+{
+ struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);
+
+ if (newp) {
+ newp->selector = old->selector;
+ newp->lft = old->lft;
+ newp->curlft = old->curlft;
+ newp->action = old->action;
+ newp->flags = old->flags;
+ newp->xfrm_nr = old->xfrm_nr;
+ newp->index = old->index;
+ memcpy(newp->xfrm_vec, old->xfrm_vec,
+ newp->xfrm_nr*sizeof(struct xfrm_tmpl));
+ write_lock_bh(&xfrm_policy_lock);
+ __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
+ write_unlock_bh(&xfrm_policy_lock);
+ xfrm_pol_put(newp);
+ }
+ return newp;
+}
+
+int __xfrm_sk_clone_policy(struct sock *sk)
+{
+ struct xfrm_policy *p0 = sk->sk_policy[0],
+ *p1 = sk->sk_policy[1];
+
+ sk->sk_policy[0] = sk->sk_policy[1] = NULL;
+ if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
+ return -ENOMEM;
+ if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
+ return -ENOMEM;
+ return 0;
+}
+
+/* Resolve list of templates for the flow, given policy. */
+
+static int
+xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
+ struct xfrm_state **xfrm,
+ unsigned short family)
+{
+ int nx;
+ int i, error;
+ xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
+ xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
+
+ for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
+ struct xfrm_state *x;
+ xfrm_address_t *remote = daddr;
+ xfrm_address_t *local = saddr;
+ struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
+
+ if (tmpl->mode) {
+ remote = &tmpl->id.daddr;
+ local = &tmpl->saddr;
+ }
+
+ x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
+
+ if (x && x->km.state == XFRM_STATE_VALID) {
+ xfrm[nx++] = x;
+ daddr = remote;
+ saddr = local;
+ continue;
+ }
+ if (x) {
+ error = (x->km.state == XFRM_STATE_ERROR ?
+ -EINVAL : -EAGAIN);
+ xfrm_state_put(x);
+ }
+
+ if (!tmpl->optional)
+ goto fail;
+ }
+ return nx;
+
+fail:
+ for (nx--; nx>=0; nx--)
+ xfrm_state_put(xfrm[nx]);
+ return error;
+}
+
+/* Check that the bundle accepts the flow and its components are
+ * still valid.
+ */
+
+static struct dst_entry *
+xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
+{
+ struct dst_entry *x;
+ struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+ if (unlikely(afinfo == NULL))
+ return ERR_PTR(-EINVAL);
+ x = afinfo->find_bundle(fl, policy);
+ xfrm_policy_put_afinfo(afinfo);
+ return x;
+}
+
+/* Allocate chain of dst_entry's, attach known xfrm's, calculate
+ * all the metrics... Shortly, bundle a bundle.
+ */
+
+static int
+xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
+ struct flowi *fl, struct dst_entry **dst_p,
+ unsigned short family)
+{
+ int err;
+ struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+ if (unlikely(afinfo == NULL))
+ return -EINVAL;
+ err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
+ xfrm_policy_put_afinfo(afinfo);
+ return err;
+}
+
+static inline int policy_to_flow_dir(int dir)
+{
+ if (XFRM_POLICY_IN == FLOW_DIR_IN &&
+ XFRM_POLICY_OUT == FLOW_DIR_OUT &&
+ XFRM_POLICY_FWD == FLOW_DIR_FWD)
+ return dir;
+ switch (dir) {
+ default:
+ case XFRM_POLICY_IN:
+ return FLOW_DIR_IN;
+ case XFRM_POLICY_OUT:
+ return FLOW_DIR_OUT;
+ case XFRM_POLICY_FWD:
+ return FLOW_DIR_FWD;
+ };
+}
+
+static int stale_bundle(struct dst_entry *dst);
+
+/* Main function: finds/creates a bundle for given flow.
+ *
+ * At the moment we eat a raw IP route. Mostly to speed up lookups
+ * on interfaces with disabled IPsec.
+ */
+int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
+ struct sock *sk, int flags)
+{
+ struct xfrm_policy *policy;
+ struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+ struct dst_entry *dst, *dst_orig = *dst_p;
+ int nx = 0;
+ int err;
+ u32 genid;
+ u16 family = dst_orig->ops->family;
+restart:
+ genid = atomic_read(&flow_cache_genid);
+ policy = NULL;
+ if (sk && sk->sk_policy[1])
+ policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+
+ if (!policy) {
+ /* To accelerate a bit... */
+ if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
+ return 0;
+
+ policy = flow_cache_lookup(fl, family,
+ policy_to_flow_dir(XFRM_POLICY_OUT),
+ xfrm_policy_lookup);
+ }
+
+ if (!policy)
+ return 0;
+
+ policy->curlft.use_time = (unsigned long)xtime.tv_sec;
+
+ switch (policy->action) {
+ case XFRM_POLICY_BLOCK:
+ /* Prohibit the flow */
+ xfrm_pol_put(policy);
+ return -EPERM;
+
+ case XFRM_POLICY_ALLOW:
+ if (policy->xfrm_nr == 0) {
+ /* Flow passes not transformed. */
+ xfrm_pol_put(policy);
+ return 0;
+ }
+
+ /* Try to find matching bundle.
+ *
+ * LATER: help from flow cache. It is optional, this
+ * is required only for output policy.
+ */
+ dst = xfrm_find_bundle(fl, policy, family);
+ if (IS_ERR(dst)) {
+ xfrm_pol_put(policy);
+ return PTR_ERR(dst);
+ }
+
+ if (dst)
+ break;
+
+ nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+
+ if (unlikely(nx<0)) {
+ err = nx;
+ if (err == -EAGAIN && flags) {
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue(&km_waitq, &wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&km_waitq, &wait);
+
+ nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+
+ if (nx == -EAGAIN && signal_pending(current)) {
+ err = -ERESTART;
+ goto error;
+ }
+ if (nx == -EAGAIN ||
+ genid != atomic_read(&flow_cache_genid)) {
+ xfrm_pol_put(policy);
+ goto restart;
+ }
+ err = nx;
+ }
+ if (err < 0)
+ goto error;
+ }
+ if (nx == 0) {
+ /* Flow passes not transformed. */
+ xfrm_pol_put(policy);
+ return 0;
+ }
+
+ dst = dst_orig;
+ err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);
+
+ if (unlikely(err)) {
+ int i;
+ for (i=0; i<nx; i++)
+ xfrm_state_put(xfrm[i]);
+ goto error;
+ }
+
+ write_lock_bh(&policy->lock);
+ if (unlikely(policy->dead || stale_bundle(dst))) {
+ /* Wow! While we worked on resolving, this
+ * policy has gone. Retry. It is not paranoia,
+ * we just cannot enlist new bundle to dead object.
+ * We can't enlist stable bundles either.
+ */
+ write_unlock_bh(&policy->lock);
+
+ xfrm_pol_put(policy);
+ if (dst)
+ dst_free(dst);
+ goto restart;
+ }
+ dst->next = policy->bundles;
+ policy->bundles = dst;
+ dst_hold(dst);
+ write_unlock_bh(&policy->lock);
+ }
+ *dst_p = dst;
+ dst_release(dst_orig);
+ xfrm_pol_put(policy);
+ return 0;
+
+error:
+ dst_release(dst_orig);
+ xfrm_pol_put(policy);
+ *dst_p = NULL;
+ return err;
+}
+EXPORT_SYMBOL(xfrm_lookup);
+
+/* When skb is transformed back to its "native" form, we have to
+ * check policy restrictions. At the moment we make this in maximally
+ * stupid way. Shame on me. :-) Of course, connected sockets must
+ * have policy cached at them.
+ */
+
+static inline int
+xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
+ unsigned short family)
+{
+ if (xfrm_state_kern(x))
+ return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, family);
+ return x->id.proto == tmpl->id.proto &&
+ (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
+ (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
+ x->props.mode == tmpl->mode &&
+ (tmpl->aalgos & (1<<x->props.aalgo)) &&
+ !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
+}
+
+static inline int
+xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
+ unsigned short family)
+{
+ int idx = start;
+
+ if (tmpl->optional) {
+ if (!tmpl->mode)
+ return start;
+ } else
+ start = -1;
+ for (; idx < sp->len; idx++) {
+ if (xfrm_state_ok(tmpl, sp->x[idx].xvec, family))
+ return ++idx;
+ if (sp->x[idx].xvec->props.mode)
+ break;
+ }
+ return start;
+}
+
+static int
+_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
+{
+ struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+
+ if (unlikely(afinfo == NULL))
+ return -EAFNOSUPPORT;
+
+ afinfo->decode_session(skb, fl);
+ xfrm_policy_put_afinfo(afinfo);
+ return 0;
+}
+
+static inline int secpath_has_tunnel(struct sec_path *sp, int k)
+{
+ for (; k < sp->len; k++) {
+ if (sp->x[k].xvec->props.mode)
+ return 1;
+ }
+
+ return 0;
+}
+
+int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
+ unsigned short family)
+{
+ struct xfrm_policy *pol;
+ struct flowi fl;
+
+ if (_decode_session(skb, &fl, family) < 0)
+ return 0;
+
+ /* First, check used SA against their selectors. */
+ if (skb->sp) {
+ int i;
+
+ for (i=skb->sp->len-1; i>=0; i--) {
+ struct sec_decap_state *xvec = &(skb->sp->x[i]);
+ if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family))
+ return 0;
+
+ /* If there is a post_input processor, try running it */
+ if (xvec->xvec->type->post_input &&
+ (xvec->xvec->type->post_input)(xvec->xvec,
+ &(xvec->decap),
+ skb) != 0)
+ return 0;
+ }
+ }
+
+ pol = NULL;
+ if (sk && sk->sk_policy[dir])
+ pol = xfrm_sk_policy_lookup(sk, dir, &fl);
+
+ if (!pol)
+ pol = flow_cache_lookup(&fl, family,
+ policy_to_flow_dir(dir),
+ xfrm_policy_lookup);
+
+ if (!pol)
+ return !skb->sp || !secpath_has_tunnel(skb->sp, 0);
+
+ pol->curlft.use_time = (unsigned long)xtime.tv_sec;
+
+ if (pol->action == XFRM_POLICY_ALLOW) {
+ struct sec_path *sp;
+ static struct sec_path dummy;
+ int i, k;
+
+ if ((sp = skb->sp) == NULL)
+ sp = &dummy;
+
+ /* For each tunnel xfrm, find the first matching tmpl.
+ * For each tmpl before that, find corresponding xfrm.
+ * Order is _important_. Later we will implement
+ * some barriers, but at the moment barriers
+ * are implied between each two transformations.
+ */
+ for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
+ k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
+ if (k < 0)
+ goto reject;
+ }
+
+ if (secpath_has_tunnel(sp, k))
+ goto reject;
+
+ xfrm_pol_put(pol);
+ return 1;
+ }
+
+reject:
+ xfrm_pol_put(pol);
+ return 0;
+}
+EXPORT_SYMBOL(__xfrm_policy_check);
+
+int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
+{
+ struct flowi fl;
+
+ if (_decode_session(skb, &fl, family) < 0)
+ return 0;
+
+ return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
+}
+EXPORT_SYMBOL(__xfrm_route_forward);
+
+/* Optimize later using cookies and generation ids. */
+
+static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
+{
+ if (!stale_bundle(dst))
+ return dst;
+
+ return NULL;
+}
+
+static int stale_bundle(struct dst_entry *dst)
+{
+ return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC);
+}
+
+static void xfrm_dst_destroy(struct dst_entry *dst)
+{
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+
+ dst_release(xdst->route);
+
+ if (!dst->xfrm)
+ return;
+ xfrm_state_put(dst->xfrm);
+ dst->xfrm = NULL;
+}
+
+static void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+ int unregister)
+{
+ if (!unregister)
+ return;
+
+ while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
+ dst->dev = &loopback_dev;
+ dev_hold(&loopback_dev);
+ dev_put(dev);
+ }
+}
+
+static void xfrm_link_failure(struct sk_buff *skb)
+{
+ /* Impossible. Such dst must be popped before reaches point of failure. */
+ return;
+}
+
+static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
+{
+ if (dst) {
+ if (dst->obsolete) {
+ dst_release(dst);
+ dst = NULL;
+ }
+ }
+ return dst;
+}
+
+static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
+{
+ int i;
+ struct xfrm_policy *pol;
+ struct dst_entry *dst, **dstp, *gc_list = NULL;
+
+ read_lock_bh(&xfrm_policy_lock);
+ for (i=0; i<2*XFRM_POLICY_MAX; i++) {
+ for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
+ write_lock(&pol->lock);
+ dstp = &pol->bundles;
+ while ((dst=*dstp) != NULL) {
+ if (func(dst)) {
+ *dstp = dst->next;
+ dst->next = gc_list;
+ gc_list = dst;
+ } else {
+ dstp = &dst->next;
+ }
+ }
+ write_unlock(&pol->lock);
+ }
+ }
+ read_unlock_bh(&xfrm_policy_lock);
+
+ while (gc_list) {
+ dst = gc_list;
+ gc_list = dst->next;
+ dst_free(dst);
+ }
+}
+
+static int unused_bundle(struct dst_entry *dst)
+{
+ return !atomic_read(&dst->__refcnt);
+}
+
+static void __xfrm_garbage_collect(void)
+{
+ xfrm_prune_bundles(unused_bundle);
+}
+
+int xfrm_flush_bundles(void)
+{
+ xfrm_prune_bundles(stale_bundle);
+ return 0;
+}
+
+void xfrm_init_pmtu(struct dst_entry *dst)
+{
+ do {
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ u32 pmtu, route_mtu_cached;
+
+ pmtu = dst_mtu(dst->child);
+ xdst->child_mtu_cached = pmtu;
+
+ pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
+
+ route_mtu_cached = dst_mtu(xdst->route);
+ xdst->route_mtu_cached = route_mtu_cached;
+
+ if (pmtu > route_mtu_cached)
+ pmtu = route_mtu_cached;
+
+ dst->metrics[RTAX_MTU-1] = pmtu;
+ } while ((dst = dst->next));
+}
+
+EXPORT_SYMBOL(xfrm_init_pmtu);
+
+/* Check that the bundle accepts the flow and its components are
+ * still valid.
+ */
+
+int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
+{
+ struct dst_entry *dst = &first->u.dst;
+ struct xfrm_dst *last;
+ u32 mtu;
+
+ if (!dst_check(dst->path, 0) ||
+ (dst->dev && !netif_running(dst->dev)))
+ return 0;
+
+ last = NULL;
+
+ do {
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+
+ if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
+ return 0;
+ if (dst->xfrm->km.state != XFRM_STATE_VALID)
+ return 0;
+
+ mtu = dst_mtu(dst->child);
+ if (xdst->child_mtu_cached != mtu) {
+ last = xdst;
+ xdst->child_mtu_cached = mtu;
+ }
+
+ if (!dst_check(xdst->route, 0))
+ return 0;
+ mtu = dst_mtu(xdst->route);
+ if (xdst->route_mtu_cached != mtu) {
+ last = xdst;
+ xdst->route_mtu_cached = mtu;
+ }
+
+ dst = dst->child;
+ } while (dst->xfrm);
+
+ if (likely(!last))
+ return 1;
+
+ mtu = last->child_mtu_cached;
+ for (;;) {
+ dst = &last->u.dst;
+
+ mtu = xfrm_state_mtu(dst->xfrm, mtu);
+ if (mtu > last->route_mtu_cached)
+ mtu = last->route_mtu_cached;
+ dst->metrics[RTAX_MTU-1] = mtu;
+
+ if (last == first)
+ break;
+
+ last = last->u.next;
+ last->child_mtu_cached = mtu;
+ }
+
+ return 1;
+}
+
+EXPORT_SYMBOL(xfrm_bundle_ok);
+
+/* Well... that's _TASK_. We need to scan through transformation
+ * list and figure out what mss tcp should generate in order to
+ * final datagram fit to mtu. Mama mia... :-)
+ *
+ * Apparently, some easy way exists, but we used to choose the most
+ * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta.
+ *
+ * Consider this function as something like dark humour. :-)
+ */
+static int xfrm_get_mss(struct dst_entry *dst, u32 mtu)
+{
+ int res = mtu - dst->header_len;
+
+ for (;;) {
+ struct dst_entry *d = dst;
+ int m = res;
+
+ do {
+ struct xfrm_state *x = d->xfrm;
+ if (x) {
+ spin_lock_bh(&x->lock);
+ if (x->km.state == XFRM_STATE_VALID &&
+ x->type && x->type->get_max_size)
+ m = x->type->get_max_size(d->xfrm, m);
+ else
+ m += x->props.header_len;
+ spin_unlock_bh(&x->lock);
+ }
+ } while ((d = d->child) != NULL);
+
+ if (m <= mtu)
+ break;
+ res -= (m - mtu);
+ if (res < 88)
+ return mtu;
+ }
+
+ return res + dst->header_len;
+}
+
+int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
+{
+ int err = 0;
+ if (unlikely(afinfo == NULL))
+ return -EINVAL;
+ if (unlikely(afinfo->family >= NPROTO))
+ return -EAFNOSUPPORT;
+ write_lock(&xfrm_policy_afinfo_lock);
+ if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
+ err = -ENOBUFS;
+ else {
+ struct dst_ops *dst_ops = afinfo->dst_ops;
+ if (likely(dst_ops->kmem_cachep == NULL))
+ dst_ops->kmem_cachep = xfrm_dst_cache;
+ if (likely(dst_ops->check == NULL))
+ dst_ops->check = xfrm_dst_check;
+ if (likely(dst_ops->destroy == NULL))
+ dst_ops->destroy = xfrm_dst_destroy;
+ if (likely(dst_ops->ifdown == NULL))
+ dst_ops->ifdown = xfrm_dst_ifdown;
+ if (likely(dst_ops->negative_advice == NULL))
+ dst_ops->negative_advice = xfrm_negative_advice;
+ if (likely(dst_ops->link_failure == NULL))
+ dst_ops->link_failure = xfrm_link_failure;
+ if (likely(dst_ops->get_mss == NULL))
+ dst_ops->get_mss = xfrm_get_mss;
+ if (likely(afinfo->garbage_collect == NULL))
+ afinfo->garbage_collect = __xfrm_garbage_collect;
+ xfrm_policy_afinfo[afinfo->family] = afinfo;
+ }
+ write_unlock(&xfrm_policy_afinfo_lock);
+ return err;
+}
+EXPORT_SYMBOL(xfrm_policy_register_afinfo);
+
+int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
+{
+ int err = 0;
+ if (unlikely(afinfo == NULL))
+ return -EINVAL;
+ if (unlikely(afinfo->family >= NPROTO))
+ return -EAFNOSUPPORT;
+ write_lock(&xfrm_policy_afinfo_lock);
+ if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
+ if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
+ err = -EINVAL;
+ else {
+ struct dst_ops *dst_ops = afinfo->dst_ops;
+ xfrm_policy_afinfo[afinfo->family] = NULL;
+ dst_ops->kmem_cachep = NULL;
+ dst_ops->check = NULL;
+ dst_ops->destroy = NULL;
+ dst_ops->ifdown = NULL;
+ dst_ops->negative_advice = NULL;
+ dst_ops->link_failure = NULL;
+ dst_ops->get_mss = NULL;
+ afinfo->garbage_collect = NULL;
+ }
+ }
+ write_unlock(&xfrm_policy_afinfo_lock);
+ return err;
+}
+EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
+
+static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
+{
+ struct xfrm_policy_afinfo *afinfo;
+ if (unlikely(family >= NPROTO))
+ return NULL;
+ read_lock(&xfrm_policy_afinfo_lock);
+ afinfo = xfrm_policy_afinfo[family];
+ if (likely(afinfo != NULL))
+ read_lock(&afinfo->lock);
+ read_unlock(&xfrm_policy_afinfo_lock);
+ return afinfo;
+}
+
+static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
+{
+ if (unlikely(afinfo == NULL))
+ return;
+ read_unlock(&afinfo->lock);
+}
+
+static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ switch (event) {
+ case NETDEV_DOWN:
+ xfrm_flush_bundles();
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block xfrm_dev_notifier = {
+ xfrm_dev_event,
+ NULL,
+ 0
+};
+
+static void __init xfrm_policy_init(void)
+{
+ xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
+ sizeof(struct xfrm_dst),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (!xfrm_dst_cache)
+ panic("XFRM: failed to allocate xfrm_dst_cache\n");
+
+ INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
+ register_netdevice_notifier(&xfrm_dev_notifier);
+}
+
+void __init xfrm_init(void)
+{
+ xfrm_state_init();
+ xfrm_policy_init();
+ xfrm_input_init();
+}
+