| /* |
| * Pseudo-driver for the intermediate queue device. |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public License |
| * as published by the Free Software Foundation; either version |
| * 2 of the License, or (at your option) any later version. |
| * |
| * Authors: Patrick McHardy, <kaber@trash.net> |
| * |
| * The first version was written by Martin Devera, <devik@cdi.cz> |
| * |
| * See Credits.txt |
| */ |
| |
| #include <linux/module.h> |
| #include <linux/kernel.h> |
| #include <linux/moduleparam.h> |
| #include <linux/list.h> |
| #include <linux/skbuff.h> |
| #include <linux/netdevice.h> |
| #include <linux/etherdevice.h> |
| #include <linux/rtnetlink.h> |
| #include <linux/if_arp.h> |
| #include <linux/netfilter.h> |
| #include <linux/netfilter_ipv4.h> |
| #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
| #include <linux/netfilter_ipv6.h> |
| #endif |
| #include <linux/imq.h> |
| #include <net/pkt_sched.h> |
| #include <net/netfilter/nf_queue.h> |
| #include <net/sock.h> |
| #include <linux/ip.h> |
| #include <linux/ipv6.h> |
| #include <linux/if_vlan.h> |
| #include <linux/if_pppox.h> |
| #include <net/ip.h> |
| #include <net/ipv6.h> |
| |
static int imq_nf_queue(struct nf_queue_entry *entry, unsigned int queue_num);
| |
| static nf_hookfn imq_nf_hook; |
| |
| static struct nf_hook_ops imq_ops[] = { |
| { |
| /* imq_ingress_ipv4 */ |
| .hook = imq_nf_hook, |
| .pf = PF_INET, |
| .hooknum = NF_INET_LOCAL_IN, |
| .priority = NF_IP_PRI_FILTER + 1, |
| }, |
| |
| { |
| /* imq_egress_ipv4 */ |
| .hook = imq_nf_hook, |
| .pf = PF_INET, |
| .hooknum = NF_INET_POST_ROUTING, |
| .priority = NF_IP_PRI_LAST, |
| }, |
| |
| #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
| { |
| /* imq_ingress_ipv6 */ |
| .hook = imq_nf_hook, |
| .pf = PF_INET6, |
| .hooknum = NF_INET_LOCAL_IN, |
| .priority = NF_IP_PRI_FILTER + 1, |
| }, |
| |
| { |
| /* imq_egress_ipv6 */ |
| .hook = imq_nf_hook, |
| .pf = PF_INET6, |
| .hooknum = NF_INET_POST_ROUTING, |
| .priority = NF_IP6_PRI_LAST, |
| }, |
| #endif |
| }; |
| |
| #if defined(CONFIG_IMQ_NUM_DEVS) |
| static int numdevs = CONFIG_IMQ_NUM_DEVS; |
| #else |
| static int numdevs = IMQ_MAX_DEVS; |
| #endif |
| |
| static struct net_device *imq_devs_cache[IMQ_MAX_DEVS]; |
| |
| #define IMQ_MAX_QUEUES 32 |
| static int numqueues = 1; |
| static u32 imq_hashrnd; |
| static int imq_dev_accurate_stats = 1; |
| |
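/* Return the PPP protocol field that follows the PPPoE session header. */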
| static inline __be16 pppoe_proto(const struct sk_buff *skb) |
| { |
| return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + |
| sizeof(struct pppoe_hdr))); |
| } |
| |
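/* Hash the packet's flow to a tx queue index. Transient VLAN and PPPoE
 * encapsulations are pulled off to reach the IP header and restored
 * before returning; non-IP traffic falls back to a protocol-derived
 * index.
 */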
| static u16 imq_hash(struct net_device *dev, struct sk_buff *skb) |
| { |
| unsigned int pull_len; |
| u16 protocol = skb->protocol; |
| u32 addr1, addr2; |
| u32 hash = 0; |
| int ihl = 0; |
| union { |
| u16 in16[2]; |
| u32 in32; |
| } ports; |
| u8 ip_proto; |
| |
| pull_len = 0; |
| |
| recheck: |
| switch (protocol) { |
| case htons(ETH_P_8021Q): { |
| if (unlikely(skb_pull(skb, VLAN_HLEN) == NULL)) |
| goto other; |
| |
| pull_len += VLAN_HLEN; |
| skb->network_header += VLAN_HLEN; |
| |
| protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; |
| goto recheck; |
| } |
| |
| case htons(ETH_P_PPP_SES): { |
| if (unlikely(skb_pull(skb, PPPOE_SES_HLEN) == NULL)) |
| goto other; |
| |
| pull_len += PPPOE_SES_HLEN; |
| skb->network_header += PPPOE_SES_HLEN; |
| |
| protocol = pppoe_proto(skb); |
| goto recheck; |
| } |
| |
	case htons(ETH_P_IP): {
		const struct iphdr *iph;

		if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr))))
			goto other;

		/* refetch: pskb_may_pull() may have reallocated the header */
		iph = ip_hdr(skb);
		addr1 = iph->daddr;
		addr2 = iph->saddr;

		ip_proto = !(iph->frag_off & htons(IP_MF | IP_OFFSET)) ?
			iph->protocol : 0;
		ihl = ip_hdrlen(skb);

		break;
	}
| #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
	case htons(ETH_P_IPV6): {
		const struct ipv6hdr *iph;
		__be16 fo = 0;

		if (unlikely(!pskb_may_pull(skb, sizeof(struct ipv6hdr))))
			goto other;

		/* refetch: pskb_may_pull() may have reallocated the header */
		iph = ipv6_hdr(skb);
		ip_proto = iph->nexthdr;
		addr1 = iph->daddr.s6_addr32[3];
		addr2 = iph->saddr.s6_addr32[3];
		ihl = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &ip_proto,
				       &fo);
| if (unlikely(ihl < 0)) |
| goto other; |
| |
| break; |
| } |
| #endif |
| default: |
| other: |
| if (pull_len != 0) { |
| skb_push(skb, pull_len); |
| skb->network_header -= pull_len; |
| } |
| |
| return (u16)(ntohs(protocol) % dev->real_num_tx_queues); |
| } |
| |
| if (addr1 > addr2) |
| swap(addr1, addr2); |
| |
| switch (ip_proto) { |
| case IPPROTO_TCP: |
| case IPPROTO_UDP: |
| case IPPROTO_DCCP: |
| case IPPROTO_ESP: |
| case IPPROTO_AH: |
| case IPPROTO_SCTP: |
| case IPPROTO_UDPLITE: { |
| if (likely(skb_copy_bits(skb, ihl, &ports.in32, 4) >= 0)) { |
| if (ports.in16[0] > ports.in16[1]) |
| swap(ports.in16[0], ports.in16[1]); |
| break; |
| } |
| /* fall-through */ |
| } |
| default: |
| ports.in32 = 0; |
| break; |
| } |
| |
| if (pull_len != 0) { |
| skb_push(skb, pull_len); |
| skb->network_header -= pull_len; |
| } |
| |
| hash = jhash_3words(addr1, addr2, ports.in32, imq_hashrnd ^ ip_proto); |
| |
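	/* Scale the 32-bit hash into [0, real_num_tx_queues) with a
	 * multiply-shift instead of a modulo.
	 */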
| return (u16)(((u64)hash * dev->real_num_tx_queues) >> 32); |
| } |
| |
| static inline bool sk_tx_queue_recorded(struct sock *sk) |
| { |
| return (sk_tx_queue_get(sk) >= 0); |
| } |
| |
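/* Pick a tx queue for the packet, in order of preference: the recorded
 * rx queue, the socket's recorded tx queue, the socket hash, and finally
 * a hash of the packet headers.
 */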
| static struct netdev_queue *imq_select_queue(struct net_device *dev, |
| struct sk_buff *skb) |
| { |
| u16 queue_index = 0; |
| u32 hash; |
| |
| if (likely(dev->real_num_tx_queues == 1)) |
| goto out; |
| |
	/* IMQ can be receiving either ingress or egress packets. */

	/* Check first if the rx queue is recorded */
| if (skb_rx_queue_recorded(skb)) { |
| queue_index = skb_get_rx_queue(skb); |
| goto out; |
| } |
| |
| /* Check if socket has tx_queue set */ |
| if (sk_tx_queue_recorded(skb->sk)) { |
| queue_index = sk_tx_queue_get(skb->sk); |
| goto out; |
| } |
| |
	/* Try to use the socket hash */
| if (skb->sk && skb->sk->sk_hash) { |
| hash = skb->sk->sk_hash; |
| queue_index = |
| (u16)(((u64)hash * dev->real_num_tx_queues) >> 32); |
| goto out; |
| } |
| |
| /* Generate hash from packet data */ |
| queue_index = imq_hash(dev, skb); |
| |
| out: |
| if (unlikely(queue_index >= dev->real_num_tx_queues)) |
| queue_index = (u16)((u32)queue_index % dev->real_num_tx_queues); |
| |
| skb_set_queue_mapping(skb, queue_index); |
| return netdev_get_tx_queue(dev, queue_index); |
| } |
| |
| static struct net_device_stats *imq_get_stats(struct net_device *dev) |
| { |
| return &dev->stats; |
| } |
| |
/* Called for packets kfree'd in qdiscs at places other than enqueue */
| static void imq_skb_destructor(struct sk_buff *skb) |
| { |
| struct nf_queue_entry *entry = skb->nf_queue_entry; |
| |
| skb->nf_queue_entry = NULL; |
| |
| if (entry) { |
| nf_queue_entry_release_refs(entry); |
| kfree(entry); |
| } |
| |
| skb_restore_cb(skb); /* kfree backup */ |
| } |
| |
| static void imq_done_check_queue_mapping(struct sk_buff *skb, |
| struct net_device *dev) |
| { |
| unsigned int queue_index; |
| |
| /* Don't let queue_mapping be left too large after exiting IMQ */ |
| if (likely(skb->dev != dev && skb->dev != NULL)) { |
| queue_index = skb_get_queue_mapping(skb); |
| if (unlikely(queue_index >= skb->dev->real_num_tx_queues)) { |
| queue_index = (u16)((u32)queue_index % |
| skb->dev->real_num_tx_queues); |
| skb_set_queue_mapping(skb, queue_index); |
| } |
| } else { |
| /* skb->dev was IMQ device itself or NULL, be on safe side and |
| * just clear queue mapping. |
| */ |
| skb_set_queue_mapping(skb, 0); |
| } |
| } |
| |
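/* Transmit handler of the IMQ device: account the packet, then hand it
 * back to netfilter with NF_ACCEPT so it resumes traversal where it was
 * intercepted.
 */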
| static netdev_tx_t imq_dev_xmit(struct sk_buff *skb, struct net_device *dev) |
| { |
| struct nf_queue_entry *entry = skb->nf_queue_entry; |
| |
| rcu_read_lock(); |
| |
| skb->nf_queue_entry = NULL; |
| netif_trans_update(dev); |
| |
| dev->stats.tx_bytes += skb->len; |
| dev->stats.tx_packets++; |
| |
| if (unlikely(entry == NULL)) { |
		/* We don't know what is going on here: the packet was queued
		 * for the imq device, but (probably) not by us.
		 *
		 * If this packet was not sent here by imq_nf_queue(), then
		 * skb_save_cb() was not used and kfree_skb() should not show:
		 * WARNING: IMQ: kfree_skb: skb->cb_next:..
		 * and/or
		 * WARNING: IMQ: kfree_skb: skb->nf_queue_entry...
		 *
		 * However, if these warnings are shown, then IMQ is somehow
		 * broken and you should report this to linuximq.net.
		 */

		/* imq_dev_xmit is a black hole that eats all packets; report
		 * that we ate this packet happily and increase the dropped
		 * counters.
		 */
| |
| dev->stats.tx_dropped++; |
| dev_kfree_skb(skb); |
| |
| rcu_read_unlock(); |
| return NETDEV_TX_OK; |
| } |
| |
| skb_restore_cb(skb); /* restore skb->cb */ |
| |
| skb->imq_flags = 0; |
| skb->destructor = NULL; |
| |
| imq_done_check_queue_mapping(skb, dev); |
| |
| nf_reinject(entry, NF_ACCEPT); |
| |
| rcu_read_unlock(); |
| return NETDEV_TX_OK; |
| } |
| |
| static struct net_device *get_imq_device_by_index(int index) |
| { |
| struct net_device *dev = NULL; |
| struct net *net; |
| char buf[8]; |
| |
| /* get device by name and cache result */ |
| snprintf(buf, sizeof(buf), "imq%d", index); |
| |
	/* Search for the device in all namespaces. */
| for_each_net(net) { |
| dev = dev_get_by_name(net, buf); |
| if (dev) |
| break; |
| } |
| |
| if (WARN_ON_ONCE(dev == NULL)) { |
| /* IMQ device not found. Exotic config? */ |
| return ERR_PTR(-ENODEV); |
| } |
| |
| imq_devs_cache[index] = dev; |
| dev_put(dev); |
| |
| return dev; |
| } |
| |
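/* Duplicate a queue entry for one GSO segment, taking the references
 * the copy needs.
 */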
| static struct nf_queue_entry *nf_queue_entry_dup(struct nf_queue_entry *e) |
| { |
| struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC); |
| if (entry) { |
| nf_queue_entry_get_refs(entry); |
| return entry; |
| } |
| return NULL; |
| } |
| |
| #ifdef CONFIG_BRIDGE_NETFILTER |
| /* When called from bridge netfilter, skb->data must point to MAC header |
| * before calling skb_gso_segment(). Else, original MAC header is lost |
| * and segmented skbs will be sent to wrong destination. |
| */ |
| static void nf_bridge_adjust_skb_data(struct sk_buff *skb) |
| { |
| if (skb->nf_bridge) |
| __skb_push(skb, skb->network_header - skb->mac_header); |
| } |
| |
| static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) |
| { |
| if (skb->nf_bridge) |
| __skb_pull(skb, skb->network_header - skb->mac_header); |
| } |
| #else |
| #define nf_bridge_adjust_skb_data(s) do {} while (0) |
| #define nf_bridge_adjust_segmented_data(s) do {} while (0) |
| #endif |
| |
| static void free_entry(struct nf_queue_entry *entry) |
| { |
| nf_queue_entry_release_refs(entry); |
| kfree(entry); |
| } |
| |
| static int __imq_nf_queue(struct nf_queue_entry *entry, struct net_device *dev); |
| |
| static int __imq_nf_queue_gso(struct nf_queue_entry *entry, |
| struct net_device *dev, struct sk_buff *skb) |
| { |
| int ret = -ENOMEM; |
| struct nf_queue_entry *entry_seg; |
| |
| nf_bridge_adjust_segmented_data(skb); |
| |
| if (skb->next == NULL) { /* last packet, no need to copy entry */ |
| struct sk_buff *gso_skb = entry->skb; |
| entry->skb = skb; |
| ret = __imq_nf_queue(entry, dev); |
| if (ret) |
| entry->skb = gso_skb; |
| return ret; |
| } |
| |
| skb->next = NULL; |
| |
| entry_seg = nf_queue_entry_dup(entry); |
| if (entry_seg) { |
| entry_seg->skb = skb; |
| ret = __imq_nf_queue(entry_seg, dev); |
| if (ret) |
| free_entry(entry_seg); |
| } |
| return ret; |
| } |
| |
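/* nf_queue handler: resolve the target imq device from the packet's
 * IMQ flags, then enqueue the packet to it, segmenting GSO skbs first
 * so that each segment is queued on its own.
 */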
static int imq_nf_queue(struct nf_queue_entry *entry, unsigned int queue_num)
| { |
| struct sk_buff *skb, *segs; |
| struct net_device *dev; |
| unsigned int queued; |
| int index, retval, err; |
| |
| index = entry->skb->imq_flags & IMQ_F_IFMASK; |
| if (unlikely(index > numdevs - 1)) { |
| if (net_ratelimit()) |
			pr_warn("IMQ: invalid device specified, highest is %d\n",
				numdevs - 1);
| retval = -EINVAL; |
| goto out_no_dev; |
| } |
| |
| /* check for imq device by index from cache */ |
| dev = imq_devs_cache[index]; |
| if (unlikely(!dev)) { |
| dev = get_imq_device_by_index(index); |
| if (IS_ERR(dev)) { |
| retval = PTR_ERR(dev); |
| goto out_no_dev; |
| } |
| } |
| |
| if (unlikely(!(dev->flags & IFF_UP))) { |
| entry->skb->imq_flags = 0; |
| retval = -ECANCELED; |
| goto out_no_dev; |
| } |
| |
	/* Since 3.10.x, GSO handling moved here as a result of upstream commit
| * a5fedd43d5f6c94c71053a66e4c3d2e35f1731a2 (netfilter: move |
| * skb_gso_segment into nfnetlink_queue module). |
| * |
| * Following code replicates the gso handling from |
| * 'net/netfilter/nfnetlink_queue_core.c':nfqnl_enqueue_packet(). |
| */ |
| |
| skb = entry->skb; |
| |
| switch (entry->state.pf) { |
| case NFPROTO_IPV4: |
| skb->protocol = htons(ETH_P_IP); |
| break; |
| case NFPROTO_IPV6: |
| skb->protocol = htons(ETH_P_IPV6); |
| break; |
| } |
| |
| if (!skb_is_gso(entry->skb)) |
| return __imq_nf_queue(entry, dev); |
| |
| nf_bridge_adjust_skb_data(skb); |
| segs = skb_gso_segment(skb, 0); |
| /* Does not use PTR_ERR to limit the number of error codes that can be |
| * returned by nf_queue. For instance, callers rely on -ECANCELED to |
| * mean 'ignore this hook'. |
| */ |
| err = -ENOBUFS; |
| if (IS_ERR(segs)) |
| goto out_err; |
| queued = 0; |
| err = 0; |
| do { |
| struct sk_buff *nskb = segs->next; |
| if (nskb && nskb->next) |
| nskb->cb_next = NULL; |
| if (err == 0) |
| err = __imq_nf_queue_gso(entry, dev, segs); |
| if (err == 0) |
| queued++; |
| else |
| kfree_skb(segs); |
| segs = nskb; |
| } while (segs); |
| |
| if (queued) { |
| if (err) /* some segments are already queued */ |
| free_entry(entry); |
| kfree_skb(skb); |
| return 0; |
| } |
| |
| out_err: |
| nf_bridge_adjust_segmented_data(skb); |
| retval = err; |
| out_no_dev: |
| return retval; |
| } |
| |
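/* Enqueue one skb to the imq device's qdisc and, if a packet is ready
 * afterwards, transmit it immediately. Packets that already have an
 * owner (a destructor) are cloned first so the original stays intact.
 */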
| static int __imq_nf_queue(struct nf_queue_entry *entry, struct net_device *dev) |
| { |
| struct sk_buff *skb_orig, *skb, *skb_shared, *skb_popd; |
| struct Qdisc *q; |
| struct sk_buff *to_free = NULL; |
| struct netdev_queue *txq; |
| spinlock_t *root_lock; |
| int users; |
| int retval = -EINVAL; |
| unsigned int orig_queue_index; |
| |
| skb = entry->skb; |
| skb_orig = NULL; |
| |
| /* skb has owner? => make clone */ |
| if (unlikely(skb->destructor)) { |
| skb_orig = skb; |
| skb = skb_clone(skb, GFP_ATOMIC); |
| if (unlikely(!skb)) { |
| retval = -ENOMEM; |
| goto out; |
| } |
| skb->cb_next = NULL; |
| entry->skb = skb; |
| } |
| |
| dev->stats.rx_bytes += skb->len; |
| dev->stats.rx_packets++; |
| |
| if (!skb->dev) { |
		/* skb->dev == NULL causes problems; try to find the cause. */
| if (net_ratelimit()) { |
| dev_warn(&dev->dev, |
| "received packet with skb->dev == NULL\n"); |
| dump_stack(); |
| } |
| |
| skb->dev = dev; |
| } |
| |
	/* Disable softirqs for the lock below */
| rcu_read_lock_bh(); |
| |
| /* Multi-queue selection */ |
| orig_queue_index = skb_get_queue_mapping(skb); |
| txq = imq_select_queue(dev, skb); |
| |
| q = rcu_dereference(txq->qdisc); |
| if (unlikely(!q->enqueue)) |
| goto packet_not_enqueue_to_imq_dev; |
| |
| skb->nf_queue_entry = entry; |
| root_lock = qdisc_lock(q); |
| spin_lock(root_lock); |
| |
| users = refcount_read(&skb->users); |
| |
| skb_shared = skb_get(skb); /* increase reference count by one */ |
| |
	/* back up skb->cb, as the qdisc layer will overwrite it */
| skb_save_cb(skb_shared); |
| qdisc_enqueue_root(skb_shared, q, &to_free); /* might kfree_skb */ |
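	/* If our extra reference survived, the qdisc kept the skb;
	 * otherwise it dropped (and freed) it.
	 */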
| if (likely(refcount_read(&skb_shared->users) == users + 1)) { |
| bool validate; |
| |
| kfree_skb(skb_shared); /* decrease reference count by one */ |
| |
| skb->destructor = &imq_skb_destructor; |
| |
| skb_popd = qdisc_dequeue_skb(q, &validate); |
| |
| /* cloned? */ |
| if (unlikely(skb_orig)) |
| kfree_skb(skb_orig); /* free original */ |
| |
| spin_unlock(root_lock); |
| |
| if (likely(skb_popd)) { |
| /* Note that we validate skb (GSO, checksum, ...) outside of locks */ |
| if (validate) |
| skb_popd = validate_xmit_skb_list(skb_popd, dev); |
| |
| if (skb_popd) { |
| int dummy_ret; |
| int cpu = smp_processor_id(); /* ok because BHs are off */ |
| |
| txq = skb_get_tx_queue(dev, skb_popd); |
| /* |
| IMQ device will not be frozen or stoped, and it always be successful. |
| So we need not check its status and return value to accelerate. |
| */ |
| if (imq_dev_accurate_stats && txq->xmit_lock_owner != cpu) { |
| HARD_TX_LOCK(dev, txq, cpu); |
| if (!netif_xmit_frozen_or_stopped(txq)) { |
| dev_hard_start_xmit(skb_popd, dev, txq, &dummy_ret); |
| } else { |
| kfree_skb_list(skb_popd); |
| } |
| HARD_TX_UNLOCK(dev, txq); |
| } else { |
| if (!netif_xmit_frozen_or_stopped(txq)) { |
| dev_hard_start_xmit(skb_popd, dev, txq, &dummy_ret); |
| } else { |
| kfree_skb_list(skb_popd); |
| } |
| } |
| } |
| } else { |
			/* No skb ready to transmit; schedule the qdisc */
| __netif_schedule(q); |
| } |
| |
| rcu_read_unlock_bh(); |
| retval = 0; |
| goto out; |
| } else { |
| skb_restore_cb(skb_shared); /* restore skb->cb */ |
| skb->nf_queue_entry = NULL; |
| /* |
| * qdisc dropped packet and decreased skb reference count of |
| * skb, so we don't really want to and try refree as that would |
| * actually destroy the skb. |
| */ |
| spin_unlock(root_lock); |
| goto packet_not_enqueue_to_imq_dev; |
| } |
| |
| packet_not_enqueue_to_imq_dev: |
| skb_set_queue_mapping(skb, orig_queue_index); |
| rcu_read_unlock_bh(); |
| |
| /* cloned? restore original */ |
| if (unlikely(skb_orig)) { |
| kfree_skb(skb); |
| entry->skb = skb_orig; |
| } |
| retval = -1; |
| out: |
| if (unlikely(to_free)) { |
| kfree_skb_list(to_free); |
| } |
| return retval; |
| } |
| static unsigned int imq_nf_hook(void *priv, |
| struct sk_buff *skb, |
| const struct nf_hook_state *state) |
| { |
| return (skb->imq_flags & IMQ_F_ENQUEUE) ? NF_IMQ_QUEUE : NF_ACCEPT; |
| } |
| |
| static int imq_close(struct net_device *dev) |
| { |
| netif_stop_queue(dev); |
| return 0; |
| } |
| |
| static int imq_open(struct net_device *dev) |
| { |
| netif_start_queue(dev); |
| return 0; |
| } |
| |
| static struct device_type imq_device_type = { |
| .name = "imq", |
| }; |
| |
| static const struct net_device_ops imq_netdev_ops = { |
| .ndo_open = imq_open, |
| .ndo_stop = imq_close, |
| .ndo_start_xmit = imq_dev_xmit, |
| .ndo_get_stats = imq_get_stats, |
| }; |
| |
| static void imq_setup(struct net_device *dev) |
| { |
| dev->netdev_ops = &imq_netdev_ops; |
| dev->type = ARPHRD_VOID; |
| dev->mtu = 16000; /* too small? */ |
| dev->tx_queue_len = 1000; /* too big? */ |
| dev->flags = IFF_NOARP; |
| dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | |
| NETIF_F_GSO | NETIF_F_HW_CSUM | |
| NETIF_F_HIGHDMA; |
| dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | |
| IFF_TX_SKB_SHARING); |
| } |
| |
| static int imq_validate(struct nlattr *tb[], struct nlattr *data[], |
| struct netlink_ext_ack *extack) |
| { |
| int ret = 0; |
| |
| if (tb[IFLA_ADDRESS]) { |
| if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { |
| ret = -EINVAL; |
| goto end; |
| } |
| if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { |
| ret = -EADDRNOTAVAIL; |
| goto end; |
| } |
| } |
| return 0; |
| end: |
| pr_warn("IMQ: imq_validate failed (%d)\n", ret); |
| return ret; |
| } |
| |
| static struct rtnl_link_ops imq_link_ops __read_mostly = { |
| .kind = "imq", |
| .priv_size = 0, |
| .setup = imq_setup, |
| .validate = imq_validate, |
| }; |
| |
| static const struct nf_queue_handler imq_nfqh = { |
| .outfn = imq_nf_queue, |
| }; |
| |
| static int __net_init imq_nf_register(struct net *net) |
| { |
| return nf_register_net_hooks(net, imq_ops, |
| ARRAY_SIZE(imq_ops)); |
| }; |
| |
| static void __net_exit imq_nf_unregister(struct net *net) |
| { |
| nf_unregister_net_hooks(net, imq_ops, |
| ARRAY_SIZE(imq_ops)); |
| }; |
| |
| static struct pernet_operations imq_net_ops = { |
| .init = imq_nf_register, |
| .exit = imq_nf_unregister, |
| }; |
| |
| static int __net_init imq_init_hooks(void) |
| { |
| int ret; |
| nf_register_queue_imq_handler(&imq_nfqh); |
| |
| ret = register_pernet_subsys(&imq_net_ops); |
| if (ret < 0) |
| nf_unregister_queue_imq_handler(); |
| |
| return ret; |
| } |
| |
| #ifdef CONFIG_LOCKDEP |
| static struct lock_class_key imq_netdev_addr_lock_key; |
| |
| static void __init imq_dev_set_lockdep_one(struct net_device *dev, |
| struct netdev_queue *txq, void *arg) |
| { |
| /* |
| * the IMQ transmit locks can be taken recursively, |
| * for example with one IMQ rule for input- and one for |
| * output network devices in iptables! |
| * until we find a better solution ignore them. |
| */ |
| lockdep_set_novalidate_class(&txq->_xmit_lock); |
| } |
| |
| static void imq_dev_set_lockdep_class(struct net_device *dev) |
| { |
| lockdep_set_class_and_name(&dev->addr_list_lock, |
| &imq_netdev_addr_lock_key, "_xmit_addr_IMQ"); |
| netdev_for_each_tx_queue(dev, imq_dev_set_lockdep_one, NULL); |
| } |
| #else |
| static inline void imq_dev_set_lockdep_class(struct net_device *dev) |
| { |
| } |
| #endif |
| |
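/* Allocate and register a single multiqueue imq%d device. */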
| static int __init imq_init_one(int index) |
| { |
| struct net_device *dev; |
| int ret; |
| |
| dev = alloc_netdev_mq(0, "imq%d", NET_NAME_UNKNOWN, imq_setup, numqueues); |
| if (!dev) |
| return -ENOMEM; |
| |
| ret = dev_alloc_name(dev, dev->name); |
| if (ret < 0) |
| goto fail; |
| |
| dev->rtnl_link_ops = &imq_link_ops; |
| SET_NETDEV_DEVTYPE(dev, &imq_device_type); |
| ret = register_netdevice(dev); |
| if (ret < 0) |
| goto fail; |
| |
| imq_dev_set_lockdep_class(dev); |
| |
| return 0; |
| fail: |
| free_netdev(dev); |
| return ret; |
| } |
| |
| static int __init imq_init_devs(void) |
| { |
| int err, i; |
| |
| if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) { |
		pr_err("IMQ: numdevs has to be between 1 and %u\n",
		       IMQ_MAX_DEVS);
| return -EINVAL; |
| } |
| |
| if (numqueues < 1 || numqueues > IMQ_MAX_QUEUES) { |
		pr_err("IMQ: numqueues has to be between 1 and %u\n",
		       IMQ_MAX_QUEUES);
| return -EINVAL; |
| } |
| |
| get_random_bytes(&imq_hashrnd, sizeof(imq_hashrnd)); |
| |
| rtnl_lock(); |
| err = __rtnl_link_register(&imq_link_ops); |
| |
| for (i = 0; i < numdevs && !err; i++) |
| err = imq_init_one(i); |
| |
| if (err) { |
| __rtnl_link_unregister(&imq_link_ops); |
| memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); |
| } |
| rtnl_unlock(); |
| |
| return err; |
| } |
| |
| static int __init imq_init_module(void) |
| { |
| int err; |
| |
| #if defined(CONFIG_IMQ_NUM_DEVS) |
| BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16); |
| BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2); |
| BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK); |
| #endif |
| |
| err = imq_init_devs(); |
| if (err) { |
		pr_err("IMQ: Error trying imq_init_devs()\n");
| return err; |
| } |
| |
| err = imq_init_hooks(); |
| if (err) { |
		pr_err("IMQ: Error trying imq_init_hooks()\n");
| rtnl_link_unregister(&imq_link_ops); |
| memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); |
| return err; |
| } |
| |
| pr_info("IMQ driver loaded successfully. (numdevs = %d, numqueues = %d, imq_dev_accurate_stats = %d)\n", |
| numdevs, numqueues, imq_dev_accurate_stats); |
| |
| return 0; |
| } |
| |
| static void __exit imq_unhook(void) |
| { |
| unregister_pernet_subsys(&imq_net_ops); |
| nf_unregister_queue_imq_handler(); |
| } |
| |
| static void __exit imq_cleanup_devs(void) |
| { |
| rtnl_link_unregister(&imq_link_ops); |
| memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); |
| } |
| |
| static void __exit imq_exit_module(void) |
| { |
| imq_unhook(); |
| imq_cleanup_devs(); |
| pr_info("IMQ driver unloaded successfully.\n"); |
| } |
| |
| module_init(imq_init_module); |
| module_exit(imq_exit_module); |
| |
| module_param(numdevs, int, 0); |
| module_param(numqueues, int, 0); |
| module_param(imq_dev_accurate_stats, int, 0); |
| MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will be created)"); |
| MODULE_PARM_DESC(numqueues, "number of queues per IMQ device"); |
MODULE_PARM_DESC(imq_dev_accurate_stats, "Enable accurate imq device stats (default: 1)");
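/* Example (assuming IMQ is built as a module):
 *   modprobe imq numdevs=2 numqueues=4
 * creates imq0 and imq1, each with four tx queues.
 */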
| |
| MODULE_AUTHOR("https://github.com/imq/linuximq"); |
| MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See https://github.com/imq/linuximq/wiki for more information."); |
| MODULE_LICENSE("GPL"); |
| MODULE_ALIAS_RTNL_LINK("imq"); |
| |