Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless into for-davem
diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt
index 9ecd43d..3fc3605 100644
--- a/Documentation/devicetree/bindings/net/ethernet.txt
+++ b/Documentation/devicetree/bindings/net/ethernet.txt
@@ -10,7 +10,7 @@
- max-frame-size: number, maximum transfer unit (IEEE defined MTU), rather than
the maximum frame size (there's contradiction in ePAPR).
- phy-mode: string, operation mode of the PHY interface; supported values are
- "mii", "gmii", "sgmii", "tbi", "rev-mii", "rmii", "rgmii", "rgmii-id",
+ "mii", "gmii", "sgmii", "qsgmii", "tbi", "rev-mii", "rmii", "rgmii", "rgmii-id",
"rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii"; this is now a de-facto
standard property;
- phy-connection-type: the same as "phy-mode" property but described in ePAPR;
diff --git a/Makefile b/Makefile
index e709613..60ccbfe 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 3
-PATCHLEVEL = 14
+PATCHLEVEL = 15
SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
NAME = Shuffling Zombie Juror
# *DOCUMENTATION*
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fcaf9c9..7de069af 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -60,7 +60,7 @@
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
- | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index bea6067..f47a104 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -308,7 +308,7 @@
const u32 kvm_supported_word9_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
- F(ADX);
+ F(ADX) | F(SMAP);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index a2a1bb7..eeecbed 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -48,6 +48,14 @@
return best && (best->ebx & bit(X86_FEATURE_SMEP));
}
+static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->ebx & bit(X86_FEATURE_SMAP));
+}
+
static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f5704d9..813d310 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3601,20 +3601,27 @@
}
}
-static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+void update_permission_bitmask(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu, bool ept)
{
unsigned bit, byte, pfec;
u8 map;
- bool fault, x, w, u, wf, uf, ff, smep;
+ bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0;
- smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+ cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+ cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
pfec = byte << 1;
map = 0;
wf = pfec & PFERR_WRITE_MASK;
uf = pfec & PFERR_USER_MASK;
ff = pfec & PFERR_FETCH_MASK;
+ /*
+ * PFERR_RSVD_MASK bit is set in PFEC if the access is not
+ * subject to SMAP restrictions, and cleared otherwise. The
+ * bit is only meaningful if the SMAP bit is set in CR4.
+ */
+ smapf = !(pfec & PFERR_RSVD_MASK);
for (bit = 0; bit < 8; ++bit) {
x = bit & ACC_EXEC_MASK;
w = bit & ACC_WRITE_MASK;
@@ -3626,12 +3633,33 @@
/* Allow supervisor writes if !cr0.wp */
w |= !is_write_protection(vcpu) && !uf;
/* Disallow supervisor fetches of user code if cr4.smep */
- x &= !(smep && u && !uf);
+ x &= !(cr4_smep && u && !uf);
+
+ /*
+ * SMAP:kernel-mode data accesses from user-mode
+ * mappings should fault. A fault is considered
+ * as a SMAP violation if all of the following
+ * conditions are ture:
+ * - X86_CR4_SMAP is set in CR4
+ * - An user page is accessed
+ * - Page fault in kernel mode
+ * - if CPL = 3 or X86_EFLAGS_AC is clear
+ *
+ * Here, we cover the first three conditions.
+ * The fourth is computed dynamically in
+ * permission_fault() and is in smapf.
+ *
+ * Also, SMAP does not affect instruction
+ * fetches, add the !ff check here to make it
+ * clearer.
+ */
+ smap = cr4_smap && u && !uf && !ff;
} else
/* Not really needed: no U/S accesses on ept */
u = 1;
- fault = (ff && !x) || (uf && !u) || (wf && !w);
+ fault = (ff && !x) || (uf && !u) || (wf && !w) ||
+ (smapf && smap);
map |= fault << bit;
}
mmu->permissions[byte] = map;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2926152..3842e70 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -44,11 +44,17 @@
#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1
-#define PFERR_PRESENT_MASK (1U << 0)
-#define PFERR_WRITE_MASK (1U << 1)
-#define PFERR_USER_MASK (1U << 2)
-#define PFERR_RSVD_MASK (1U << 3)
-#define PFERR_FETCH_MASK (1U << 4)
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+
+#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
+#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
+#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
@@ -73,6 +79,8 @@
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
bool execonly);
+void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ bool ept);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
@@ -110,10 +118,30 @@
* Will a fault with a given page-fault error code (pfec) cause a permission
* fault with the given access (in ACC_* format)?
*/
-static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
- unsigned pfec)
+static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ unsigned pte_access, unsigned pfec)
{
- return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
+ int cpl = kvm_x86_ops->get_cpl(vcpu);
+ unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+
+ /*
+ * If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
+ *
+ * If CPL = 3, SMAP applies to all supervisor-mode data accesses
+ * (these are implicit supervisor accesses) regardless of the value
+ * of EFLAGS.AC.
+ *
+ * This computes (cpl < 3) && (rflags & X86_EFLAGS_AC), leaving
+ * the result in X86_EFLAGS_AC. We then insert it in place of
+ * the PFERR_RSVD_MASK bit; this bit will always be zero in pfec,
+ * but it will be one in index if SMAP checks are being overridden.
+ * It is important to keep this branchless.
+ */
+ unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
+ int index = (pfec >> 1) +
+ (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+
+ return (mmu->permissions[index] >> pte_access) & 1;
}
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index b1e6c1b..123efd3 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -353,7 +353,7 @@
walker->ptes[walker->level - 1] = pte;
} while (!is_last_gpte(mmu, walker->level, pte));
- if (unlikely(permission_fault(mmu, pte_access, access))) {
+ if (unlikely(permission_fault(vcpu, mmu, pte_access, access))) {
errcode |= PFERR_PRESENT_MASK;
goto error;
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1320e0f..1f68c58 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3484,13 +3484,14 @@
hw_cr4 &= ~X86_CR4_PAE;
hw_cr4 |= X86_CR4_PSE;
/*
- * SMEP is disabled if CPU is in non-paging mode in
- * hardware. However KVM always uses paging mode to
+ * SMEP/SMAP is disabled if CPU is in non-paging mode
+ * in hardware. However KVM always uses paging mode to
* emulate guest non-paging mode with TDP.
- * To emulate this behavior, SMEP needs to be manually
- * disabled when guest switches to non-paging mode.
+ * To emulate this behavior, SMEP/SMAP needs to be
+ * manually disabled when guest switches to non-paging
+ * mode.
*/
- hw_cr4 &= ~X86_CR4_SMEP;
+ hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
} else if (!(cr4 & X86_CR4_PAE)) {
hw_cr4 &= ~X86_CR4_PAE;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9d1b5cd..8b8fc0b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -652,6 +652,9 @@
if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
return 1;
+ if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
+ return 1;
+
if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
return 1;
@@ -680,6 +683,9 @@
(!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
kvm_mmu_reset_context(vcpu);
+ if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
+ update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
+
if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
kvm_update_cpuid(vcpu);
@@ -1117,7 +1123,6 @@
{
struct timespec ts;
- WARN_ON(preemptible());
ktime_get_ts(&ts);
monotonic_to_bootbased(&ts);
return timespec_to_ns(&ts);
@@ -4164,7 +4169,8 @@
| (write ? PFERR_WRITE_MASK : 0);
if (vcpu_match_mmio_gva(vcpu, gva)
- && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
+ && !permission_fault(vcpu, vcpu->arch.walk_mmu,
+ vcpu->arch.access, access)) {
*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
(gva & (PAGE_SIZE - 1));
trace_vcpu_match_mmio(gva, *gpa, write, false);
diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c
index 8c3b255..e900961 100644
--- a/drivers/char/hw_random/bcm2835-rng.c
+++ b/drivers/char/hw_random/bcm2835-rng.c
@@ -61,18 +61,18 @@
}
bcm2835_rng_ops.priv = (unsigned long)rng_base;
+ /* set warm-up count & enable */
+ __raw_writel(RNG_WARMUP_COUNT, rng_base + RNG_STATUS);
+ __raw_writel(RNG_RBGEN, rng_base + RNG_CTRL);
+
/* register driver */
err = hwrng_register(&bcm2835_rng_ops);
if (err) {
dev_err(dev, "hwrng registration failed\n");
iounmap(rng_base);
- } else {
+ } else
dev_info(dev, "hwrng registered\n");
- /* set warm-up count & enable */
- __raw_writel(RNG_WARMUP_COUNT, rng_base + RNG_STATUS);
- __raw_writel(RNG_RBGEN, rng_base + RNG_CTRL);
- }
return err;
}
diff --git a/drivers/isdn/icn/icn.c b/drivers/isdn/icn/icn.c
index 53d487f..6a7447c 100644
--- a/drivers/isdn/icn/icn.c
+++ b/drivers/isdn/icn/icn.c
@@ -1155,7 +1155,7 @@
ulong a;
ulong flags;
int i;
- char cbuf[60];
+ char cbuf[80];
isdn_ctrl cmd;
icn_cdef cdef;
char __user *arg;
@@ -1309,7 +1309,6 @@
break;
if ((c->arg & 255) < ICN_BCH) {
char *p;
- char dial[50];
char dcode[4];
a = c->arg;
@@ -1321,10 +1320,10 @@
} else
/* Normal Dial */
strcpy(dcode, "CAL");
- strcpy(dial, p);
- sprintf(cbuf, "%02d;D%s_R%s,%02d,%02d,%s\n", (int) (a + 1),
- dcode, dial, c->parm.setup.si1,
- c->parm.setup.si2, c->parm.setup.eazmsn);
+ snprintf(cbuf, sizeof(cbuf),
+ "%02d;D%s_R%s,%02d,%02d,%s\n", (int) (a + 1),
+ dcode, p, c->parm.setup.si1,
+ c->parm.setup.si2, c->parm.setup.eazmsn);
i = icn_writecmd(cbuf, strlen(cbuf), 0, card);
}
break;
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index a8efb18..0ab8370 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -8627,6 +8627,7 @@
pci_disable_device(pdev);
}
+#ifdef CONFIG_PM_SLEEP
static int
bnx2_suspend(struct device *device)
{
@@ -8665,7 +8666,6 @@
return 0;
}
-#ifdef CONFIG_PM_SLEEP
static SIMPLE_DEV_PM_OPS(bnx2_pm_ops, bnx2_suspend, bnx2_resume);
#define BNX2_PM_OPS (&bnx2_pm_ops)
diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig
index 751d5c7..7e49c43 100644
--- a/drivers/net/ethernet/cadence/Kconfig
+++ b/drivers/net/ethernet/cadence/Kconfig
@@ -4,7 +4,7 @@
config NET_CADENCE
bool "Cadence devices"
- depends on HAS_IOMEM
+ depends on HAS_IOMEM && (ARM || AVR32 || COMPILE_TEST)
default y
---help---
If you have a network (Ethernet) card belonging to this class, say Y.
@@ -22,7 +22,7 @@
config ARM_AT91_ETHER
tristate "AT91RM9200 Ethernet support"
- depends on HAS_DMA
+ depends on HAS_DMA && (ARCH_AT91RM9200 || COMPILE_TEST)
select MACB
---help---
If you wish to compile a kernel for the AT91RM9200 and enable
@@ -30,7 +30,7 @@
config MACB
tristate "Cadence MACB/GEM support"
- depends on HAS_DMA
+ depends on HAS_DMA && (PLATFORM_AT32AP || ARCH_AT91 || ARCH_PICOXCELL || ARCH_ZYNQ || COMPILE_TEST)
select PHYLIB
---help---
The Cadence MACB ethernet interface is found on many Atmel AT32 and
diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
index 81e8402..8a96572 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
@@ -154,7 +154,7 @@
req->params = htons(L2T_W_PORT(e->lport) | L2T_W_NOREPLY(!sync));
req->l2t_idx = htons(e->idx);
req->vlan = htons(e->vlan);
- if (e->neigh)
+ if (e->neigh && !(e->neigh->dev->flags & IFF_LOOPBACK))
memcpy(e->dmac, e->neigh->ha, sizeof(e->dmac));
memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
@@ -394,6 +394,8 @@
if (e) {
spin_lock(&e->lock); /* avoid race with t4_l2t_free */
e->state = L2T_STATE_RESOLVING;
+ if (neigh->dev->flags & IFF_LOOPBACK)
+ memcpy(e->dmac, physdev->dev_addr, sizeof(e->dmac));
memcpy(e->addr, addr, addr_len);
e->ifindex = ifidx;
e->hash = hash;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index fb2fe65..bba6768 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -682,7 +682,7 @@
SF_RD_ID = 0x9f, /* read ID */
SF_ERASE_SECTOR = 0xd8, /* erase sector */
- FW_MAX_SIZE = 512 * 1024,
+ FW_MAX_SIZE = 16 * SF_SEC_SIZE,
};
/**
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 8ccaa25..97db5a7 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -374,6 +374,7 @@
#define BE_FLAGS_NAPI_ENABLED (1 << 9)
#define BE_FLAGS_QNQ_ASYNC_EVT_RCVD (1 << 11)
#define BE_FLAGS_VXLAN_OFFLOADS (1 << 12)
+#define BE_FLAGS_SETUP_DONE (1 << 13)
#define BE_UC_PMAC_COUNT 30
#define BE_VF_UC_PMAC_COUNT 2
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 3e6df47..a186454 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2033,11 +2033,13 @@
bool dummy_wrb;
int i, pending_txqs;
- /* Wait for a max of 200ms for all the tx-completions to arrive. */
+ /* Stop polling for compls when HW has been silent for 10ms */
do {
pending_txqs = adapter->num_tx_qs;
for_all_tx_queues(adapter, txo, i) {
+ cmpl = 0;
+ num_wrbs = 0;
txq = &txo->q;
while ((txcp = be_tx_compl_get(&txo->cq))) {
end_idx =
@@ -2050,14 +2052,13 @@
if (cmpl) {
be_cq_notify(adapter, txo->cq.id, false, cmpl);
atomic_sub(num_wrbs, &txq->used);
- cmpl = 0;
- num_wrbs = 0;
+ timeo = 0;
}
if (atomic_read(&txq->used) == 0)
pending_txqs--;
}
- if (pending_txqs == 0 || ++timeo > 200)
+ if (pending_txqs == 0 || ++timeo > 10 || be_hw_error(adapter))
break;
mdelay(1);
@@ -2725,6 +2726,12 @@
struct be_eq_obj *eqo;
int i;
+ /* This protection is needed as be_close() may be called even when the
+ * adapter is in cleared state (after eeh perm failure)
+ */
+ if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
+ return 0;
+
be_roce_dev_close(adapter);
if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
@@ -3055,6 +3062,7 @@
be_clear_queues(adapter);
be_msix_disable(adapter);
+ adapter->flags &= ~BE_FLAGS_SETUP_DONE;
return 0;
}
@@ -3559,6 +3567,7 @@
adapter->phy.fc_autoneg = 1;
be_schedule_worker(adapter);
+ adapter->flags |= BE_FLAGS_SETUP_DONE;
return 0;
err:
be_clear(adapter);
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index d04b1c3..14786c8 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -91,7 +91,7 @@
#define MVNETA_RX_MIN_FRAME_SIZE 0x247c
#define MVNETA_SERDES_CFG 0x24A0
#define MVNETA_SGMII_SERDES_PROTO 0x0cc7
-#define MVNETA_RGMII_SERDES_PROTO 0x0667
+#define MVNETA_QSGMII_SERDES_PROTO 0x0667
#define MVNETA_TYPE_PRIO 0x24bc
#define MVNETA_FORCE_UNI BIT(21)
#define MVNETA_TXQ_CMD_1 0x24e4
@@ -2721,29 +2721,44 @@
}
/* Power up the port */
-static void mvneta_port_power_up(struct mvneta_port *pp, int phy_mode)
+static int mvneta_port_power_up(struct mvneta_port *pp, int phy_mode)
{
- u32 val;
+ u32 ctrl;
/* MAC Cause register should be cleared */
mvreg_write(pp, MVNETA_UNIT_INTR_CAUSE, 0);
- if (phy_mode == PHY_INTERFACE_MODE_SGMII)
+ ctrl = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
+
+ /* Even though it might look weird, when we're configured in
+ * SGMII or QSGMII mode, the RGMII bit needs to be set.
+ */
+ switch(phy_mode) {
+ case PHY_INTERFACE_MODE_QSGMII:
+ mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_QSGMII_SERDES_PROTO);
+ ctrl |= MVNETA_GMAC2_PCS_ENABLE | MVNETA_GMAC2_PORT_RGMII;
+ break;
+ case PHY_INTERFACE_MODE_SGMII:
mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO);
- else
- mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_RGMII_SERDES_PROTO);
-
- val = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
-
- val |= MVNETA_GMAC2_PCS_ENABLE | MVNETA_GMAC2_PORT_RGMII;
+ ctrl |= MVNETA_GMAC2_PCS_ENABLE | MVNETA_GMAC2_PORT_RGMII;
+ break;
+ case PHY_INTERFACE_MODE_RGMII:
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ ctrl |= MVNETA_GMAC2_PORT_RGMII;
+ break;
+ default:
+ return -EINVAL;
+ }
/* Cancel Port Reset */
- val &= ~MVNETA_GMAC2_PORT_RESET;
- mvreg_write(pp, MVNETA_GMAC_CTRL_2, val);
+ ctrl &= ~MVNETA_GMAC2_PORT_RESET;
+ mvreg_write(pp, MVNETA_GMAC_CTRL_2, ctrl);
while ((mvreg_read(pp, MVNETA_GMAC_CTRL_2) &
MVNETA_GMAC2_PORT_RESET) != 0)
continue;
+
+ return 0;
}
/* Device initialization routine */
@@ -2854,7 +2869,12 @@
dev_err(&pdev->dev, "can't init eth hal\n");
goto err_free_stats;
}
- mvneta_port_power_up(pp, phy_mode);
+
+ err = mvneta_port_power_up(pp, phy_mode);
+ if (err < 0) {
+ dev_err(&pdev->dev, "can't power up port\n");
+ goto err_deinit;
+ }
dram_target_info = mv_mbus_dram_info();
if (dram_target_info)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 70e9532..c2cd8d3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -66,7 +66,6 @@
cq->ring = ring;
cq->is_tx = mode;
- spin_lock_init(&cq->lock);
/* Allocate HW buffers on provided NUMA node.
* dev->numa_node is used in mtt range allocation flow.
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index f085c2d..7e4b172 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1304,15 +1304,11 @@
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_en_cq *cq;
- unsigned long flags;
int i;
for (i = 0; i < priv->rx_ring_num; i++) {
cq = priv->rx_cq[i];
- spin_lock_irqsave(&cq->lock, flags);
- napi_synchronize(&cq->napi);
- mlx4_en_process_rx_cq(dev, cq, 0);
- spin_unlock_irqrestore(&cq->lock, flags);
+ napi_schedule(&cq->napi);
}
}
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index f0ae95f..cef267e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2301,13 +2301,8 @@
/* Allow large DMA segments, up to the firmware limit of 1 GB */
dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
- if (!priv) {
- err = -ENOMEM;
- goto err_release_regions;
- }
-
- dev = &priv->dev;
+ dev = pci_get_drvdata(pdev);
+ priv = mlx4_priv(dev);
dev->pdev = pdev;
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
@@ -2374,10 +2369,10 @@
} else {
atomic_inc(&pf_loading);
err = pci_enable_sriov(pdev, total_vfs);
- atomic_dec(&pf_loading);
if (err) {
mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n",
err);
+ atomic_dec(&pf_loading);
err = 0;
} else {
mlx4_warn(dev, "Running in master mode\n");
@@ -2535,8 +2530,10 @@
mlx4_sense_init(dev);
mlx4_start_sense(dev);
- priv->pci_dev_data = pci_dev_data;
- pci_set_drvdata(pdev, dev);
+ priv->removed = 0;
+
+ if (mlx4_is_master(dev) && dev->num_vfs)
+ atomic_dec(&pf_loading);
return 0;
@@ -2588,6 +2585,9 @@
if (!mlx4_is_slave(dev))
mlx4_free_ownership(dev);
+ if (mlx4_is_master(dev) && dev->num_vfs)
+ atomic_dec(&pf_loading);
+
kfree(priv->dev.dev_vfs);
err_free_dev:
@@ -2604,85 +2604,110 @@
static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
{
+ struct mlx4_priv *priv;
+ struct mlx4_dev *dev;
+
printk_once(KERN_INFO "%s", mlx4_version);
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ dev = &priv->dev;
+ pci_set_drvdata(pdev, dev);
+ priv->pci_dev_data = id->driver_data;
+
return __mlx4_init_one(pdev, id->driver_data);
}
+static void __mlx4_remove_one(struct pci_dev *pdev)
+{
+ struct mlx4_dev *dev = pci_get_drvdata(pdev);
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int pci_dev_data;
+ int p;
+
+ if (priv->removed)
+ return;
+
+ pci_dev_data = priv->pci_dev_data;
+
+ /* in SRIOV it is not allowed to unload the pf's
+ * driver while there are alive vf's */
+ if (mlx4_is_master(dev) && mlx4_how_many_lives_vf(dev))
+ printk(KERN_ERR "Removing PF when there are assigned VF's !!!\n");
+ mlx4_stop_sense(dev);
+ mlx4_unregister_device(dev);
+
+ for (p = 1; p <= dev->caps.num_ports; p++) {
+ mlx4_cleanup_port_info(&priv->port[p]);
+ mlx4_CLOSE_PORT(dev, p);
+ }
+
+ if (mlx4_is_master(dev))
+ mlx4_free_resource_tracker(dev,
+ RES_TR_FREE_SLAVES_ONLY);
+
+ mlx4_cleanup_counters_table(dev);
+ mlx4_cleanup_qp_table(dev);
+ mlx4_cleanup_srq_table(dev);
+ mlx4_cleanup_cq_table(dev);
+ mlx4_cmd_use_polling(dev);
+ mlx4_cleanup_eq_table(dev);
+ mlx4_cleanup_mcg_table(dev);
+ mlx4_cleanup_mr_table(dev);
+ mlx4_cleanup_xrcd_table(dev);
+ mlx4_cleanup_pd_table(dev);
+
+ if (mlx4_is_master(dev))
+ mlx4_free_resource_tracker(dev,
+ RES_TR_FREE_STRUCTS_ONLY);
+
+ iounmap(priv->kar);
+ mlx4_uar_free(dev, &priv->driver_uar);
+ mlx4_cleanup_uar_table(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_clear_steering(dev);
+ mlx4_free_eq_table(dev);
+ if (mlx4_is_master(dev))
+ mlx4_multi_func_cleanup(dev);
+ mlx4_close_hca(dev);
+ if (mlx4_is_slave(dev))
+ mlx4_multi_func_cleanup(dev);
+ mlx4_cmd_cleanup(dev);
+
+ if (dev->flags & MLX4_FLAG_MSI_X)
+ pci_disable_msix(pdev);
+ if (dev->flags & MLX4_FLAG_SRIOV) {
+ mlx4_warn(dev, "Disabling SR-IOV\n");
+ pci_disable_sriov(pdev);
+ dev->num_vfs = 0;
+ }
+
+ if (!mlx4_is_slave(dev))
+ mlx4_free_ownership(dev);
+
+ kfree(dev->caps.qp0_tunnel);
+ kfree(dev->caps.qp0_proxy);
+ kfree(dev->caps.qp1_tunnel);
+ kfree(dev->caps.qp1_proxy);
+ kfree(dev->dev_vfs);
+
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ memset(priv, 0, sizeof(*priv));
+ priv->pci_dev_data = pci_dev_data;
+ priv->removed = 1;
+}
+
static void mlx4_remove_one(struct pci_dev *pdev)
{
struct mlx4_dev *dev = pci_get_drvdata(pdev);
struct mlx4_priv *priv = mlx4_priv(dev);
- int p;
- if (dev) {
- /* in SRIOV it is not allowed to unload the pf's
- * driver while there are alive vf's */
- if (mlx4_is_master(dev)) {
- if (mlx4_how_many_lives_vf(dev))
- printk(KERN_ERR "Removing PF when there are assigned VF's !!!\n");
- }
- mlx4_stop_sense(dev);
- mlx4_unregister_device(dev);
-
- for (p = 1; p <= dev->caps.num_ports; p++) {
- mlx4_cleanup_port_info(&priv->port[p]);
- mlx4_CLOSE_PORT(dev, p);
- }
-
- if (mlx4_is_master(dev))
- mlx4_free_resource_tracker(dev,
- RES_TR_FREE_SLAVES_ONLY);
-
- mlx4_cleanup_counters_table(dev);
- mlx4_cleanup_qp_table(dev);
- mlx4_cleanup_srq_table(dev);
- mlx4_cleanup_cq_table(dev);
- mlx4_cmd_use_polling(dev);
- mlx4_cleanup_eq_table(dev);
- mlx4_cleanup_mcg_table(dev);
- mlx4_cleanup_mr_table(dev);
- mlx4_cleanup_xrcd_table(dev);
- mlx4_cleanup_pd_table(dev);
-
- if (mlx4_is_master(dev))
- mlx4_free_resource_tracker(dev,
- RES_TR_FREE_STRUCTS_ONLY);
-
- iounmap(priv->kar);
- mlx4_uar_free(dev, &priv->driver_uar);
- mlx4_cleanup_uar_table(dev);
- if (!mlx4_is_slave(dev))
- mlx4_clear_steering(dev);
- mlx4_free_eq_table(dev);
- if (mlx4_is_master(dev))
- mlx4_multi_func_cleanup(dev);
- mlx4_close_hca(dev);
- if (mlx4_is_slave(dev))
- mlx4_multi_func_cleanup(dev);
- mlx4_cmd_cleanup(dev);
-
- if (dev->flags & MLX4_FLAG_MSI_X)
- pci_disable_msix(pdev);
- if (dev->flags & MLX4_FLAG_SRIOV) {
- mlx4_warn(dev, "Disabling SR-IOV\n");
- pci_disable_sriov(pdev);
- }
-
- if (!mlx4_is_slave(dev))
- mlx4_free_ownership(dev);
-
- kfree(dev->caps.qp0_tunnel);
- kfree(dev->caps.qp0_proxy);
- kfree(dev->caps.qp1_tunnel);
- kfree(dev->caps.qp1_proxy);
- kfree(dev->dev_vfs);
-
- kfree(priv);
- pci_release_regions(pdev);
- pci_disable_device(pdev);
- pci_set_drvdata(pdev, NULL);
- }
+ __mlx4_remove_one(pdev);
+ kfree(priv);
+ pci_set_drvdata(pdev, NULL);
}
int mlx4_restart_one(struct pci_dev *pdev)
@@ -2692,7 +2717,7 @@
int pci_dev_data;
pci_dev_data = priv->pci_dev_data;
- mlx4_remove_one(pdev);
+ __mlx4_remove_one(pdev);
return __mlx4_init_one(pdev, pci_dev_data);
}
@@ -2747,7 +2772,7 @@
static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
- mlx4_remove_one(pdev);
+ __mlx4_remove_one(pdev);
return state == pci_channel_io_perm_failure ?
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
@@ -2755,11 +2780,11 @@
static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
{
- const struct pci_device_id *id;
- int ret;
+ struct mlx4_dev *dev = pci_get_drvdata(pdev);
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int ret;
- id = pci_match_id(mlx4_pci_table, pdev);
- ret = __mlx4_init_one(pdev, id->driver_data);
+ ret = __mlx4_init_one(pdev, priv->pci_dev_data);
return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index cf8be41..f9c4651 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -800,6 +800,7 @@
spinlock_t ctx_lock;
int pci_dev_data;
+ int removed;
struct list_head pgdir_list;
struct mutex pgdir_mutex;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 7a733c2..04d9b6fe 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -319,7 +319,6 @@
struct mlx4_cq mcq;
struct mlx4_hwq_resources wqres;
int ring;
- spinlock_t lock;
struct net_device *dev;
struct napi_struct napi;
int size;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
index b48737d..ba20c72 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
@@ -2139,8 +2139,6 @@
ahw->max_mac_filters = nic_info.max_mac_filters;
ahw->max_mtu = nic_info.max_mtu;
- adapter->max_tx_rings = ahw->max_tx_ques;
- adapter->max_sds_rings = ahw->max_rx_ques;
/* eSwitch capability indicates vNIC mode.
* vNIC and SRIOV are mutually exclusive operational modes.
* If SR-IOV capability is detected, SR-IOV physical function
@@ -2161,6 +2159,7 @@
int qlcnic_83xx_configure_opmode(struct qlcnic_adapter *adapter)
{
struct qlcnic_hardware_context *ahw = adapter->ahw;
+ u16 max_sds_rings, max_tx_rings;
int ret;
ret = qlcnic_83xx_get_nic_configuration(adapter);
@@ -2173,18 +2172,21 @@
if (qlcnic_83xx_config_vnic_opmode(adapter))
return -EIO;
- adapter->max_sds_rings = QLCNIC_MAX_VNIC_SDS_RINGS;
- adapter->max_tx_rings = QLCNIC_MAX_VNIC_TX_RINGS;
+ max_sds_rings = QLCNIC_MAX_VNIC_SDS_RINGS;
+ max_tx_rings = QLCNIC_MAX_VNIC_TX_RINGS;
} else if (ret == QLC_83XX_DEFAULT_OPMODE) {
ahw->nic_mode = QLCNIC_DEFAULT_MODE;
adapter->nic_ops->init_driver = qlcnic_83xx_init_default_driver;
ahw->idc.state_entry = qlcnic_83xx_idc_ready_state_entry;
- adapter->max_sds_rings = QLCNIC_MAX_SDS_RINGS;
- adapter->max_tx_rings = QLCNIC_MAX_TX_RINGS;
+ max_sds_rings = QLCNIC_MAX_SDS_RINGS;
+ max_tx_rings = QLCNIC_MAX_TX_RINGS;
} else {
return -EIO;
}
+ adapter->max_sds_rings = min(ahw->max_rx_ques, max_sds_rings);
+ adapter->max_tx_rings = min(ahw->max_tx_ques, max_tx_rings);
+
return 0;
}
@@ -2348,15 +2350,16 @@
goto disable_intr;
}
+ INIT_DELAYED_WORK(&adapter->idc_aen_work, qlcnic_83xx_idc_aen_work);
+
err = qlcnic_83xx_setup_mbx_intr(adapter);
if (err)
goto disable_mbx_intr;
qlcnic_83xx_clear_function_resources(adapter);
-
- INIT_DELAYED_WORK(&adapter->idc_aen_work, qlcnic_83xx_idc_aen_work);
-
+ qlcnic_dcb_enable(adapter->dcb);
qlcnic_83xx_initialize_nic(adapter, 1);
+ qlcnic_dcb_get_info(adapter->dcb);
/* Configure default, SR-IOV or Virtual NIC mode of operation */
err = qlcnic_83xx_configure_opmode(adapter);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
index 64dcbf3..c1e11f5 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
@@ -883,8 +883,6 @@
npar_info->max_rx_ques = le16_to_cpu(nic_info->max_rx_ques);
npar_info->capabilities = le32_to_cpu(nic_info->capabilities);
npar_info->max_mtu = le16_to_cpu(nic_info->max_mtu);
- adapter->max_tx_rings = npar_info->max_tx_ques;
- adapter->max_sds_rings = npar_info->max_rx_ques;
}
qlcnic_free_mbx_args(&cmd);
@@ -1356,6 +1354,7 @@
arg2 &= ~BIT_3;
break;
case QLCNIC_ADD_VLAN:
+ arg1 &= ~(0x0ffff << 16);
arg1 |= (BIT_2 | BIT_5);
arg1 |= (esw_cfg->vlan_id << 16);
break;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c
index 7d4f549..a51fe18 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c
@@ -330,8 +330,6 @@
goto out_free_cfg;
}
- qlcnic_dcb_get_info(dcb);
-
return 0;
out_free_cfg:
kfree(dcb->cfg);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 309d056..dbf7539 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -670,7 +670,7 @@
else
num_msix += adapter->drv_tx_rings;
- if (adapter->drv_rss_rings > 0)
+ if (adapter->drv_rss_rings > 0)
num_msix += adapter->drv_rss_rings;
else
num_msix += adapter->drv_sds_rings;
@@ -686,19 +686,15 @@
return -ENOMEM;
}
-restore:
for (vector = 0; vector < num_msix; vector++)
adapter->msix_entries[vector].entry = vector;
+restore:
err = pci_enable_msix(pdev, adapter->msix_entries, num_msix);
- if (err == 0) {
- adapter->ahw->num_msix = num_msix;
- if (adapter->drv_tss_rings > 0)
- adapter->drv_tx_rings = adapter->drv_tss_rings;
+ if (err > 0) {
+ if (!adapter->drv_tss_rings && !adapter->drv_rss_rings)
+ return -ENOSPC;
- if (adapter->drv_rss_rings > 0)
- adapter->drv_sds_rings = adapter->drv_rss_rings;
- } else {
netdev_info(adapter->netdev,
"Unable to allocate %d MSI-X vectors, Available vectors %d\n",
num_msix, err);
@@ -716,12 +712,20 @@
"Restoring %d Tx, %d SDS rings for total %d vectors.\n",
adapter->drv_tx_rings, adapter->drv_sds_rings,
num_msix);
- goto restore;
- err = -EIO;
+ goto restore;
+ } else if (err < 0) {
+ return err;
}
- return err;
+ adapter->ahw->num_msix = num_msix;
+ if (adapter->drv_tss_rings > 0)
+ adapter->drv_tx_rings = adapter->drv_tss_rings;
+
+ if (adapter->drv_rss_rings > 0)
+ adapter->drv_sds_rings = adapter->drv_rss_rings;
+
+ return 0;
}
int qlcnic_enable_msix(struct qlcnic_adapter *adapter, u32 num_msix)
@@ -2528,8 +2532,6 @@
goto err_out_free_hw;
}
- qlcnic_dcb_enable(adapter->dcb);
-
if (qlcnic_read_mac_addr(adapter))
dev_warn(&pdev->dev, "failed to read mac addr\n");
@@ -2549,7 +2551,10 @@
"Device does not support MSI interrupts\n");
if (qlcnic_82xx_check(adapter)) {
+ qlcnic_dcb_enable(adapter->dcb);
+ qlcnic_dcb_get_info(adapter->dcb);
err = qlcnic_setup_intr(adapter);
+
if (err) {
dev_err(&pdev->dev, "Failed to setup interrupt\n");
goto err_out_disable_msi;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
index 14f748c..2801379 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
@@ -461,6 +461,16 @@
{
struct net_device *netdev = adapter->netdev;
+ if (pci_vfs_assigned(adapter->pdev)) {
+ netdev_err(adapter->netdev,
+ "SR-IOV VFs belonging to port %d are assigned to VMs. SR-IOV can not be disabled on this port\n",
+ adapter->portnum);
+ netdev_info(adapter->netdev,
+ "Please detach SR-IOV VFs belonging to port %d from VMs, and then try to disable SR-IOV on this port\n",
+ adapter->portnum);
+ return -EPERM;
+ }
+
rtnl_lock();
if (netif_running(netdev))
__qlcnic_down(adapter, netdev);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
index 448d156..cd346e2 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
@@ -354,7 +354,7 @@
{
int i;
- for (i = 0; i < adapter->ahw->max_vnic_func; i++) {
+ for (i = 0; i < adapter->ahw->total_nic_func; i++) {
if (adapter->npars[i].pci_func == pci_func)
return i;
}
@@ -720,6 +720,7 @@
struct qlcnic_adapter *adapter = dev_get_drvdata(dev);
struct qlcnic_npar_func_cfg *np_cfg;
struct qlcnic_info nic_info;
+ u8 pci_func;
int i, ret;
u32 count;
@@ -729,26 +730,28 @@
count = size / sizeof(struct qlcnic_npar_func_cfg);
for (i = 0; i < adapter->ahw->total_nic_func; i++) {
- if (qlcnic_is_valid_nic_func(adapter, i) < 0)
- continue;
if (adapter->npars[i].pci_func >= count) {
dev_dbg(dev, "%s: Total nic functions[%d], App sent function count[%d]\n",
__func__, adapter->ahw->total_nic_func, count);
continue;
}
- ret = qlcnic_get_nic_info(adapter, &nic_info, i);
- if (ret)
- return ret;
if (!adapter->npars[i].eswitch_status)
continue;
- np_cfg[i].pci_func = i;
- np_cfg[i].op_mode = (u8)nic_info.op_mode;
- np_cfg[i].port_num = nic_info.phys_port;
- np_cfg[i].fw_capab = nic_info.capabilities;
- np_cfg[i].min_bw = nic_info.min_tx_bw;
- np_cfg[i].max_bw = nic_info.max_tx_bw;
- np_cfg[i].max_tx_queues = nic_info.max_tx_ques;
- np_cfg[i].max_rx_queues = nic_info.max_rx_ques;
+ pci_func = adapter->npars[i].pci_func;
+ if (qlcnic_is_valid_nic_func(adapter, pci_func) < 0)
+ continue;
+ ret = qlcnic_get_nic_info(adapter, &nic_info, pci_func);
+ if (ret)
+ return ret;
+
+ np_cfg[pci_func].pci_func = pci_func;
+ np_cfg[pci_func].op_mode = (u8)nic_info.op_mode;
+ np_cfg[pci_func].port_num = nic_info.phys_port;
+ np_cfg[pci_func].fw_capab = nic_info.capabilities;
+ np_cfg[pci_func].min_bw = nic_info.min_tx_bw;
+ np_cfg[pci_func].max_bw = nic_info.max_tx_bw;
+ np_cfg[pci_func].max_tx_queues = nic_info.max_tx_ques;
+ np_cfg[pci_func].max_rx_queues = nic_info.max_rx_ques;
}
return size;
}
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 21c20ea0..b5ed30a 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -738,8 +738,11 @@
/* If it was a port reset, trigger reallocation of MC resources.
* Note that on an MC reset nothing needs to be done now because we'll
* detect the MC reset later and handle it then.
+ * For an FLR, we never get an MC reset event, but the MC has reset all
+ * resources assigned to us, so we have to trigger reallocation now.
*/
- if (reset_type == RESET_TYPE_ALL && !rc)
+ if ((reset_type == RESET_TYPE_ALL ||
+ reset_type == RESET_TYPE_MCDI_TIMEOUT) && !rc)
efx_ef10_reset_mc_allocations(efx);
return rc;
}
@@ -2141,6 +2144,11 @@
return 0;
}
+static void efx_ef10_prepare_flr(struct efx_nic *efx)
+{
+ atomic_set(&efx->active_queues, 0);
+}
+
static bool efx_ef10_filter_equal(const struct efx_filter_spec *left,
const struct efx_filter_spec *right)
{
@@ -3603,6 +3611,8 @@
.probe_port = efx_mcdi_port_probe,
.remove_port = efx_mcdi_port_remove,
.fini_dmaq = efx_ef10_fini_dmaq,
+ .prepare_flr = efx_ef10_prepare_flr,
+ .finish_flr = efx_port_dummy_op_void,
.describe_stats = efx_ef10_describe_stats,
.update_stats = efx_ef10_update_stats,
.start_stats = efx_mcdi_mac_start_stats,
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 57b971e..63d595f 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -76,6 +76,7 @@
[RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL",
[RESET_TYPE_WORLD] = "WORLD",
[RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
+ [RESET_TYPE_MC_BIST] = "MC_BIST",
[RESET_TYPE_DISABLE] = "DISABLE",
[RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG",
[RESET_TYPE_INT_ERROR] = "INT_ERROR",
@@ -83,7 +84,7 @@
[RESET_TYPE_DMA_ERROR] = "DMA_ERROR",
[RESET_TYPE_TX_SKIP] = "TX_SKIP",
[RESET_TYPE_MC_FAILURE] = "MC_FAILURE",
- [RESET_TYPE_MC_BIST] = "MC_BIST",
+ [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)",
};
/* Reset workqueue. If any NIC has a hardware failure then a reset will be
@@ -1739,7 +1740,8 @@
/* Check that it is appropriate to restart the interface. All
* of these flags are safe to read under just the rtnl lock */
- if (efx->port_enabled || !netif_running(efx->net_dev))
+ if (efx->port_enabled || !netif_running(efx->net_dev) ||
+ efx->reset_pending)
return;
efx_start_port(efx);
@@ -2334,6 +2336,9 @@
{
EFX_ASSERT_RESET_SERIALISED(efx);
+ if (method == RESET_TYPE_MCDI_TIMEOUT)
+ efx->type->prepare_flr(efx);
+
efx_stop_all(efx);
efx_disable_interrupts(efx);
@@ -2354,6 +2359,10 @@
EFX_ASSERT_RESET_SERIALISED(efx);
+ if (method == RESET_TYPE_MCDI_TIMEOUT)
+ efx->type->finish_flr(efx);
+
+ /* Ensure that SRAM is initialised even if we're disabling the device */
rc = efx->type->init(efx);
if (rc) {
netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
@@ -2417,7 +2426,10 @@
/* Clear flags for the scopes we covered. We assume the NIC and
* driver are now quiescent so that there is no race here.
*/
- efx->reset_pending &= -(1 << (method + 1));
+ if (method < RESET_TYPE_MAX_METHOD)
+ efx->reset_pending &= -(1 << (method + 1));
+ else /* it doesn't fit into the well-ordered scope hierarchy */
+ __clear_bit(method, &efx->reset_pending);
/* Reinitialise bus-mastering, which may have been turned off before
* the reset was scheduled. This is still appropriate, even in the
@@ -2546,6 +2558,7 @@
case RESET_TYPE_DISABLE:
case RESET_TYPE_RECOVER_OR_DISABLE:
case RESET_TYPE_MC_BIST:
+ case RESET_TYPE_MCDI_TIMEOUT:
method = type;
netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
RESET_TYPE(method));
diff --git a/drivers/net/ethernet/sfc/enum.h b/drivers/net/ethernet/sfc/enum.h
index 75ef7ef..d1dbb5f 100644
--- a/drivers/net/ethernet/sfc/enum.h
+++ b/drivers/net/ethernet/sfc/enum.h
@@ -143,6 +143,7 @@
* @RESET_TYPE_WORLD: Reset as much as possible
* @RESET_TYPE_RECOVER_OR_DISABLE: Try to recover. Apply RESET_TYPE_DISABLE if
* unsuccessful.
+ * @RESET_TYPE_MC_BIST: MC entering BIST mode.
* @RESET_TYPE_DISABLE: Reset datapath, MAC and PHY; leave NIC disabled
* @RESET_TYPE_TX_WATCHDOG: reset due to TX watchdog
* @RESET_TYPE_INT_ERROR: reset due to internal error
@@ -150,14 +151,16 @@
* @RESET_TYPE_DMA_ERROR: DMA error
* @RESET_TYPE_TX_SKIP: hardware completed empty tx descriptors
* @RESET_TYPE_MC_FAILURE: MC reboot/assertion
+ * @RESET_TYPE_MCDI_TIMEOUT: MCDI timeout.
*/
enum reset_type {
- RESET_TYPE_INVISIBLE = 0,
- RESET_TYPE_RECOVER_OR_ALL = 1,
- RESET_TYPE_ALL = 2,
- RESET_TYPE_WORLD = 3,
- RESET_TYPE_RECOVER_OR_DISABLE = 4,
- RESET_TYPE_DISABLE = 5,
+ RESET_TYPE_INVISIBLE,
+ RESET_TYPE_RECOVER_OR_ALL,
+ RESET_TYPE_ALL,
+ RESET_TYPE_WORLD,
+ RESET_TYPE_RECOVER_OR_DISABLE,
+ RESET_TYPE_MC_BIST,
+ RESET_TYPE_DISABLE,
RESET_TYPE_MAX_METHOD,
RESET_TYPE_TX_WATCHDOG,
RESET_TYPE_INT_ERROR,
@@ -165,7 +168,13 @@
RESET_TYPE_DMA_ERROR,
RESET_TYPE_TX_SKIP,
RESET_TYPE_MC_FAILURE,
- RESET_TYPE_MC_BIST,
+ /* RESET_TYPE_MCDI_TIMEOUT is actually a method, not just a reason, but
+ * it doesn't fit the scope hierarchy (not well-ordered by inclusion).
+ * We encode this by having its enum value be greater than
+ * RESET_TYPE_MAX_METHOD. This also prevents issuing it with
+ * efx_ioctl_reset.
+ */
+ RESET_TYPE_MCDI_TIMEOUT,
RESET_TYPE_MAX,
};
diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c
index 8ec20b7..fae25a4 100644
--- a/drivers/net/ethernet/sfc/falcon.c
+++ b/drivers/net/ethernet/sfc/falcon.c
@@ -2696,6 +2696,8 @@
.fini_dmaq = efx_farch_fini_dmaq,
.prepare_flush = falcon_prepare_flush,
.finish_flush = efx_port_dummy_op_void,
+ .prepare_flr = efx_port_dummy_op_void,
+ .finish_flr = efx_farch_finish_flr,
.describe_stats = falcon_describe_nic_stats,
.update_stats = falcon_update_nic_stats,
.start_stats = falcon_start_nic_stats,
@@ -2790,6 +2792,8 @@
.fini_dmaq = efx_farch_fini_dmaq,
.prepare_flush = falcon_prepare_flush,
.finish_flush = efx_port_dummy_op_void,
+ .prepare_flr = efx_port_dummy_op_void,
+ .finish_flr = efx_farch_finish_flr,
.describe_stats = falcon_describe_nic_stats,
.update_stats = falcon_update_nic_stats,
.start_stats = falcon_start_nic_stats,
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index a087613..0537381 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -741,6 +741,28 @@
return rc;
}
+/* Reset queue and flush accounting after FLR
+ *
+ * One possible cause of FLR recovery is that DMA may be failing (eg. if bus
+ * mastering was disabled), in which case we don't receive (RXQ) flush
+ * completion events. This means that efx->rxq_flush_outstanding remained at 4
+ * after the FLR; also, efx->active_queues was non-zero (as no flush completion
+ * events were received, and we didn't go through efx_check_tx_flush_complete())
+ * If we don't fix this up, on the next call to efx_realloc_channels() we won't
+ * flush any RX queues because efx->rxq_flush_outstanding is at the limit of 4
+ * for batched flush requests; and the efx->active_queues gets messed up because
+ * we keep incrementing for the newly initialised queues, but it never went to
+ * zero previously. Then we get a timeout every time we try to restart the
+ * queues, as it doesn't go back to zero when we should be flushing the queues.
+ */
+void efx_farch_finish_flr(struct efx_nic *efx)
+{
+ atomic_set(&efx->rxq_flush_pending, 0);
+ atomic_set(&efx->rxq_flush_outstanding, 0);
+ atomic_set(&efx->active_queues, 0);
+}
+
+
/**************************************************************************
*
* Event queue processing
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index 7bd4b14..5239cf9 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -52,12 +52,7 @@
static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating,
bool *was_attached_out);
static bool efx_mcdi_poll_once(struct efx_nic *efx);
-
-static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx)
-{
- EFX_BUG_ON_PARANOID(!efx->mcdi);
- return &efx->mcdi->iface;
-}
+static void efx_mcdi_abandon(struct efx_nic *efx);
int efx_mcdi_init(struct efx_nic *efx)
{
@@ -558,6 +553,8 @@
rc = 0;
}
+ efx_mcdi_abandon(efx);
+
/* Close the race with efx_mcdi_ev_cpl() executing just too late
* and completing a request we've just cancelled, by ensuring
* that the seqno check therein fails.
@@ -672,6 +669,9 @@
if (efx->mc_bist_for_other_fn)
return -ENETDOWN;
+ if (mcdi->mode == MCDI_MODE_FAIL)
+ return -ENETDOWN;
+
efx_mcdi_acquire_sync(mcdi);
efx_mcdi_send_request(efx, cmd, inbuf, inlen);
return 0;
@@ -812,7 +812,11 @@
return;
mcdi = efx_mcdi(efx);
- if (mcdi->mode == MCDI_MODE_POLL)
+ /* If already in polling mode, nothing to do.
+ * If in fail-fast state, don't switch to polled completion.
+ * FLR recovery will do that later.
+ */
+ if (mcdi->mode == MCDI_MODE_POLL || mcdi->mode == MCDI_MODE_FAIL)
return;
/* We can switch from event completion to polled completion, because
@@ -841,8 +845,8 @@
mcdi = efx_mcdi(efx);
- /* We must be in polling mode so no more requests can be queued */
- BUG_ON(mcdi->mode != MCDI_MODE_POLL);
+ /* We must be in poll or fail mode so no more requests can be queued */
+ BUG_ON(mcdi->mode == MCDI_MODE_EVENTS);
del_timer_sync(&mcdi->async_timer);
@@ -875,8 +879,11 @@
return;
mcdi = efx_mcdi(efx);
-
- if (mcdi->mode == MCDI_MODE_EVENTS)
+ /* If already in event completion mode, nothing to do.
+ * If in fail-fast state, don't switch to event completion. FLR
+ * recovery will do that later.
+ */
+ if (mcdi->mode == MCDI_MODE_EVENTS || mcdi->mode == MCDI_MODE_FAIL)
return;
/* We can't switch from polled to event completion in the middle of a
@@ -966,6 +973,19 @@
spin_unlock(&mcdi->iface_lock);
}
+/* MCDI timeouts seen, so make all MCDI calls fail-fast and issue an FLR to try
+ * to recover.
+ */
+static void efx_mcdi_abandon(struct efx_nic *efx)
+{
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+
+ if (xchg(&mcdi->mode, MCDI_MODE_FAIL) == MCDI_MODE_FAIL)
+ return; /* it had already been done */
+ netif_dbg(efx, hw, efx->net_dev, "MCDI is timing out; trying to recover\n");
+ efx_schedule_reset(efx, RESET_TYPE_MCDI_TIMEOUT);
+}
+
/* Called from falcon_process_eventq for MCDI events */
void efx_mcdi_process_event(struct efx_channel *channel,
efx_qword_t *event)
@@ -1512,6 +1532,19 @@
{
int rc;
+ /* If MCDI is down, we can't handle_assertion */
+ if (method == RESET_TYPE_MCDI_TIMEOUT) {
+ rc = pci_reset_function(efx->pci_dev);
+ if (rc)
+ return rc;
+ /* Re-enable polled MCDI completion */
+ if (efx->mcdi) {
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+ mcdi->mode = MCDI_MODE_POLL;
+ }
+ return 0;
+ }
+
/* Recover from a failed assertion pre-reset */
rc = efx_mcdi_handle_assertion(efx);
if (rc)
diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h
index 52931ae..56465f7 100644
--- a/drivers/net/ethernet/sfc/mcdi.h
+++ b/drivers/net/ethernet/sfc/mcdi.h
@@ -28,9 +28,16 @@
MCDI_STATE_COMPLETED,
};
+/**
+ * enum efx_mcdi_mode - MCDI transaction mode
+ * @MCDI_MODE_POLL: poll for MCDI completion, until timeout
+ * @MCDI_MODE_EVENTS: wait for an mcdi_event. On timeout, poll once
+ * @MCDI_MODE_FAIL: we think MCDI is dead, so fail-fast all calls
+ */
enum efx_mcdi_mode {
MCDI_MODE_POLL,
MCDI_MODE_EVENTS,
+ MCDI_MODE_FAIL,
};
/**
@@ -104,6 +111,12 @@
u32 fn_flags;
};
+static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx)
+{
+ EFX_BUG_ON_PARANOID(!efx->mcdi);
+ return &efx->mcdi->iface;
+}
+
#ifdef CONFIG_SFC_MCDI_MON
static inline struct efx_mcdi_mon *efx_mcdi_mon(struct efx_nic *efx)
{
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 8a400a0..5bdae8e 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -972,6 +972,8 @@
* (for Falcon architecture)
* @finish_flush: Clean up after flushing the DMA queues (for Falcon
* architecture)
+ * @prepare_flr: Prepare for an FLR
+ * @finish_flr: Clean up after an FLR
* @describe_stats: Describe statistics for ethtool
* @update_stats: Update statistics not provided by event handling.
* Either argument may be %NULL.
@@ -1100,6 +1102,8 @@
int (*fini_dmaq)(struct efx_nic *efx);
void (*prepare_flush)(struct efx_nic *efx);
void (*finish_flush)(struct efx_nic *efx);
+ void (*prepare_flr)(struct efx_nic *efx);
+ void (*finish_flr)(struct efx_nic *efx);
size_t (*describe_stats)(struct efx_nic *efx, u8 *names);
size_t (*update_stats)(struct efx_nic *efx, u64 *full_stats,
struct rtnl_link_stats64 *core_stats);
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index a001fae..d3ad8ed 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -757,6 +757,7 @@
int efx_nic_flush_queues(struct efx_nic *efx);
void siena_prepare_flush(struct efx_nic *efx);
int efx_farch_fini_dmaq(struct efx_nic *efx);
+void efx_farch_finish_flr(struct efx_nic *efx);
void siena_finish_flush(struct efx_nic *efx);
void falcon_start_nic_stats(struct efx_nic *efx);
void falcon_stop_nic_stats(struct efx_nic *efx);
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index 23f3a6f..50ffefe 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -921,6 +921,8 @@
.fini_dmaq = efx_farch_fini_dmaq,
.prepare_flush = siena_prepare_flush,
.finish_flush = siena_finish_flush,
+ .prepare_flr = efx_port_dummy_op_void,
+ .finish_flr = efx_farch_finish_flr,
.describe_stats = siena_describe_nic_stats,
.update_stats = siena_update_nic_stats,
.start_stats = efx_mcdi_mac_start_stats,
diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index 430bb0d..e36f194 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c
@@ -365,7 +365,7 @@
dev_vdbg(&lp->spi->dev, "buf[1] = %02x\n", buf[1]);
if (status == 0)
- *data = buf[1];
+ *data = (buf[1] & mask) >> shift;
return status;
}
@@ -1025,14 +1025,6 @@
return -EINVAL;
}
- rc = at86rf230_read_subreg(lp, SR_AVDD_OK, &status);
- if (rc)
- return rc;
- if (!status) {
- dev_err(&lp->spi->dev, "AVDD error\n");
- return -EINVAL;
- }
-
return 0;
}
diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c
index e701433..9c4defd 100644
--- a/drivers/net/phy/mdio-gpio.c
+++ b/drivers/net/phy/mdio-gpio.c
@@ -32,29 +32,39 @@
struct mdio_gpio_info {
struct mdiobb_ctrl ctrl;
- int mdc, mdio;
+ int mdc, mdio, mdo;
+ int mdc_active_low, mdio_active_low, mdo_active_low;
};
static void *mdio_gpio_of_get_data(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
struct mdio_gpio_platform_data *pdata;
+ enum of_gpio_flags flags;
int ret;
pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
if (!pdata)
return NULL;
- ret = of_get_gpio(np, 0);
+ ret = of_get_gpio_flags(np, 0, &flags);
if (ret < 0)
return NULL;
pdata->mdc = ret;
+ pdata->mdc_active_low = flags & OF_GPIO_ACTIVE_LOW;
- ret = of_get_gpio(np, 1);
+ ret = of_get_gpio_flags(np, 1, &flags);
if (ret < 0)
return NULL;
pdata->mdio = ret;
+ pdata->mdio_active_low = flags & OF_GPIO_ACTIVE_LOW;
+
+ ret = of_get_gpio_flags(np, 2, &flags);
+ if (ret > 0) {
+ pdata->mdo = ret;
+ pdata->mdo_active_low = flags & OF_GPIO_ACTIVE_LOW;
+ }
return pdata;
}
@@ -64,8 +74,19 @@
struct mdio_gpio_info *bitbang =
container_of(ctrl, struct mdio_gpio_info, ctrl);
+ if (bitbang->mdo) {
+ /* Separate output pin. Always set its value to high
+ * when changing direction. If direction is input,
+ * assume the pin serves as pull-up. If direction is
+ * output, the default value is high.
+ */
+ gpio_set_value(bitbang->mdo, 1 ^ bitbang->mdo_active_low);
+ return;
+ }
+
if (dir)
- gpio_direction_output(bitbang->mdio, 1);
+ gpio_direction_output(bitbang->mdio,
+ 1 ^ bitbang->mdio_active_low);
else
gpio_direction_input(bitbang->mdio);
}
@@ -75,7 +96,7 @@
struct mdio_gpio_info *bitbang =
container_of(ctrl, struct mdio_gpio_info, ctrl);
- return gpio_get_value(bitbang->mdio);
+ return gpio_get_value(bitbang->mdio) ^ bitbang->mdio_active_low;
}
static void mdio_set(struct mdiobb_ctrl *ctrl, int what)
@@ -83,7 +104,10 @@
struct mdio_gpio_info *bitbang =
container_of(ctrl, struct mdio_gpio_info, ctrl);
- gpio_set_value(bitbang->mdio, what);
+ if (bitbang->mdo)
+ gpio_set_value(bitbang->mdo, what ^ bitbang->mdo_active_low);
+ else
+ gpio_set_value(bitbang->mdio, what ^ bitbang->mdio_active_low);
}
static void mdc_set(struct mdiobb_ctrl *ctrl, int what)
@@ -91,7 +115,7 @@
struct mdio_gpio_info *bitbang =
container_of(ctrl, struct mdio_gpio_info, ctrl);
- gpio_set_value(bitbang->mdc, what);
+ gpio_set_value(bitbang->mdc, what ^ bitbang->mdc_active_low);
}
static struct mdiobb_ops mdio_gpio_ops = {
@@ -110,18 +134,22 @@
struct mdio_gpio_info *bitbang;
int i;
- bitbang = kzalloc(sizeof(*bitbang), GFP_KERNEL);
+ bitbang = devm_kzalloc(dev, sizeof(*bitbang), GFP_KERNEL);
if (!bitbang)
goto out;
bitbang->ctrl.ops = &mdio_gpio_ops;
bitbang->ctrl.reset = pdata->reset;
bitbang->mdc = pdata->mdc;
+ bitbang->mdc_active_low = pdata->mdc_active_low;
bitbang->mdio = pdata->mdio;
+ bitbang->mdio_active_low = pdata->mdio_active_low;
+ bitbang->mdo = pdata->mdo;
+ bitbang->mdo_active_low = pdata->mdo_active_low;
new_bus = alloc_mdio_bitbang(&bitbang->ctrl);
if (!new_bus)
- goto out_free_bitbang;
+ goto out;
new_bus->name = "GPIO Bitbanged MDIO",
@@ -138,11 +166,18 @@
snprintf(new_bus->id, MII_BUS_ID_SIZE, "gpio-%x", bus_id);
- if (gpio_request(bitbang->mdc, "mdc"))
+ if (devm_gpio_request(dev, bitbang->mdc, "mdc"))
goto out_free_bus;
- if (gpio_request(bitbang->mdio, "mdio"))
- goto out_free_mdc;
+ if (devm_gpio_request(dev, bitbang->mdio, "mdio"))
+ goto out_free_bus;
+
+ if (bitbang->mdo) {
+ if (devm_gpio_request(dev, bitbang->mdo, "mdo"))
+ goto out_free_bus;
+ gpio_direction_output(bitbang->mdo, 1);
+ gpio_direction_input(bitbang->mdio);
+ }
gpio_direction_output(bitbang->mdc, 0);
@@ -150,12 +185,8 @@
return new_bus;
-out_free_mdc:
- gpio_free(bitbang->mdc);
out_free_bus:
free_mdio_bitbang(new_bus);
-out_free_bitbang:
- kfree(bitbang);
out:
return NULL;
}
@@ -163,13 +194,8 @@
static void mdio_gpio_bus_deinit(struct device *dev)
{
struct mii_bus *bus = dev_get_drvdata(dev);
- struct mdio_gpio_info *bitbang = bus->priv;
- dev_set_drvdata(dev, NULL);
- gpio_free(bitbang->mdio);
- gpio_free(bitbang->mdc);
free_mdio_bitbang(bus);
- kfree(bitbang);
}
static void mdio_gpio_bus_destroy(struct device *dev)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index c55e316..82355d5 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1755,8 +1755,8 @@
if (err)
return err;
- return iptunnel_xmit(rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df,
- false);
+ return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
+ tos, ttl, df, false);
}
EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 84734a8..83c39e2 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -1521,11 +1521,7 @@
cosa_putstatus(cosa, 0);
cosa_getdata8(cosa);
cosa_putstatus(cosa, SR_RST);
-#ifdef MODULE
msleep(500);
-#else
- udelay(5*100000);
-#endif
/* Disable all IRQs from the card */
cosa_putstatus(cosa, 0);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8807442..8add255 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2754,7 +2754,7 @@
for (i = 0; i < rdata->nr_pages; i++) {
struct page *page = rdata->pages[i];
- size_t copy = min(remaining, PAGE_SIZE);
+ size_t copy = min_t(size_t, remaining, PAGE_SIZE);
size_t written = copy_page_to_iter(page, 0, copy, iter);
remaining -= written;
if (written < copy && iov_iter_count(iter) > 0)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 262dcbb..024fd03 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -220,7 +220,6 @@
BPF_S_ANC_RXHASH,
BPF_S_ANC_CPU,
BPF_S_ANC_ALU_XOR_X,
- BPF_S_ANC_SECCOMP_LD_W,
BPF_S_ANC_VLAN_TAG,
BPF_S_ANC_VLAN_TAG_PRESENT,
BPF_S_ANC_PAY_OFFSET,
diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h
index 7c9fe3c..66c30a7 100644
--- a/include/linux/mdio-gpio.h
+++ b/include/linux/mdio-gpio.h
@@ -17,6 +17,11 @@
/* GPIO numbers for bus pins */
unsigned int mdc;
unsigned int mdio;
+ unsigned int mdo;
+
+ bool mdc_active_low;
+ bool mdio_active_low;
+ bool mdo_active_low;
unsigned int phy_mask;
int irqs[PHY_MAX_ADDR];
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 2b58d19..8967e20 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -124,6 +124,8 @@
union {
struct list_head lru; /* Pageout list, eg. active_list
* protected by zone->lru_lock !
+ * Can be used as a generic list
+ * by the page owner.
*/
struct { /* slub per cpu partial pages */
struct page *next; /* Next partial slab */
@@ -136,7 +138,6 @@
#endif
};
- struct list_head list; /* slobs list of pages */
struct slab *slab_page; /* slab fields */
struct rcu_head rcu_head; /* Used by SLAB
* when destroying via RCU
diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index ec2ffaf..df78dc2 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -87,7 +87,6 @@
/* delete keymap entries */
void nf_ct_gre_keymap_destroy(struct nf_conn *ct);
-void nf_ct_gre_keymap_flush(struct net *net);
void nf_nat_need_gre(void);
#endif /* __KERNEL__ */
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 24126c4..4d0221f 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -75,6 +75,7 @@
PHY_INTERFACE_MODE_SMII,
PHY_INTERFACE_MODE_XGMII,
PHY_INTERFACE_MODE_MOCA,
+ PHY_INTERFACE_MODE_QSGMII,
PHY_INTERFACE_MODE_MAX,
} phy_interface_t;
@@ -116,6 +117,8 @@
return "xgmii";
case PHY_INTERFACE_MODE_MOCA:
return "moca";
+ case PHY_INTERFACE_MODE_QSGMII:
+ return "qsgmii";
default:
return "unknown";
}
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 3dd389a..307bfbe 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -242,6 +242,17 @@
#define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW)
#endif
+/*
+ * This restriction comes from byte sized index implementation.
+ * Page size is normally 2^12 bytes and, in this case, if we want to use
+ * byte sized index which can represent 2^8 entries, the size of the object
+ * should be equal or greater to 2^12 / 2^8 = 2^4 = 16.
+ * If minimum size of kmalloc is less than 16, we use it as minimum object
+ * size and give up to use byte sized index.
+ */
+#define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \
+ (KMALLOC_MIN_SIZE) : 16)
+
#ifndef CONFIG_SLOB
extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
#ifdef CONFIG_ZONE_DMA
diff --git a/include/net/dst.h b/include/net/dst.h
index 46ed958..71c60f4 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -45,7 +45,7 @@
void *__pad1;
#endif
int (*input)(struct sk_buff *);
- int (*output)(struct sk_buff *);
+ int (*output)(struct sock *sk, struct sk_buff *skb);
unsigned short flags;
#define DST_HOST 0x0001
@@ -367,7 +367,11 @@
return child;
}
-int dst_discard(struct sk_buff *skb);
+int dst_discard_sk(struct sock *sk, struct sk_buff *skb);
+static inline int dst_discard(struct sk_buff *skb)
+{
+ return dst_discard_sk(skb->sk, skb);
+}
void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref,
int initial_obsolete, unsigned short flags);
void __dst_free(struct dst_entry *dst);
@@ -449,9 +453,13 @@
}
/* Output packet to network from transport. */
+static inline int dst_output_sk(struct sock *sk, struct sk_buff *skb)
+{
+ return skb_dst(skb)->output(sk, skb);
+}
static inline int dst_output(struct sk_buff *skb)
{
- return skb_dst(skb)->output(skb);
+ return dst_output_sk(skb->sk, skb);
}
/* Input packet from network to transport. */
diff --git a/include/net/flow.h b/include/net/flow.h
index 64fd248..8109a15 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -11,6 +11,14 @@
#include <linux/in6.h>
#include <linux/atomic.h>
+/*
+ * ifindex generation is per-net namespace, and loopback is
+ * always the 1st device in ns (see net_dev_init), thus any
+ * loopback device should get ifindex 1
+ */
+
+#define LOOPBACK_IFINDEX 1
+
struct flowi_common {
int flowic_oif;
int flowic_iif;
@@ -80,7 +88,7 @@
__be16 dport, __be16 sport)
{
fl4->flowi4_oif = oif;
- fl4->flowi4_iif = 0;
+ fl4->flowi4_iif = LOOPBACK_IFINDEX;
fl4->flowi4_mark = mark;
fl4->flowi4_tos = tos;
fl4->flowi4_scope = scope;
diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index f981ba7..74af137 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -40,7 +40,7 @@
void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);
-int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl);
+int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu);
#endif /* _INET6_CONNECTION_SOCK_H */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c55aeed..7a43138 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -36,7 +36,7 @@
* (i.e. things that depend on the address family)
*/
struct inet_connection_sock_af_ops {
- int (*queue_xmit)(struct sk_buff *skb, struct flowi *fl);
+ int (*queue_xmit)(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
void (*send_check)(struct sock *sk, struct sk_buff *skb);
int (*rebuild_header)(struct sock *sk);
void (*sk_rx_dst_set)(struct sock *sk, const struct sk_buff *skb);
diff --git a/include/net/ip.h b/include/net/ip.h
index 25064c2..3ec2b0f 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -104,14 +104,19 @@
struct net_device *orig_dev);
int ip_local_deliver(struct sk_buff *skb);
int ip_mr_input(struct sk_buff *skb);
-int ip_output(struct sk_buff *skb);
-int ip_mc_output(struct sk_buff *skb);
+int ip_output(struct sock *sk, struct sk_buff *skb);
+int ip_mc_output(struct sock *sk, struct sk_buff *skb);
int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
int ip_do_nat(struct sk_buff *skb);
void ip_send_check(struct iphdr *ip);
int __ip_local_out(struct sk_buff *skb);
-int ip_local_out(struct sk_buff *skb);
-int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl);
+int ip_local_out_sk(struct sock *sk, struct sk_buff *skb);
+static inline int ip_local_out(struct sk_buff *skb)
+{
+ return ip_local_out_sk(skb->sk, skb);
+}
+
+int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
void ip_init(void);
int ip_append_data(struct sock *sk, struct flowi4 *fl4,
int getfrag(void *from, char *to, int offset, int len,
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 3c3bb18..6c4f5ea 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -32,6 +32,11 @@
#define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010
#define RT6_LOOKUP_F_SRCPREF_COA 0x00000020
+/* We do not (yet ?) support IPv6 jumbograms (RFC 2675)
+ * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header
+ */
+#define IP6_MAX_MTU (0xFFFF + sizeof(struct ipv6hdr))
+
/*
* rt6_srcprefs2flags() and rt6_flags2srcprefs() translate
* between IPV6_ADDR_PREFERENCES socket option values
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index e77c104..a4daf9e 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -153,7 +153,7 @@
}
int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
-int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
+int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto,
__u8 tos, __u8 ttl, __be16 df, bool xnet);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 4f541f1..d640925 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -731,7 +731,7 @@
* skb processing functions
*/
-int ip6_output(struct sk_buff *skb);
+int ip6_output(struct sock *sk, struct sk_buff *skb);
int ip6_forward(struct sk_buff *skb);
int ip6_input(struct sk_buff *skb);
int ip6_mc_input(struct sk_buff *skb);
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 79387f7..5f9eb26 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -9,6 +9,7 @@
#include <linux/list.h>
#include <linux/sysctl.h>
+#include <net/flow.h>
#include <net/netns/core.h>
#include <net/netns/mib.h>
#include <net/netns/unix.h>
@@ -131,14 +132,6 @@
atomic_t fnhe_genid;
};
-/*
- * ifindex generation is per-net namespace, and loopback is
- * always the 1st device in ns (see net_dev_init), thus any
- * loopback device should get ifindex 1
- */
-
-#define LOOPBACK_IFINDEX 1
-
#include <linux/seq_file_net.h>
/* Init's network namespace */
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index cf2b7ae..a75fc8e 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -13,6 +13,16 @@
u8 len;
};
+/* Calculate the mask for the nft_cmp_fast expression. On big endian the
+ * mask needs to include the *upper* bytes when interpreting that data as
+ * something smaller than the full u32, therefore a cpu_to_le32 is done.
+ */
+static inline u32 nft_cmp_fast_mask(unsigned int len)
+{
+ return cpu_to_le32(~0U >> (FIELD_SIZEOF(struct nft_cmp_fast_expr,
+ data) * BITS_PER_BYTE - len));
+}
+
extern const struct nft_expr_ops nft_cmp_fast_ops;
int nft_cmp_module_init(void);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 6ee76c8..d992ca3 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1653,6 +1653,17 @@
/* This is the last advertised value of rwnd over a SACK chunk. */
__u32 a_rwnd;
+ /* Number of bytes by which the rwnd has slopped. The rwnd is allowed
+ * to slop over a maximum of the association's frag_point.
+ */
+ __u32 rwnd_over;
+
+ /* Keeps treack of rwnd pressure. This happens when we have
+ * a window, but not recevie buffer (i.e small packets). This one
+ * is releases slowly (1 PMTU at a time ).
+ */
+ __u32 rwnd_press;
+
/* This is the sndbuf size in use for the association.
* This corresponds to the sndbuf size for the association,
* as specified in the sk->sndbuf.
@@ -1881,7 +1892,8 @@
__u32 sctp_association_get_next_tsn(struct sctp_association *);
void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *);
-void sctp_assoc_rwnd_update(struct sctp_association *, bool);
+void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int);
+void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int);
void sctp_assoc_set_primary(struct sctp_association *,
struct sctp_transport *);
void sctp_assoc_del_nonprimary_peers(struct sctp_association *,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 32682ae..116e9c7 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -333,7 +333,7 @@
const xfrm_address_t *saddr);
int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n);
int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n);
- int (*output)(struct sk_buff *skb);
+ int (*output)(struct sock *sk, struct sk_buff *skb);
int (*output_finish)(struct sk_buff *skb);
int (*extract_input)(struct xfrm_state *x,
struct sk_buff *skb);
@@ -1540,7 +1540,7 @@
int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb);
int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb);
-int xfrm4_output(struct sk_buff *skb);
+int xfrm4_output(struct sock *sk, struct sk_buff *skb);
int xfrm4_output_finish(struct sk_buff *skb);
int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err);
int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol);
@@ -1565,7 +1565,7 @@
__be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr);
int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb);
int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb);
-int xfrm6_output(struct sk_buff *skb);
+int xfrm6_output(struct sock *sk, struct sk_buff *skb);
int xfrm6_output_finish(struct sk_buff *skb);
int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
u8 **prevhdr);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d8d046c..b35c215 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -69,18 +69,17 @@
{
struct task_struct *task = current;
struct pt_regs *regs = task_pt_regs(task);
+ unsigned long args[6];
sd->nr = syscall_get_nr(task, regs);
sd->arch = syscall_get_arch();
-
- /* Unroll syscall_get_args to help gcc on arm. */
- syscall_get_arguments(task, regs, 0, 1, (unsigned long *) &sd->args[0]);
- syscall_get_arguments(task, regs, 1, 1, (unsigned long *) &sd->args[1]);
- syscall_get_arguments(task, regs, 2, 1, (unsigned long *) &sd->args[2]);
- syscall_get_arguments(task, regs, 3, 1, (unsigned long *) &sd->args[3]);
- syscall_get_arguments(task, regs, 4, 1, (unsigned long *) &sd->args[4]);
- syscall_get_arguments(task, regs, 5, 1, (unsigned long *) &sd->args[5]);
-
+ syscall_get_arguments(task, regs, 0, 6, args);
+ sd->args[0] = args[0];
+ sd->args[1] = args[1];
+ sd->args[2] = args[2];
+ sd->args[3] = args[3];
+ sd->args[4] = args[4];
+ sd->args[5] = args[5];
sd->instruction_pointer = KSTK_EIP(task);
}
@@ -256,6 +255,7 @@
goto free_prog;
/* Allocate a new seccomp_filter */
+ ret = -ENOMEM;
filter = kzalloc(sizeof(struct seccomp_filter) +
sizeof(struct sock_filter_int) * new_len,
GFP_KERNEL|__GFP_NOWARN);
@@ -265,6 +265,7 @@
ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len);
if (ret)
goto free_filter;
+ kfree(fp);
atomic_set(&filter->usage, 1);
filter->len = new_len;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 0d8f602..bf71b4b 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -152,7 +152,7 @@
/* Find the matching extent */
extents = map->nr_extents;
- smp_read_barrier_depends();
+ smp_rmb();
for (idx = 0; idx < extents; idx++) {
first = map->extent[idx].first;
last = first + map->extent[idx].count - 1;
@@ -176,7 +176,7 @@
/* Find the matching extent */
extents = map->nr_extents;
- smp_read_barrier_depends();
+ smp_rmb();
for (idx = 0; idx < extents; idx++) {
first = map->extent[idx].first;
last = first + map->extent[idx].count - 1;
@@ -199,7 +199,7 @@
/* Find the matching extent */
extents = map->nr_extents;
- smp_read_barrier_depends();
+ smp_rmb();
for (idx = 0; idx < extents; idx++) {
first = map->extent[idx].lower_first;
last = first + map->extent[idx].count - 1;
@@ -615,9 +615,8 @@
* were written before the count of the extents.
*
* To achieve this smp_wmb() is used on guarantee the write
- * order and smp_read_barrier_depends() is guaranteed that we
- * don't have crazy architectures returning stale data.
- *
+ * order and smp_rmb() is guaranteed that we don't have crazy
+ * architectures returning stale data.
*/
mutex_lock(&id_map_mutex);
diff --git a/mm/shmem.c b/mm/shmem.c
index 8f1a9540..9f70e02 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1411,7 +1411,7 @@
pgoff_t index;
unsigned long offset;
enum sgp_type sgp = SGP_READ;
- int error;
+ int error = 0;
ssize_t retval;
size_t count;
loff_t *ppos = &iocb->ki_pos;
diff --git a/mm/slab.c b/mm/slab.c
index 3db4cb0..388cb1a 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -157,6 +157,17 @@
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif
+#define FREELIST_BYTE_INDEX (((PAGE_SIZE >> BITS_PER_BYTE) \
+ <= SLAB_OBJ_MIN_SIZE) ? 1 : 0)
+
+#if FREELIST_BYTE_INDEX
+typedef unsigned char freelist_idx_t;
+#else
+typedef unsigned short freelist_idx_t;
+#endif
+
+#define SLAB_OBJ_MAX_NUM (1 << sizeof(freelist_idx_t) * BITS_PER_BYTE)
+
/*
* true if a page was allocated from pfmemalloc reserves for network-based
* swap
@@ -277,8 +288,8 @@
* OTOH the cpuarrays can contain lots of objects,
* which could lock up otherwise freeable slabs.
*/
-#define REAPTIMEOUT_CPUC (2*HZ)
-#define REAPTIMEOUT_LIST3 (4*HZ)
+#define REAPTIMEOUT_AC (2*HZ)
+#define REAPTIMEOUT_NODE (4*HZ)
#if STATS
#define STATS_INC_ACTIVE(x) ((x)->num_active++)
@@ -565,9 +576,31 @@
return cachep->array[smp_processor_id()];
}
-static size_t slab_mgmt_size(size_t nr_objs, size_t align)
+static int calculate_nr_objs(size_t slab_size, size_t buffer_size,
+ size_t idx_size, size_t align)
{
- return ALIGN(nr_objs * sizeof(unsigned int), align);
+ int nr_objs;
+ size_t freelist_size;
+
+ /*
+ * Ignore padding for the initial guess. The padding
+ * is at most @align-1 bytes, and @buffer_size is at
+ * least @align. In the worst case, this result will
+ * be one greater than the number of objects that fit
+ * into the memory allocation when taking the padding
+ * into account.
+ */
+ nr_objs = slab_size / (buffer_size + idx_size);
+
+ /*
+ * This calculated number will be either the right
+ * amount, or one greater than what we want.
+ */
+ freelist_size = slab_size - nr_objs * buffer_size;
+ if (freelist_size < ALIGN(nr_objs * idx_size, align))
+ nr_objs--;
+
+ return nr_objs;
}
/*
@@ -600,25 +633,9 @@
nr_objs = slab_size / buffer_size;
} else {
- /*
- * Ignore padding for the initial guess. The padding
- * is at most @align-1 bytes, and @buffer_size is at
- * least @align. In the worst case, this result will
- * be one greater than the number of objects that fit
- * into the memory allocation when taking the padding
- * into account.
- */
- nr_objs = (slab_size) / (buffer_size + sizeof(unsigned int));
-
- /*
- * This calculated number will be either the right
- * amount, or one greater than what we want.
- */
- if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
- > slab_size)
- nr_objs--;
-
- mgmt_size = slab_mgmt_size(nr_objs, align);
+ nr_objs = calculate_nr_objs(slab_size, buffer_size,
+ sizeof(freelist_idx_t), align);
+ mgmt_size = ALIGN(nr_objs * sizeof(freelist_idx_t), align);
}
*num = nr_objs;
*left_over = slab_size - nr_objs*buffer_size - mgmt_size;
@@ -1067,7 +1084,7 @@
list_for_each_entry(cachep, &slab_caches, list) {
/*
- * Set up the size64 kmemlist for cpu before we can
+ * Set up the kmem_cache_node for cpu before we can
* begin anything. Make sure some other cpu on this
* node has not already allocated this
*/
@@ -1076,12 +1093,12 @@
if (!n)
return -ENOMEM;
kmem_cache_node_init(n);
- n->next_reap = jiffies + REAPTIMEOUT_LIST3 +
- ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+ n->next_reap = jiffies + REAPTIMEOUT_NODE +
+ ((unsigned long)cachep) % REAPTIMEOUT_NODE;
/*
- * The l3s don't come and go as CPUs come and
- * go. slab_mutex is sufficient
+ * The kmem_cache_nodes don't come and go as CPUs
+ * come and go. slab_mutex is sufficient
* protection here.
*/
cachep->node[node] = n;
@@ -1406,8 +1423,8 @@
for_each_online_node(node) {
cachep->node[node] = &init_kmem_cache_node[index + node];
cachep->node[node]->next_reap = jiffies +
- REAPTIMEOUT_LIST3 +
- ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+ REAPTIMEOUT_NODE +
+ ((unsigned long)cachep) % REAPTIMEOUT_NODE;
}
}
@@ -2010,6 +2027,10 @@
if (!num)
continue;
+ /* Can't handle number of objects more than SLAB_OBJ_MAX_NUM */
+ if (num > SLAB_OBJ_MAX_NUM)
+ break;
+
if (flags & CFLGS_OFF_SLAB) {
/*
* Max number of objs-per-slab for caches which
@@ -2017,7 +2038,7 @@
* looping condition in cache_grow().
*/
offslab_limit = size;
- offslab_limit /= sizeof(unsigned int);
+ offslab_limit /= sizeof(freelist_idx_t);
if (num > offslab_limit)
break;
@@ -2103,8 +2124,8 @@
}
}
cachep->node[numa_mem_id()]->next_reap =
- jiffies + REAPTIMEOUT_LIST3 +
- ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+ jiffies + REAPTIMEOUT_NODE +
+ ((unsigned long)cachep) % REAPTIMEOUT_NODE;
cpu_cache_get(cachep)->avail = 0;
cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
@@ -2243,7 +2264,7 @@
* it too early on. Always use on-slab management when
* SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)
*/
- if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init &&
+ if ((size >= (PAGE_SIZE >> 5)) && !slab_early_init &&
!(flags & SLAB_NOLEAKTRACE))
/*
* Size is large, assume best to place the slab management obj
@@ -2252,6 +2273,12 @@
flags |= CFLGS_OFF_SLAB;
size = ALIGN(size, cachep->align);
+ /*
+ * We should restrict the number of objects in a slab to implement
+ * byte sized index. Refer comment on SLAB_OBJ_MIN_SIZE definition.
+ */
+ if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE)
+ size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align);
left_over = calculate_slab_order(cachep, size, cachep->align, flags);
@@ -2259,7 +2286,7 @@
return -E2BIG;
freelist_size =
- ALIGN(cachep->num * sizeof(unsigned int), cachep->align);
+ ALIGN(cachep->num * sizeof(freelist_idx_t), cachep->align);
/*
* If the slab has been placed off-slab, and we have enough space then
@@ -2272,7 +2299,7 @@
if (flags & CFLGS_OFF_SLAB) {
/* really off slab. No need for manual alignment */
- freelist_size = cachep->num * sizeof(unsigned int);
+ freelist_size = cachep->num * sizeof(freelist_idx_t);
#ifdef CONFIG_PAGE_POISONING
/* If we're going to use the generic kernel_map_pages()
@@ -2300,10 +2327,10 @@
if (flags & CFLGS_OFF_SLAB) {
cachep->freelist_cache = kmalloc_slab(freelist_size, 0u);
/*
- * This is a possibility for one of the malloc_sizes caches.
+ * This is a possibility for one of the kmalloc_{dma,}_caches.
* But since we go off slab only for object size greater than
- * PAGE_SIZE/8, and malloc_sizes gets created in ascending order,
- * this should not happen at all.
+ * PAGE_SIZE/8, and kmalloc_{dma,}_caches get created
+ * in ascending order,this should not happen at all.
* But leave a BUG_ON for some lucky dude.
*/
BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache));
@@ -2511,14 +2538,17 @@
/*
* Get the memory for a slab management obj.
- * For a slab cache when the slab descriptor is off-slab, slab descriptors
- * always come from malloc_sizes caches. The slab descriptor cannot
- * come from the same cache which is getting created because,
- * when we are searching for an appropriate cache for these
- * descriptors in kmem_cache_create, we search through the malloc_sizes array.
- * If we are creating a malloc_sizes cache here it would not be visible to
- * kmem_find_general_cachep till the initialization is complete.
- * Hence we cannot have freelist_cache same as the original cache.
+ *
+ * For a slab cache when the slab descriptor is off-slab, the
+ * slab descriptor can't come from the same cache which is being created,
+ * Because if it is the case, that means we defer the creation of
+ * the kmalloc_{dma,}_cache of size sizeof(slab descriptor) to this point.
+ * And we eventually call down to __kmem_cache_create(), which
+ * in turn looks up in the kmalloc_{dma,}_caches for the disired-size one.
+ * This is a "chicken-and-egg" problem.
+ *
+ * So the off-slab slab descriptor shall come from the kmalloc_{dma,}_caches,
+ * which are all initialized during kmem_cache_init().
*/
static void *alloc_slabmgmt(struct kmem_cache *cachep,
struct page *page, int colour_off,
@@ -2542,9 +2572,15 @@
return freelist;
}
-static inline unsigned int *slab_freelist(struct page *page)
+static inline freelist_idx_t get_free_obj(struct page *page, unsigned char idx)
{
- return (unsigned int *)(page->freelist);
+ return ((freelist_idx_t *)page->freelist)[idx];
+}
+
+static inline void set_free_obj(struct page *page,
+ unsigned char idx, freelist_idx_t val)
+{
+ ((freelist_idx_t *)(page->freelist))[idx] = val;
}
static void cache_init_objs(struct kmem_cache *cachep,
@@ -2589,7 +2625,7 @@
if (cachep->ctor)
cachep->ctor(objp);
#endif
- slab_freelist(page)[i] = i;
+ set_free_obj(page, i, i);
}
}
@@ -2608,7 +2644,7 @@
{
void *objp;
- objp = index_to_obj(cachep, page, slab_freelist(page)[page->active]);
+ objp = index_to_obj(cachep, page, get_free_obj(page, page->active));
page->active++;
#if DEBUG
WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
@@ -2629,7 +2665,7 @@
/* Verify double free bug */
for (i = page->active; i < cachep->num; i++) {
- if (slab_freelist(page)[i] == objnr) {
+ if (get_free_obj(page, i) == objnr) {
printk(KERN_ERR "slab: double free detected in cache "
"'%s', objp %p\n", cachep->name, objp);
BUG();
@@ -2637,7 +2673,7 @@
}
#endif
page->active--;
- slab_freelist(page)[page->active] = objnr;
+ set_free_obj(page, page->active, objnr);
}
/*
@@ -2886,9 +2922,9 @@
/* move slabp to correct slabp list: */
list_del(&page->lru);
if (page->active == cachep->num)
- list_add(&page->list, &n->slabs_full);
+ list_add(&page->lru, &n->slabs_full);
else
- list_add(&page->list, &n->slabs_partial);
+ list_add(&page->lru, &n->slabs_partial);
}
must_grow:
@@ -3245,11 +3281,11 @@
kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
flags);
- if (likely(ptr))
+ if (likely(ptr)) {
kmemcheck_slab_alloc(cachep, flags, ptr, cachep->object_size);
-
- if (unlikely((flags & __GFP_ZERO) && ptr))
- memset(ptr, 0, cachep->object_size);
+ if (unlikely(flags & __GFP_ZERO))
+ memset(ptr, 0, cachep->object_size);
+ }
return ptr;
}
@@ -3310,17 +3346,17 @@
flags);
prefetchw(objp);
- if (likely(objp))
+ if (likely(objp)) {
kmemcheck_slab_alloc(cachep, flags, objp, cachep->object_size);
-
- if (unlikely((flags & __GFP_ZERO) && objp))
- memset(objp, 0, cachep->object_size);
+ if (unlikely(flags & __GFP_ZERO))
+ memset(objp, 0, cachep->object_size);
+ }
return objp;
}
/*
- * Caller needs to acquire correct kmem_list's list_lock
+ * Caller needs to acquire correct kmem_cache_node's list_lock
*/
static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
int node)
@@ -3574,11 +3610,6 @@
struct kmem_cache *cachep;
void *ret;
- /* If you want to save a few bytes .text space: replace
- * __ with kmem_.
- * Then kmalloc uses the uninlined functions instead of the inline
- * functions.
- */
cachep = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
@@ -3670,7 +3701,7 @@
/*
* This initializes kmem_cache_node or resizes various caches for all nodes.
*/
-static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
+static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
{
int node;
struct kmem_cache_node *n;
@@ -3726,8 +3757,8 @@
}
kmem_cache_node_init(n);
- n->next_reap = jiffies + REAPTIMEOUT_LIST3 +
- ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+ n->next_reap = jiffies + REAPTIMEOUT_NODE +
+ ((unsigned long)cachep) % REAPTIMEOUT_NODE;
n->shared = new_shared;
n->alien = new_alien;
n->free_limit = (1 + nr_cpus_node(node)) *
@@ -3813,7 +3844,7 @@
kfree(ccold);
}
kfree(new);
- return alloc_kmemlist(cachep, gfp);
+ return alloc_kmem_cache_node(cachep, gfp);
}
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
@@ -3982,7 +4013,7 @@
if (time_after(n->next_reap, jiffies))
goto next;
- n->next_reap = jiffies + REAPTIMEOUT_LIST3;
+ n->next_reap = jiffies + REAPTIMEOUT_NODE;
drain_array(searchp, n, n->shared, 0, node);
@@ -4003,7 +4034,7 @@
next_reap_node();
out:
/* Set up the next iteration */
- schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
+ schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_AC));
}
#ifdef CONFIG_SLABINFO
@@ -4210,7 +4241,7 @@
for (j = page->active; j < c->num; j++) {
/* Skip freed item */
- if (slab_freelist(page)[j] == i) {
+ if (get_free_obj(page, j) == i) {
active = false;
break;
}
diff --git a/mm/slob.c b/mm/slob.c
index 4bf8809..730cad4 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -111,13 +111,13 @@
static void set_slob_page_free(struct page *sp, struct list_head *list)
{
- list_add(&sp->list, list);
+ list_add(&sp->lru, list);
__SetPageSlobFree(sp);
}
static inline void clear_slob_page_free(struct page *sp)
{
- list_del(&sp->list);
+ list_del(&sp->lru);
__ClearPageSlobFree(sp);
}
@@ -282,7 +282,7 @@
spin_lock_irqsave(&slob_lock, flags);
/* Iterate through each partially free page, try to find room */
- list_for_each_entry(sp, slob_list, list) {
+ list_for_each_entry(sp, slob_list, lru) {
#ifdef CONFIG_NUMA
/*
* If there's a node specification, search for a partial
@@ -296,7 +296,7 @@
continue;
/* Attempt to alloc */
- prev = sp->list.prev;
+ prev = sp->lru.prev;
b = slob_page_alloc(sp, size, align);
if (!b)
continue;
@@ -322,7 +322,7 @@
spin_lock_irqsave(&slob_lock, flags);
sp->units = SLOB_UNITS(PAGE_SIZE);
sp->freelist = b;
- INIT_LIST_HEAD(&sp->list);
+ INIT_LIST_HEAD(&sp->lru);
set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
set_slob_page_free(sp, slob_list);
b = slob_page_alloc(sp, size, align);
diff --git a/mm/slub.c b/mm/slub.c
index f620bbf..5e234f1 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1352,11 +1352,12 @@
page = alloc_slab_page(alloc_gfp, node, oo);
if (unlikely(!page)) {
oo = s->min;
+ alloc_gfp = flags;
/*
* Allocation may have failed due to fragmentation.
* Try a lower order alloc if possible
*/
- page = alloc_slab_page(flags, node, oo);
+ page = alloc_slab_page(alloc_gfp, node, oo);
if (page)
stat(s, ORDER_FALLBACK);
@@ -1366,7 +1367,7 @@
&& !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
int pages = 1 << oo_order(oo);
- kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
+ kmemcheck_alloc_shadow(page, oo_order(oo), alloc_gfp, node);
/*
* Objects from caches that have a constructor don't get
diff --git a/net/core/dev.c b/net/core/dev.c
index 14dac06..5b3042e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2284,7 +2284,7 @@
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
__be16 type = skb->protocol;
- int vlan_depth = ETH_HLEN;
+ int vlan_depth = skb->mac_len;
/* Tunnel gso handlers can set protocol to ethernet. */
if (type == htons(ETH_P_TEB)) {
diff --git a/net/core/dst.c b/net/core/dst.c
index ca4231e..80d6286 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -142,12 +142,12 @@
mutex_unlock(&dst_gc_mutex);
}
-int dst_discard(struct sk_buff *skb)
+int dst_discard_sk(struct sock *sk, struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
-EXPORT_SYMBOL(dst_discard);
+EXPORT_SYMBOL(dst_discard_sk);
const u32 dst_default_metrics[RTAX_MAX + 1] = {
/* This initializer is needed to force linker to place this variable
@@ -184,7 +184,7 @@
dst->xfrm = NULL;
#endif
dst->input = dst_discard;
- dst->output = dst_discard;
+ dst->output = dst_discard_sk;
dst->error = 0;
dst->obsolete = initial_obsolete;
dst->header_len = 0;
@@ -209,8 +209,10 @@
/* The first case (dev==NULL) is required, when
protocol module is unloaded.
*/
- if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
- dst->input = dst->output = dst_discard;
+ if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
+ dst->input = dst_discard;
+ dst->output = dst_discard_sk;
+ }
dst->obsolete = DST_OBSOLETE_DEAD;
}
@@ -361,7 +363,8 @@
return;
if (!unregister) {
- dst->input = dst->output = dst_discard;
+ dst->input = dst_discard;
+ dst->output = dst_discard_sk;
} else {
dst->dev = dev_net(dst->dev)->loopback_dev;
dev_hold(dst->dev);
diff --git a/net/core/filter.c b/net/core/filter.c
index e08b382..cd58614 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -600,6 +600,9 @@
if (skb_is_nonlinear(skb))
return 0;
+ if (skb->len < sizeof(struct nlattr))
+ return 0;
+
if (A > skb->len - sizeof(struct nlattr))
return 0;
@@ -618,11 +621,14 @@
if (skb_is_nonlinear(skb))
return 0;
+ if (skb->len < sizeof(struct nlattr))
+ return 0;
+
if (A > skb->len - sizeof(struct nlattr))
return 0;
nla = (struct nlattr *) &skb->data[A];
- if (nla->nla_len > A - skb->len)
+ if (nla->nla_len > skb->len - A)
return 0;
nla = nla_find_nested(nla, X);
@@ -1737,7 +1743,6 @@
[BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS,
[BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS,
[BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
[BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
[BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS,
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 8876078..0248e8a 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -138,7 +138,7 @@
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
- err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
+ err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
return net_xmit_eval(err);
}
return -ENOBUFS;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index ce0cbbf..daccc4a 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -752,7 +752,7 @@
return n->output(n, skb);
}
-static int dn_output(struct sk_buff *skb)
+static int dn_output(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct dn_route *rt = (struct dn_route *)dst;
@@ -838,6 +838,18 @@
* Used to catch bugs. This should never normally get
* called.
*/
+static int dn_rt_bug_sk(struct sock *sk, struct sk_buff *skb)
+{
+ struct dn_skb_cb *cb = DN_SKB_CB(skb);
+
+ net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n",
+ le16_to_cpu(cb->src), le16_to_cpu(cb->dst));
+
+ kfree_skb(skb);
+
+ return NET_RX_DROP;
+}
+
static int dn_rt_bug(struct sk_buff *skb)
{
struct dn_skb_cb *cb = DN_SKB_CB(skb);
@@ -1463,7 +1475,7 @@
rt->n = neigh;
rt->dst.lastuse = jiffies;
- rt->dst.output = dn_rt_bug;
+ rt->dst.output = dn_rt_bug_sk;
switch (res.type) {
case RTN_UNICAST:
rt->dst.input = dn_forward;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 1a629f8..255aa99 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -250,7 +250,7 @@
bool dev_match;
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = oif;
+ fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
fl4.daddr = src;
fl4.saddr = dst;
fl4.flowi4_tos = tos;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b53f0bf..8a043f0 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -631,6 +631,7 @@
.daddr = nh->nh_gw,
.flowi4_scope = cfg->fc_scope + 1,
.flowi4_oif = nh->nh_oif,
+ .flowi4_iif = LOOPBACK_IFINDEX,
};
/* It is not necessary, but requires a bit of thinking */
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1a0755f..1cbeba5 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -101,17 +101,17 @@
skb_dst(skb)->dev, dst_output);
}
-int ip_local_out(struct sk_buff *skb)
+int ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
{
int err;
err = __ip_local_out(skb);
if (likely(err == 1))
- err = dst_output(skb);
+ err = dst_output_sk(sk, skb);
return err;
}
-EXPORT_SYMBOL_GPL(ip_local_out);
+EXPORT_SYMBOL_GPL(ip_local_out_sk);
static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
{
@@ -226,9 +226,8 @@
return ip_finish_output2(skb);
}
-int ip_mc_output(struct sk_buff *skb)
+int ip_mc_output(struct sock *sk, struct sk_buff *skb)
{
- struct sock *sk = skb->sk;
struct rtable *rt = skb_rtable(skb);
struct net_device *dev = rt->dst.dev;
@@ -287,7 +286,7 @@
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
-int ip_output(struct sk_buff *skb)
+int ip_output(struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
@@ -315,9 +314,9 @@
sizeof(fl4->saddr) + sizeof(fl4->daddr));
}
-int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
+/* Note: skb->sk can be different from sk, in case of tunnels */
+int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
{
- struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(sk);
struct ip_options_rcu *inet_opt;
struct flowi4 *fl4;
@@ -389,6 +388,7 @@
ip_select_ident_more(skb, &rt->dst, sk,
(skb_shinfo(skb)->gso_segs ?: 1) - 1);
+ /* TODO : should we use skb->sk here instead of sk ? */
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index e77381d..fa5b751 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -670,7 +670,7 @@
return;
}
- err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
+ err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
@@ -722,19 +722,18 @@
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
int err = 0;
- struct ip_tunnel *t;
- struct net *net = dev_net(dev);
- struct ip_tunnel *tunnel = netdev_priv(dev);
- struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct net *net = t->net;
+ struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
BUG_ON(!itn->fb_tunnel_dev);
switch (cmd) {
case SIOCGETTUNNEL:
- t = NULL;
- if (dev == itn->fb_tunnel_dev)
+ if (dev == itn->fb_tunnel_dev) {
t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
- if (t == NULL)
- t = netdev_priv(dev);
+ if (t == NULL)
+ t = netdev_priv(dev);
+ }
memcpy(p, &t->parms, sizeof(*p));
break;
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index e0c2b1d..bcf206c 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -46,7 +46,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
-int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
+int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto,
__u8 tos, __u8 ttl, __be16 df, bool xnet)
{
@@ -76,7 +76,7 @@
iph->ttl = ttl;
__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
- err = ip_local_out(skb);
+ err = ip_local_out_sk(sk, skb);
if (unlikely(net_xmit_eval(err)))
pkt_len = 0;
return pkt_len;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 28863570..d84dc8d 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -455,7 +455,7 @@
struct mr_table *mrt;
struct flowi4 fl4 = {
.flowi4_oif = dev->ifindex,
- .flowi4_iif = skb->skb_iif,
+ .flowi4_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi4_mark = skb->mark,
};
int err;
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index c49dcd0..4bfaedf 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -89,11 +89,8 @@
if (ipv4_is_multicast(iph->daddr)) {
if (ipv4_is_zeronet(iph->saddr))
return ipv4_is_local_multicast(iph->daddr) ^ invert;
- flow.flowi4_iif = 0;
- } else {
- flow.flowi4_iif = LOOPBACK_IFINDEX;
}
-
+ flow.flowi4_iif = LOOPBACK_IFINDEX;
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
flow.flowi4_oif = 0;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index f4b19e5..8210964 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -252,26 +252,33 @@
{
struct net *net = sock_net(sk);
kgid_t group = current_egid();
- struct group_info *group_info = get_current_groups();
- int i, j, count = group_info->ngroups;
+ struct group_info *group_info;
+ int i, j, count;
kgid_t low, high;
+ int ret = 0;
inet_get_ping_group_range_net(net, &low, &high);
if (gid_lte(low, group) && gid_lte(group, high))
return 0;
+ group_info = get_current_groups();
+ count = group_info->ngroups;
for (i = 0; i < group_info->nblocks; i++) {
int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
for (j = 0; j < cp_count; j++) {
kgid_t gid = group_info->blocks[i][j];
if (gid_lte(low, gid) && gid_lte(gid, high))
- return 0;
+ goto out_release_group;
}
count -= cp_count;
}
- return -EACCES;
+ ret = -EACCES;
+
+out_release_group:
+ put_group_info(group_info);
+ return ret;
}
EXPORT_SYMBOL_GPL(ping_init_sock);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 34d094c..db1e0da 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1129,7 +1129,7 @@
dst_set_expires(&rt->dst, 0);
}
-static int ip_rt_bug(struct sk_buff *skb)
+static int ip_rt_bug(struct sock *sk, struct sk_buff *skb)
{
pr_debug("%s: %pI4 -> %pI4, %s\n",
__func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
@@ -1700,8 +1700,7 @@
if (res.type == RTN_LOCAL) {
err = fib_validate_source(skb, saddr, daddr, tos,
- LOOPBACK_IFINDEX,
- dev, in_dev, &itag);
+ 0, dev, in_dev, &itag);
if (err < 0)
goto martian_source_keep_err;
goto local_input;
@@ -2218,7 +2217,7 @@
new->__use = 1;
new->input = dst_discard;
- new->output = dst_discard;
+ new->output = dst_discard_sk;
new->dev = ort->dst.dev;
if (new->dev)
@@ -2357,7 +2356,7 @@
}
} else
#endif
- if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
+ if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
goto nla_put_failure;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 699fb10..025e250 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -981,7 +981,7 @@
TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
tcp_skb_pcount(skb));
- err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
+ err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
if (likely(err <= 0))
return err;
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index baa0f63..40e701f 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -86,7 +86,7 @@
return xfrm_output(skb);
}
-int xfrm4_output(struct sk_buff *skb)
+int xfrm4_output(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct xfrm_state *x = dst->xfrm;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index c913818..d4ade34 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -224,9 +224,8 @@
return dst;
}
-int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
+int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused)
{
- struct sock *sk = skb->sk;
struct ipv6_pinfo *np = inet6_sk(sk);
struct flowi6 fl6;
struct dst_entry *dst;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index c98338b..9d92146 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1559,6 +1559,15 @@
return 0;
}
+static void ip6gre_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ if (dev != ign->fb_tunnel_dev)
+ unregister_netdevice_queue(dev, head);
+}
+
static size_t ip6gre_get_size(const struct net_device *dev)
{
return
@@ -1636,6 +1645,7 @@
.validate = ip6gre_tunnel_validate,
.newlink = ip6gre_newlink,
.changelink = ip6gre_changelink,
+ .dellink = ip6gre_dellink,
.get_size = ip6gre_get_size,
.fill_info = ip6gre_fill_info,
};
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3284d61..40e7581 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -132,7 +132,7 @@
return ip6_finish_output2(skb);
}
-int ip6_output(struct sk_buff *skb)
+int ip6_output(struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index e1df691..b05b609 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1340,8 +1340,8 @@
int err = 0;
struct ip6_tnl_parm p;
struct __ip6_tnl_parm p1;
- struct ip6_tnl *t = NULL;
- struct net *net = dev_net(dev);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
switch (cmd) {
@@ -1353,11 +1353,11 @@
}
ip6_tnl_parm_from_user(&p1, &p);
t = ip6_tnl_locate(net, &p1, 0);
+ if (t == NULL)
+ t = netdev_priv(dev);
} else {
memset(&p, 0, sizeof(p));
}
- if (t == NULL)
- t = netdev_priv(dev);
ip6_tnl_parm_to_user(&p, &t->parms);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
err = -EFAULT;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 8737400..8659067 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -700,7 +700,7 @@
struct mr6_table *mrt;
struct flowi6 fl6 = {
.flowi6_oif = dev->ifindex,
- .flowi6_iif = skb->skb_iif,
+ .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_mark = skb->mark,
};
int err;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5015c50..4011617 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -84,9 +84,9 @@
static int ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
-static int ip6_pkt_discard_out(struct sk_buff *skb);
+static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
-static int ip6_pkt_prohibit_out(struct sk_buff *skb);
+static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
@@ -290,7 +290,7 @@
.obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -EINVAL,
.input = dst_discard,
- .output = dst_discard,
+ .output = dst_discard_sk,
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
.rt6i_protocol = RTPROT_KERNEL,
@@ -1058,7 +1058,7 @@
new->__use = 1;
new->input = dst_discard;
- new->output = dst_discard;
+ new->output = dst_discard_sk;
if (dst_metrics_read_only(&ort->dst))
new->_metrics = ort->dst._metrics;
@@ -1338,7 +1338,7 @@
unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
- return mtu;
+ goto out;
mtu = IPV6_MIN_MTU;
@@ -1348,7 +1348,8 @@
mtu = idev->cnf.mtu6;
rcu_read_unlock();
- return mtu;
+out:
+ return min_t(unsigned int, mtu, IP6_MAX_MTU);
}
static struct dst_entry *icmp6_dst_gc_list;
@@ -1576,7 +1577,7 @@
switch (cfg->fc_type) {
case RTN_BLACKHOLE:
rt->dst.error = -EINVAL;
- rt->dst.output = dst_discard;
+ rt->dst.output = dst_discard_sk;
rt->dst.input = dst_discard;
break;
case RTN_PROHIBIT:
@@ -2128,7 +2129,7 @@
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_discard_out(struct sk_buff *skb)
+static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
@@ -2139,7 +2140,7 @@
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_prohibit_out(struct sk_buff *skb)
+static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 1693c8d..e5a453c 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -974,8 +974,9 @@
goto out;
}
- err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos,
- ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+ err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
+ IPPROTO_IPV6, tos, ttl, df,
+ !net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return NETDEV_TX_OK;
@@ -1126,8 +1127,8 @@
int err = 0;
struct ip_tunnel_parm p;
struct ip_tunnel_prl prl;
- struct ip_tunnel *t;
- struct net *net = dev_net(dev);
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
@@ -1138,16 +1139,15 @@
#ifdef CONFIG_IPV6_SIT_6RD
case SIOCGET6RD:
#endif
- t = NULL;
if (dev == sitn->fb_tunnel_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
err = -EFAULT;
break;
}
t = ipip6_tunnel_locate(net, &p, 0);
+ if (t == NULL)
+ t = netdev_priv(dev);
}
- if (t == NULL)
- t = netdev_priv(dev);
err = -EFAULT;
if (cmd == SIOCGETTUNNEL) {
@@ -1243,9 +1243,6 @@
err = -EINVAL;
if (dev == sitn->fb_tunnel_dev)
goto done;
- err = -ENOENT;
- if (!(t = netdev_priv(dev)))
- goto done;
err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data);
break;
@@ -1261,9 +1258,6 @@
err = -EFAULT;
if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
goto done;
- err = -ENOENT;
- if (!(t = netdev_priv(dev)))
- goto done;
switch (cmd) {
case SIOCDELPRL:
@@ -1291,8 +1285,6 @@
sizeof(ip6rd)))
goto done;
- t = netdev_priv(dev);
-
if (cmd != SIOCDEL6RD) {
err = ipip6_tunnel_update_6rd(t, &ip6rd);
if (err < 0)
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6cd625e..19ef329 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -163,7 +163,7 @@
return x->outer_mode->afinfo->output_finish(skb);
}
-int xfrm6_output(struct sk_buff *skb)
+int xfrm6_output(struct sock *sk, struct sk_buff *skb)
{
return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
skb_dst(skb)->dev, __xfrm6_output);
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 47f7a54..a4e37d7 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1131,10 +1131,10 @@
skb->local_df = 1;
#if IS_ENABLED(CONFIG_IPV6)
if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped)
- error = inet6_csk_xmit(skb, NULL);
+ error = inet6_csk_xmit(tunnel->sock, skb, NULL);
else
#endif
- error = ip_queue_xmit(skb, fl);
+ error = ip_queue_xmit(tunnel->sock, skb, fl);
/* Update stats */
if (error >= 0) {
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 0b44d85..3397fe6 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -487,7 +487,7 @@
xmit:
/* Queue the packet to IP for output */
- rc = ip_queue_xmit(skb, &inet->cork.fl);
+ rc = ip_queue_xmit(sk, skb, &inet->cork.fl);
rcu_read_unlock();
error:
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 6dba48e..75421f2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1795,6 +1795,7 @@
int cpu;
atomic_set(&net->ct.count, 0);
+ seqcount_init(&net->ct.generation);
net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
if (!net->ct.pcpu_lists)
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 7bd03de..825c3e3 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -605,32 +605,14 @@
.expect_policy = &pptp_exp_policy,
};
-static void nf_conntrack_pptp_net_exit(struct net *net)
-{
- nf_ct_gre_keymap_flush(net);
-}
-
-static struct pernet_operations nf_conntrack_pptp_net_ops = {
- .exit = nf_conntrack_pptp_net_exit,
-};
-
static int __init nf_conntrack_pptp_init(void)
{
- int rv;
-
- rv = nf_conntrack_helper_register(&pptp);
- if (rv < 0)
- return rv;
- rv = register_pernet_subsys(&nf_conntrack_pptp_net_ops);
- if (rv < 0)
- nf_conntrack_helper_unregister(&pptp);
- return rv;
+ return nf_conntrack_helper_register(&pptp);
}
static void __exit nf_conntrack_pptp_fini(void)
{
nf_conntrack_helper_unregister(&pptp);
- unregister_pernet_subsys(&nf_conntrack_pptp_net_ops);
}
module_init(nf_conntrack_pptp_init);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 9d9c0da..d566573 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -66,7 +66,7 @@
return net_generic(net, proto_gre_net_id);
}
-void nf_ct_gre_keymap_flush(struct net *net)
+static void nf_ct_gre_keymap_flush(struct net *net)
{
struct netns_proto_gre *net_gre = gre_pernet(net);
struct nf_ct_gre_keymap *km, *tmp;
@@ -78,7 +78,6 @@
}
write_unlock_bh(&net_gre->keymap_lock);
}
-EXPORT_SYMBOL(nf_ct_gre_keymap_flush);
static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
const struct nf_conntrack_tuple *t)
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 90998a6..8041053 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -25,9 +25,8 @@
struct nft_data data[NFT_REG_MAX + 1])
{
const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
- u32 mask;
+ u32 mask = nft_cmp_fast_mask(priv->len);
- mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len);
if ((data[priv->sreg].data[0] & mask) == priv->data)
return;
data[NFT_REG_VERDICT].verdict = NFT_BREAK;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 954925d..e2b3f51 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -128,7 +128,7 @@
BUG_ON(err < 0);
desc.len *= BITS_PER_BYTE;
- mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - desc.len);
+ mask = nft_cmp_fast_mask(desc.len);
priv->data = data.data[0] & mask;
priv->len = desc.len;
return 0;
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index a3d6951..ebb6e24 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -174,7 +174,7 @@
skb->local_df = 1;
- return iptunnel_xmit(rt, skb, fl.saddr,
+ return iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
OVS_CB(skb)->tun_key->ipv4_tos,
OVS_CB(skb)->tun_key->ipv4_ttl, df, false);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 4f6d6f9..39579c3 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1395,35 +1395,44 @@
return false;
}
-/* Update asoc's rwnd for the approximated state in the buffer,
- * and check whether SACK needs to be sent.
- */
-void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer)
+/* Increase asoc's rwnd by len and send any window update SACK if needed. */
+void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
{
- int rx_count;
struct sctp_chunk *sack;
struct timer_list *timer;
- if (asoc->ep->rcvbuf_policy)
- rx_count = atomic_read(&asoc->rmem_alloc);
- else
- rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
+ if (asoc->rwnd_over) {
+ if (asoc->rwnd_over >= len) {
+ asoc->rwnd_over -= len;
+ } else {
+ asoc->rwnd += (len - asoc->rwnd_over);
+ asoc->rwnd_over = 0;
+ }
+ } else {
+ asoc->rwnd += len;
+ }
- if ((asoc->base.sk->sk_rcvbuf - rx_count) > 0)
- asoc->rwnd = (asoc->base.sk->sk_rcvbuf - rx_count) >> 1;
- else
- asoc->rwnd = 0;
+ /* If we had window pressure, start recovering it
+ * once our rwnd had reached the accumulated pressure
+ * threshold. The idea is to recover slowly, but up
+ * to the initial advertised window.
+ */
+ if (asoc->rwnd_press && asoc->rwnd >= asoc->rwnd_press) {
+ int change = min(asoc->pathmtu, asoc->rwnd_press);
+ asoc->rwnd += change;
+ asoc->rwnd_press -= change;
+ }
- pr_debug("%s: asoc:%p rwnd=%u, rx_count=%d, sk_rcvbuf=%d\n",
- __func__, asoc, asoc->rwnd, rx_count,
- asoc->base.sk->sk_rcvbuf);
+ pr_debug("%s: asoc:%p rwnd increased by %d to (%u, %u) - %u\n",
+ __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
+ asoc->a_rwnd);
/* Send a window update SACK if the rwnd has increased by at least the
* minimum of the association's PMTU and half of the receive buffer.
* The algorithm used is similar to the one described in
* Section 4.2.3.3 of RFC 1122.
*/
- if (update_peer && sctp_peer_needs_update(asoc)) {
+ if (sctp_peer_needs_update(asoc)) {
asoc->a_rwnd = asoc->rwnd;
pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u "
@@ -1445,6 +1454,45 @@
}
}
+/* Decrease asoc's rwnd by len. */
+void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
+{
+ int rx_count;
+ int over = 0;
+
+ if (unlikely(!asoc->rwnd || asoc->rwnd_over))
+ pr_debug("%s: association:%p has asoc->rwnd:%u, "
+ "asoc->rwnd_over:%u!\n", __func__, asoc,
+ asoc->rwnd, asoc->rwnd_over);
+
+ if (asoc->ep->rcvbuf_policy)
+ rx_count = atomic_read(&asoc->rmem_alloc);
+ else
+ rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
+
+ /* If we've reached or overflowed our receive buffer, announce
+ * a 0 rwnd if rwnd would still be positive. Store the
+ * the potential pressure overflow so that the window can be restored
+ * back to original value.
+ */
+ if (rx_count >= asoc->base.sk->sk_rcvbuf)
+ over = 1;
+
+ if (asoc->rwnd >= len) {
+ asoc->rwnd -= len;
+ if (over) {
+ asoc->rwnd_press += asoc->rwnd;
+ asoc->rwnd = 0;
+ }
+ } else {
+ asoc->rwnd_over = len - asoc->rwnd;
+ asoc->rwnd = 0;
+ }
+
+ pr_debug("%s: asoc:%p rwnd decreased by %d to (%u, %u, %u)\n",
+ __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
+ asoc->rwnd_press);
+}
/* Build the bind address list for the association based on info from the
* local endpoint and the remote peer.
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 4e1d0fc..c09757f 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -957,7 +957,7 @@
SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS);
- return ip_queue_xmit(skb, &transport->fl);
+ return ip_queue_xmit(&inet->sk, skb, &transport->fl);
}
static struct sctp_af sctp_af_inet;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 01e0024..ae9fbeb 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -6178,7 +6178,7 @@
* PMTU. In cases, such as loopback, this might be a rather
* large spill over.
*/
- if ((!chunk->data_accepted) && (!asoc->rwnd ||
+ if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over ||
(datalen > asoc->rwnd + asoc->frag_point))) {
/* If this is the next TSN, consider reneging to make
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e13519e..ff20e2d 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2115,6 +2115,12 @@
sctp_skb_pull(skb, copied);
skb_queue_head(&sk->sk_receive_queue, skb);
+ /* When only partial message is copied to the user, increase
+ * rwnd by that amount. If all the data in the skb is read,
+ * rwnd is updated when the event is freed.
+ */
+ if (!sctp_ulpevent_is_notification(event))
+ sctp_assoc_rwnd_increase(event->asoc, copied);
goto out;
} else if ((event->msg_flags & MSG_NOTIFICATION) ||
(event->msg_flags & MSG_EOR))
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 8d198ae..85c6465 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -989,7 +989,7 @@
skb = sctp_event2skb(event);
/* Set the owner and charge rwnd for bytes received. */
sctp_ulpevent_set_owner(event, asoc);
- sctp_assoc_rwnd_update(asoc, false);
+ sctp_assoc_rwnd_decrease(asoc, skb_headlen(skb));
if (!skb->data_len)
return;
@@ -1011,7 +1011,6 @@
{
struct sk_buff *skb, *frag;
unsigned int len;
- struct sctp_association *asoc;
/* Current stack structures assume that the rcv buffer is
* per socket. For UDP style sockets this is not true as
@@ -1036,11 +1035,8 @@
}
done:
- asoc = event->asoc;
- sctp_association_hold(asoc);
+ sctp_assoc_rwnd_increase(event->asoc, len);
sctp_ulpevent_release_owner(event);
- sctp_assoc_rwnd_update(asoc, true);
- sctp_association_put(asoc);
}
static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event)
diff --git a/net/socket.c b/net/socket.c
index 1b1e7e6..abf56b2 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1880,8 +1880,8 @@
* Receive a datagram from a socket.
*/
-asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
- unsigned int flags)
+SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
+ unsigned int, flags)
{
return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f02f511..c08fbd1 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1842,7 +1842,7 @@
xfrm_pol_put(pol);
}
-static int xdst_queue_output(struct sk_buff *skb)
+static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
{
unsigned long sched_next;
struct dst_entry *dst = skb_dst(skb);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index d4b6015..2458a1d 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -97,6 +97,14 @@
bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
}
+static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
+
+static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic)
+{
+ if (WARN_ON(ioapic->rtc_status.pending_eoi < 0))
+ kvm_rtc_eoi_tracking_restore_all(ioapic);
+}
+
static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
{
bool new_val, old_val;
@@ -120,9 +128,8 @@
} else {
__clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
ioapic->rtc_status.pending_eoi--;
+ rtc_status_pending_eoi_check_valid(ioapic);
}
-
- WARN_ON(ioapic->rtc_status.pending_eoi < 0);
}
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
@@ -149,10 +156,10 @@
static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
{
- if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map))
+ if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) {
--ioapic->rtc_status.pending_eoi;
-
- WARN_ON(ioapic->rtc_status.pending_eoi < 0);
+ rtc_status_pending_eoi_check_valid(ioapic);
+ }
}
static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
@@ -353,10 +360,16 @@
ioapic->irr &= ~(1 << irq);
if (irq == RTC_GSI && line_status) {
+ /*
+ * pending_eoi cannot ever become negative (see
+ * rtc_status_pending_eoi_check_valid) and the caller
+ * ensures that it is only called if it is >= zero, namely
+ * if rtc_irq_check_coalesced returns false).
+ */
BUG_ON(ioapic->rtc_status.pending_eoi != 0);
ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
ioapic->rtc_status.dest_map);
- ioapic->rtc_status.pending_eoi = ret;
+ ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret);
} else
ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);