| /* Copyright (c) 2006 Coraid, Inc. See COPYING for GPL terms. */ |
| /* |
| * aoecmd.c |
| * Filesystem request handling methods |
| */ |
| |
| #include <linux/hdreg.h> |
| #include <linux/blkdev.h> |
| #include <linux/skbuff.h> |
| #include <linux/netdevice.h> |
| #include <linux/genhd.h> |
| #include <asm/unaligned.h> |
| #include "aoe.h" |
| |
| #define TIMERTICK (HZ / 10) |
| #define MINTIMER (2 * TIMERTICK) |
| #define MAXTIMER (HZ << 1) |
| #define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */ |
| |
| struct sk_buff * |
| new_skb(ulong len) |
| { |
| struct sk_buff *skb; |
| |
| skb = alloc_skb(len, GFP_ATOMIC); |
| if (skb) { |
| skb->nh.raw = skb->mac.raw = skb->data; |
| skb->protocol = __constant_htons(ETH_P_AOE); |
| skb->priority = 0; |
| skb_put(skb, len); |
| memset(skb->head, 0, len); |
| skb->next = skb->prev = NULL; |
| |
| /* tell the network layer not to perform IP checksums |
| * or to get the NIC to do it |
| */ |
| skb->ip_summed = CHECKSUM_NONE; |
| } |
| return skb; |
| } |
| |
| static struct frame * |
| getframe(struct aoedev *d, int tag) |
| { |
| struct frame *f, *e; |
| |
| f = d->frames; |
| e = f + d->nframes; |
| for (; f<e; f++) |
| if (f->tag == tag) |
| return f; |
| return NULL; |
| } |
| |
| /* |
| * Leave the top bit clear so we have tagspace for userland. |
| * The bottom 16 bits are the xmit tick for rexmit/rttavg processing. |
| * This driver reserves tag -1 to mean "unused frame." |
| */ |
| static int |
| newtag(struct aoedev *d) |
| { |
| register ulong n; |
| |
| n = jiffies & 0xffff; |
| return n |= (++d->lasttag & 0x7fff) << 16; |
| } |
| |
| static int |
| aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h) |
| { |
| u32 host_tag = newtag(d); |
| |
| memcpy(h->src, d->ifp->dev_addr, sizeof h->src); |
| memcpy(h->dst, d->addr, sizeof h->dst); |
| h->type = __constant_cpu_to_be16(ETH_P_AOE); |
| h->verfl = AOE_HVER; |
| h->major = cpu_to_be16(d->aoemajor); |
| h->minor = d->aoeminor; |
| h->cmd = AOECMD_ATA; |
| h->tag = cpu_to_be32(host_tag); |
| |
| return host_tag; |
| } |
| |
| static void |
| aoecmd_ata_rw(struct aoedev *d, struct frame *f) |
| { |
| struct aoe_hdr *h; |
| struct aoe_atahdr *ah; |
| struct buf *buf; |
| struct sk_buff *skb; |
| ulong bcnt; |
| register sector_t sector; |
| char writebit, extbit; |
| |
| writebit = 0x10; |
| extbit = 0x4; |
| |
| buf = d->inprocess; |
| |
| sector = buf->sector; |
| bcnt = buf->bv_resid; |
| if (bcnt > MAXATADATA) |
| bcnt = MAXATADATA; |
| |
| /* initialize the headers & frame */ |
| skb = f->skb; |
| h = (struct aoe_hdr *) skb->mac.raw; |
| ah = (struct aoe_atahdr *) (h+1); |
| skb->len = sizeof *h + sizeof *ah; |
| memset(h, 0, skb->len); |
| f->tag = aoehdr_atainit(d, h); |
| f->waited = 0; |
| f->buf = buf; |
| f->bufaddr = buf->bufaddr; |
| |
| /* set up ata header */ |
| ah->scnt = bcnt >> 9; |
| ah->lba0 = sector; |
| ah->lba1 = sector >>= 8; |
| ah->lba2 = sector >>= 8; |
| ah->lba3 = sector >>= 8; |
| if (d->flags & DEVFL_EXT) { |
| ah->aflags |= AOEAFL_EXT; |
| ah->lba4 = sector >>= 8; |
| ah->lba5 = sector >>= 8; |
| } else { |
| extbit = 0; |
| ah->lba3 &= 0x0f; |
| ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ |
| } |
| |
| if (bio_data_dir(buf->bio) == WRITE) { |
| skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr), |
| offset_in_page(f->bufaddr), bcnt); |
| ah->aflags |= AOEAFL_WRITE; |
| } else { |
| skb_shinfo(skb)->nr_frags = 0; |
| skb->len = ETH_ZLEN; |
| writebit = 0; |
| } |
| |
| ah->cmdstat = WIN_READ | writebit | extbit; |
| |
| /* mark all tracking fields and load out */ |
| buf->nframesout += 1; |
| buf->bufaddr += bcnt; |
| buf->bv_resid -= bcnt; |
| /* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */ |
| buf->resid -= bcnt; |
| buf->sector += bcnt >> 9; |
| if (buf->resid == 0) { |
| d->inprocess = NULL; |
| } else if (buf->bv_resid == 0) { |
| buf->bv++; |
| buf->bv_resid = buf->bv->bv_len; |
| buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset; |
| } |
| |
| skb->dev = d->ifp; |
| skb_get(skb); |
| skb->next = NULL; |
| if (d->sendq_hd) |
| d->sendq_tl->next = skb; |
| else |
| d->sendq_hd = skb; |
| d->sendq_tl = skb; |
| } |
| |
| /* some callers cannot sleep, and they can call this function, |
| * transmitting the packets later, when interrupts are on |
| */ |
| static struct sk_buff * |
| aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail) |
| { |
| struct aoe_hdr *h; |
| struct aoe_cfghdr *ch; |
| struct sk_buff *skb, *sl, *sl_tail; |
| struct net_device *ifp; |
| |
| sl = sl_tail = NULL; |
| |
| read_lock(&dev_base_lock); |
| for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) { |
| dev_hold(ifp); |
| if (!is_aoe_netif(ifp)) |
| continue; |
| |
| skb = new_skb(sizeof *h + sizeof *ch); |
| if (skb == NULL) { |
| printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n"); |
| continue; |
| } |
| skb->dev = ifp; |
| if (sl_tail == NULL) |
| sl_tail = skb; |
| h = (struct aoe_hdr *) skb->mac.raw; |
| memset(h, 0, sizeof *h + sizeof *ch); |
| |
| memset(h->dst, 0xff, sizeof h->dst); |
| memcpy(h->src, ifp->dev_addr, sizeof h->src); |
| h->type = __constant_cpu_to_be16(ETH_P_AOE); |
| h->verfl = AOE_HVER; |
| h->major = cpu_to_be16(aoemajor); |
| h->minor = aoeminor; |
| h->cmd = AOECMD_CFG; |
| |
| skb->next = sl; |
| sl = skb; |
| } |
| read_unlock(&dev_base_lock); |
| |
| if (tail != NULL) |
| *tail = sl_tail; |
| return sl; |
| } |
| |
| /* enters with d->lock held */ |
| void |
| aoecmd_work(struct aoedev *d) |
| { |
| struct frame *f; |
| struct buf *buf; |
| |
| if (d->flags & DEVFL_PAUSE) { |
| if (!aoedev_isbusy(d)) |
| d->sendq_hd = aoecmd_cfg_pkts(d->aoemajor, |
| d->aoeminor, &d->sendq_tl); |
| return; |
| } |
| |
| loop: |
| f = getframe(d, FREETAG); |
| if (f == NULL) |
| return; |
| if (d->inprocess == NULL) { |
| if (list_empty(&d->bufq)) |
| return; |
| buf = container_of(d->bufq.next, struct buf, bufs); |
| list_del(d->bufq.next); |
| /*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */ |
| d->inprocess = buf; |
| } |
| aoecmd_ata_rw(d, f); |
| goto loop; |
| } |
| |
| static void |
| rexmit(struct aoedev *d, struct frame *f) |
| { |
| struct sk_buff *skb; |
| struct aoe_hdr *h; |
| char buf[128]; |
| u32 n; |
| |
| n = newtag(d); |
| |
| snprintf(buf, sizeof buf, |
| "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n", |
| "retransmit", |
| d->aoemajor, d->aoeminor, f->tag, jiffies, n); |
| aoechr_error(buf); |
| |
| skb = f->skb; |
| h = (struct aoe_hdr *) skb->mac.raw; |
| f->tag = n; |
| h->tag = cpu_to_be32(n); |
| memcpy(h->dst, d->addr, sizeof h->dst); |
| memcpy(h->src, d->ifp->dev_addr, sizeof h->src); |
| |
| skb->dev = d->ifp; |
| skb_get(skb); |
| skb->next = NULL; |
| if (d->sendq_hd) |
| d->sendq_tl->next = skb; |
| else |
| d->sendq_hd = skb; |
| d->sendq_tl = skb; |
| } |
| |
| static int |
| tsince(int tag) |
| { |
| int n; |
| |
| n = jiffies & 0xffff; |
| n -= tag & 0xffff; |
| if (n < 0) |
| n += 1<<16; |
| return n; |
| } |
| |
| static void |
| rexmit_timer(ulong vp) |
| { |
| struct aoedev *d; |
| struct frame *f, *e; |
| struct sk_buff *sl; |
| register long timeout; |
| ulong flags, n; |
| |
| d = (struct aoedev *) vp; |
| sl = NULL; |
| |
| /* timeout is always ~150% of the moving average */ |
| timeout = d->rttavg; |
| timeout += timeout >> 1; |
| |
| spin_lock_irqsave(&d->lock, flags); |
| |
| if (d->flags & DEVFL_TKILL) { |
| spin_unlock_irqrestore(&d->lock, flags); |
| return; |
| } |
| f = d->frames; |
| e = f + d->nframes; |
| for (; f<e; f++) { |
| if (f->tag != FREETAG && tsince(f->tag) >= timeout) { |
| n = f->waited += timeout; |
| n /= HZ; |
| if (n > MAXWAIT) { /* waited too long. device failure. */ |
| aoedev_downdev(d); |
| break; |
| } |
| rexmit(d, f); |
| } |
| } |
| |
| sl = d->sendq_hd; |
| d->sendq_hd = d->sendq_tl = NULL; |
| if (sl) { |
| n = d->rttavg <<= 1; |
| if (n > MAXTIMER) |
| d->rttavg = MAXTIMER; |
| } |
| |
| d->timer.expires = jiffies + TIMERTICK; |
| add_timer(&d->timer); |
| |
| spin_unlock_irqrestore(&d->lock, flags); |
| |
| aoenet_xmit(sl); |
| } |
| |
| /* this function performs work that has been deferred until sleeping is OK |
| */ |
| void |
| aoecmd_sleepwork(void *vp) |
| { |
| struct aoedev *d = (struct aoedev *) vp; |
| |
| if (d->flags & DEVFL_GDALLOC) |
| aoeblk_gdalloc(d); |
| |
| if (d->flags & DEVFL_NEWSIZE) { |
| struct block_device *bd; |
| unsigned long flags; |
| u64 ssize; |
| |
| ssize = d->gd->capacity; |
| bd = bdget_disk(d->gd, 0); |
| |
| if (bd) { |
| mutex_lock(&bd->bd_inode->i_mutex); |
| i_size_write(bd->bd_inode, (loff_t)ssize<<9); |
| mutex_unlock(&bd->bd_inode->i_mutex); |
| bdput(bd); |
| } |
| spin_lock_irqsave(&d->lock, flags); |
| d->flags |= DEVFL_UP; |
| d->flags &= ~DEVFL_NEWSIZE; |
| spin_unlock_irqrestore(&d->lock, flags); |
| } |
| } |
| |
| static void |
| ataid_complete(struct aoedev *d, unsigned char *id) |
| { |
| u64 ssize; |
| u16 n; |
| |
| /* word 83: command set supported */ |
| n = le16_to_cpu(get_unaligned((__le16 *) &id[83<<1])); |
| |
| /* word 86: command set/feature enabled */ |
| n |= le16_to_cpu(get_unaligned((__le16 *) &id[86<<1])); |
| |
| if (n & (1<<10)) { /* bit 10: LBA 48 */ |
| d->flags |= DEVFL_EXT; |
| |
| /* word 100: number lba48 sectors */ |
| ssize = le64_to_cpu(get_unaligned((__le64 *) &id[100<<1])); |
| |
| /* set as in ide-disk.c:init_idedisk_capacity */ |
| d->geo.cylinders = ssize; |
| d->geo.cylinders /= (255 * 63); |
| d->geo.heads = 255; |
| d->geo.sectors = 63; |
| } else { |
| d->flags &= ~DEVFL_EXT; |
| |
| /* number lba28 sectors */ |
| ssize = le32_to_cpu(get_unaligned((__le32 *) &id[60<<1])); |
| |
| /* NOTE: obsolete in ATA 6 */ |
| d->geo.cylinders = le16_to_cpu(get_unaligned((__le16 *) &id[54<<1])); |
| d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1])); |
| d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1])); |
| } |
| |
| if (d->ssize != ssize) |
| printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu " |
| "sectors\n", (unsigned long long)mac_addr(d->addr), |
| d->aoemajor, d->aoeminor, |
| d->fw_ver, (long long)ssize); |
| d->ssize = ssize; |
| d->geo.start = 0; |
| if (d->gd != NULL) { |
| d->gd->capacity = ssize; |
| d->flags |= DEVFL_NEWSIZE; |
| } else { |
| if (d->flags & DEVFL_GDALLOC) { |
| printk(KERN_INFO "aoe: %s: %s e%lu.%lu, %s\n", |
| __FUNCTION__, |
| "can't schedule work for", |
| d->aoemajor, d->aoeminor, |
| "it's already on! (This really shouldn't happen).\n"); |
| return; |
| } |
| d->flags |= DEVFL_GDALLOC; |
| } |
| schedule_work(&d->work); |
| } |
| |
| static void |
| calc_rttavg(struct aoedev *d, int rtt) |
| { |
| register long n; |
| |
| n = rtt; |
| if (n < MINTIMER) |
| n = MINTIMER; |
| else if (n > MAXTIMER) |
| n = MAXTIMER; |
| |
| /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */ |
| n -= d->rttavg; |
| d->rttavg += n >> 2; |
| } |
| |
| void |
| aoecmd_ata_rsp(struct sk_buff *skb) |
| { |
| struct aoedev *d; |
| struct aoe_hdr *hin; |
| struct aoe_atahdr *ahin, *ahout; |
| struct frame *f; |
| struct buf *buf; |
| struct sk_buff *sl; |
| register long n; |
| ulong flags; |
| char ebuf[128]; |
| u16 aoemajor; |
| |
| hin = (struct aoe_hdr *) skb->mac.raw; |
| aoemajor = be16_to_cpu(hin->major); |
| d = aoedev_by_aoeaddr(aoemajor, hin->minor); |
| if (d == NULL) { |
| snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response " |
| "for unknown device %d.%d\n", |
| aoemajor, hin->minor); |
| aoechr_error(ebuf); |
| return; |
| } |
| |
| spin_lock_irqsave(&d->lock, flags); |
| |
| f = getframe(d, be32_to_cpu(hin->tag)); |
| if (f == NULL) { |
| spin_unlock_irqrestore(&d->lock, flags); |
| snprintf(ebuf, sizeof ebuf, |
| "%15s e%d.%d tag=%08x@%08lx\n", |
| "unexpected rsp", |
| be16_to_cpu(hin->major), |
| hin->minor, |
| be32_to_cpu(hin->tag), |
| jiffies); |
| aoechr_error(ebuf); |
| return; |
| } |
| |
| calc_rttavg(d, tsince(f->tag)); |
| |
| ahin = (struct aoe_atahdr *) (hin+1); |
| ahout = (struct aoe_atahdr *) (f->skb->mac.raw + sizeof(struct aoe_hdr)); |
| buf = f->buf; |
| |
| if (ahout->cmdstat == WIN_IDENTIFY) |
| d->flags &= ~DEVFL_PAUSE; |
| if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */ |
| printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh " |
| "stat=%2.2Xh from e%ld.%ld\n", |
| ahout->cmdstat, ahin->cmdstat, |
| d->aoemajor, d->aoeminor); |
| if (buf) |
| buf->flags |= BUFFL_FAIL; |
| } else { |
| switch (ahout->cmdstat) { |
| case WIN_READ: |
| case WIN_READ_EXT: |
| n = ahout->scnt << 9; |
| if (skb->len - sizeof *hin - sizeof *ahin < n) { |
| printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt " |
| "ata data size in read. skb->len=%d\n", |
| skb->len); |
| /* fail frame f? just returning will rexmit. */ |
| spin_unlock_irqrestore(&d->lock, flags); |
| return; |
| } |
| memcpy(f->bufaddr, ahin+1, n); |
| case WIN_WRITE: |
| case WIN_WRITE_EXT: |
| break; |
| case WIN_IDENTIFY: |
| if (skb->len - sizeof *hin - sizeof *ahin < 512) { |
| printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size " |
| "in ataid. skb->len=%d\n", skb->len); |
| spin_unlock_irqrestore(&d->lock, flags); |
| return; |
| } |
| ataid_complete(d, (char *) (ahin+1)); |
| break; |
| default: |
| printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized " |
| "outbound ata command %2.2Xh for %d.%d\n", |
| ahout->cmdstat, |
| be16_to_cpu(hin->major), |
| hin->minor); |
| } |
| } |
| |
| if (buf) { |
| buf->nframesout -= 1; |
| if (buf->nframesout == 0 && buf->resid == 0) { |
| unsigned long duration = jiffies - buf->start_time; |
| unsigned long n_sect = buf->bio->bi_size >> 9; |
| struct gendisk *disk = d->gd; |
| const int rw = bio_data_dir(buf->bio); |
| |
| disk_stat_inc(disk, ios[rw]); |
| disk_stat_add(disk, ticks[rw], duration); |
| disk_stat_add(disk, sectors[rw], n_sect); |
| disk_stat_add(disk, io_ticks, duration); |
| n = (buf->flags & BUFFL_FAIL) ? -EIO : 0; |
| bio_endio(buf->bio, buf->bio->bi_size, n); |
| mempool_free(buf, d->bufpool); |
| } |
| } |
| |
| f->buf = NULL; |
| f->tag = FREETAG; |
| |
| aoecmd_work(d); |
| sl = d->sendq_hd; |
| d->sendq_hd = d->sendq_tl = NULL; |
| |
| spin_unlock_irqrestore(&d->lock, flags); |
| aoenet_xmit(sl); |
| } |
| |
| void |
| aoecmd_cfg(ushort aoemajor, unsigned char aoeminor) |
| { |
| struct sk_buff *sl; |
| |
| sl = aoecmd_cfg_pkts(aoemajor, aoeminor, NULL); |
| |
| aoenet_xmit(sl); |
| } |
| |
| /* |
| * Since we only call this in one place (and it only prepares one frame) |
| * we just return the skb. Usually we'd chain it up to the aoedev sendq. |
| */ |
| static struct sk_buff * |
| aoecmd_ata_id(struct aoedev *d) |
| { |
| struct aoe_hdr *h; |
| struct aoe_atahdr *ah; |
| struct frame *f; |
| struct sk_buff *skb; |
| |
| f = getframe(d, FREETAG); |
| if (f == NULL) { |
| printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. " |
| "This shouldn't happen.\n"); |
| return NULL; |
| } |
| |
| /* initialize the headers & frame */ |
| skb = f->skb; |
| h = (struct aoe_hdr *) skb->mac.raw; |
| ah = (struct aoe_atahdr *) (h+1); |
| skb->len = sizeof *h + sizeof *ah; |
| memset(h, 0, skb->len); |
| f->tag = aoehdr_atainit(d, h); |
| f->waited = 0; |
| |
| /* set up ata header */ |
| ah->scnt = 1; |
| ah->cmdstat = WIN_IDENTIFY; |
| ah->lba3 = 0xa0; |
| |
| skb->dev = d->ifp; |
| skb_get(skb); |
| |
| d->rttavg = MAXTIMER; |
| d->timer.function = rexmit_timer; |
| |
| return skb; |
| } |
| |
| void |
| aoecmd_cfg_rsp(struct sk_buff *skb) |
| { |
| struct aoedev *d; |
| struct aoe_hdr *h; |
| struct aoe_cfghdr *ch; |
| ulong flags, sysminor, aoemajor; |
| u16 bufcnt; |
| struct sk_buff *sl; |
| enum { MAXFRAMES = 16 }; |
| |
| h = (struct aoe_hdr *) skb->mac.raw; |
| ch = (struct aoe_cfghdr *) (h+1); |
| |
| /* |
| * Enough people have their dip switches set backwards to |
| * warrant a loud message for this special case. |
| */ |
| aoemajor = be16_to_cpu(h->major); |
| if (aoemajor == 0xfff) { |
| printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf " |
| "address is all ones. Check shelf dip switches\n"); |
| return; |
| } |
| |
| sysminor = SYSMINOR(aoemajor, h->minor); |
| if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) { |
| printk(KERN_INFO |
| "aoe: e%ld.%d: minor number too large\n", |
| aoemajor, (int) h->minor); |
| return; |
| } |
| |
| bufcnt = be16_to_cpu(ch->bufcnt); |
| if (bufcnt > MAXFRAMES) /* keep it reasonable */ |
| bufcnt = MAXFRAMES; |
| |
| d = aoedev_by_sysminor_m(sysminor, bufcnt); |
| if (d == NULL) { |
| printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device sysminor_m failure\n"); |
| return; |
| } |
| |
| spin_lock_irqsave(&d->lock, flags); |
| |
| /* permit device to migrate mac and network interface */ |
| d->ifp = skb->dev; |
| memcpy(d->addr, h->src, sizeof d->addr); |
| |
| /* don't change users' perspective */ |
| if (d->nopen && !(d->flags & DEVFL_PAUSE)) { |
| spin_unlock_irqrestore(&d->lock, flags); |
| return; |
| } |
| d->flags |= DEVFL_PAUSE; /* force pause */ |
| d->fw_ver = be16_to_cpu(ch->fwver); |
| |
| /* check for already outstanding ataid */ |
| sl = aoedev_isbusy(d) == 0 ? aoecmd_ata_id(d) : NULL; |
| |
| spin_unlock_irqrestore(&d->lock, flags); |
| |
| aoenet_xmit(sl); |
| } |
| |