| /* |
| * Copyright (C) 2016 Red Hat, Inc. |
| * Author: Michael S. Tsirkin <mst@redhat.com> |
| * This work is licensed under the terms of the GNU GPL, version 2. |
| * |
 * Simple descriptor-based ring. A virtio 0.9 compatible event index is used
 * for signalling, unconditionally.
| */ |
| #define _GNU_SOURCE |
| #include "main.h" |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <string.h> |
| |
/* next  - where the next entry will be written.
 * prev  - the value "next" had when the event was last triggered.
 * event - the peer requested an event after writing this entry.
 */
| static inline bool need_event(unsigned short event, |
| unsigned short next, |
| unsigned short prev) |
| { |
| return (unsigned short)(next - event - 1) < (unsigned short)(next - prev); |
| } |
| |
/* Design:
 * The guest adds descriptors with unique index values and DESC_HW set in flags.
 * The host overwrites used descriptors with the correct len and index, and
 * with DESC_HW clear.  Flags are always written last.
 */
| #define DESC_HW 0x1 |
| |
| struct desc { |
| unsigned short flags; |
| unsigned short index; |
| unsigned len; |
| unsigned long long addr; |
| }; |
| |
/* how much padding is needed to avoid false sharing between host- and
 * guest-written fields */
| #define HOST_GUEST_PADDING 0x80 |
| |
| /* Mostly read */ |
| struct event { |
| unsigned short kick_index; |
| unsigned char reserved0[HOST_GUEST_PADDING - 2]; |
| unsigned short call_index; |
| unsigned char reserved1[HOST_GUEST_PADDING - 2]; |
| }; |
| |
| struct data { |
	void *buf; /* the descriptor is writable by the host, so we can't trust reading buf back from it */
| void *data; |
| } *data; |
| |
| struct desc *ring; |
| struct event *event; |
| |
| struct guest { |
| unsigned avail_idx; |
| unsigned last_used_idx; |
| unsigned num_free; |
| unsigned kicked_avail_idx; |
| unsigned char reserved[HOST_GUEST_PADDING - 12]; |
| } guest; |
| |
| struct host { |
	/* we do not need to track the last avail index
	 * unless we have more than one request in flight.
	 */
| unsigned used_idx; |
| unsigned called_used_idx; |
| unsigned char reserved[HOST_GUEST_PADDING - 4]; |
| } host; |
| |
| /* implemented by ring */ |
| void alloc_ring(void) |
| { |
| int ret; |
| int i; |
| |
	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
	if (ret) {
		/* posix_memalign returns the error code rather than setting errno */
		fprintf(stderr, "Unable to allocate ring buffer: %s\n",
			strerror(ret));
		exit(3);
	}
	event = malloc(sizeof *event);
	if (!event) {
		perror("Unable to allocate event buffer");
		exit(3);
	}
| memset(event, 0, sizeof *event); |
	guest.avail_idx = 0;
	guest.kicked_avail_idx = -1;	/* no kick sent yet */
	guest.last_used_idx = 0;
	host.used_idx = 0;
	host.called_used_idx = -1;	/* no call sent yet */
| for (i = 0; i < ring_size; ++i) { |
| struct desc desc = { |
| .index = i, |
| }; |
| ring[i] = desc; |
| } |
| guest.num_free = ring_size; |
| data = malloc(ring_size * sizeof *data); |
| if (!data) { |
| perror("Unable to allocate data buffer.\n"); |
| exit(3); |
| } |
| memset(data, 0, ring_size * sizeof *data); |
| } |
| |
| /* guest side */ |
| int add_inbuf(unsigned len, void *buf, void *datap) |
| { |
| unsigned head, index; |
| |
| if (!guest.num_free) |
| return -1; |
| |
| guest.num_free--; |
| head = (ring_size - 1) & (guest.avail_idx++); |
| |
	/* Start with a write.  On MESI architectures this helps avoid
	 * ending up in a shared state with a consumer that is polling
	 * this descriptor.
	 */
| ring[head].addr = (unsigned long)(void*)buf; |
| ring[head].len = len; |
	/* The read of ->index below might be reordered before the writes above.
	 * That is OK because it is just an optimization: if it happens we will
	 * get the cache line in a shared state, which is unfortunate but
	 * probably not worth adding an explicit full barrier to avoid.
	 */
| barrier(); |
| index = ring[head].index; |
| data[index].buf = buf; |
| data[index].data = datap; |
| /* Barrier A (for pairing) */ |
| smp_release(); |
| ring[head].flags = DESC_HW; |
| |
| return 0; |
| } |
| |
| void *get_buf(unsigned *lenp, void **bufp) |
| { |
| unsigned head = (ring_size - 1) & guest.last_used_idx; |
| unsigned index; |
| void *datap; |
| |
| if (ring[head].flags & DESC_HW) |
| return NULL; |
| /* Barrier B (for pairing) */ |
| smp_acquire(); |
| *lenp = ring[head].len; |
| index = ring[head].index & (ring_size - 1); |
| datap = data[index].data; |
| *bufp = data[index].buf; |
| data[index].buf = NULL; |
| data[index].data = NULL; |
| guest.num_free++; |
| guest.last_used_idx++; |
| return datap; |
| } |
| |
bool used_empty(void)
| { |
| unsigned head = (ring_size - 1) & guest.last_used_idx; |
| |
| return (ring[head].flags & DESC_HW); |
| } |
| |
void disable_call(void)
| { |
| /* Doing nothing to disable calls might cause |
| * extra interrupts, but reduces the number of cache misses. |
| */ |
| } |
| |
bool enable_call(void)
| { |
| event->call_index = guest.last_used_idx; |
	/* Make sure the call index write above is visible before we re-check the ring */
| /* Barrier D (for pairing) */ |
| smp_mb(); |
| return used_empty(); |
| } |
| |
| void kick_available(void) |
| { |
	/* Make sure the flags write in add_inbuf() is visible before we read the kick index */
| /* Barrier C (for pairing) */ |
| smp_mb(); |
| if (!need_event(event->kick_index, |
| guest.avail_idx, |
| guest.kicked_avail_idx)) |
| return; |
| |
| guest.kicked_avail_idx = guest.avail_idx; |
| kick(); |
| } |
| |
| /* host side */ |
void disable_kick(void)
| { |
| /* Doing nothing to disable kicks might cause |
| * extra interrupts, but reduces the number of cache misses. |
| */ |
| } |
| |
bool enable_kick(void)
| { |
| event->kick_index = host.used_idx; |
| /* Barrier C (for pairing) */ |
| smp_mb(); |
| return avail_empty(); |
| } |
| |
bool avail_empty(void)
| { |
| unsigned head = (ring_size - 1) & host.used_idx; |
| |
| return !(ring[head].flags & DESC_HW); |
| } |
| |
| bool use_buf(unsigned *lenp, void **bufp) |
| { |
| unsigned head = (ring_size - 1) & host.used_idx; |
| |
| if (!(ring[head].flags & DESC_HW)) |
| return false; |
| |
	/* make sure the len access below is not speculated ahead of the flags check */
| /* Barrier A (for pairing) */ |
| smp_acquire(); |
| |
| /* simple in-order completion: we don't need |
| * to touch index at all. This also means we |
| * can just modify the descriptor in-place. |
| */ |
	ring[head].len--;	/* write a completion length back into the descriptor */
	/* Make sure the len write is visible before flags.
	 * Note: the alternative is to write len and flags in one access -
	 * possible on 64 bit architectures, but wmb is free on Intel anyway,
	 * so I have no way to test whether it is a gain.
	 */
| /* Barrier B (for pairing) */ |
| smp_release(); |
| ring[head].flags = 0; |
| host.used_idx++; |
| return true; |
| } |
| |
| void call_used(void) |
| { |
	/* Make sure the flags write in use_buf() is visible before we read the call index */
| /* Barrier D (for pairing) */ |
| smp_mb(); |
| if (!need_event(event->call_index, |
| host.used_idx, |
| host.called_used_idx)) |
| return; |
| |
| host.called_used_idx = host.used_idx; |
| call(); |
| } |