1 /*
2  * Copyright (c) 2014-2015 Travis Geiselbrecht
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining
5  * a copy of this software and associated documentation files
6  * (the "Software"), to deal in the Software without restriction,
7  * including without limitation the rights to use, copy, modify, merge,
8  * publish, distribute, sublicense, and/or sell copies of the Software,
9  * and to permit persons to whom the Software is furnished to do so,
10  * subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 #include <dev/virtio/block.h>
24 
25 #include <debug.h>
26 #include <assert.h>
27 #include <trace.h>
28 #include <compiler.h>
29 #include <list.h>
30 #include <err.h>
31 #include <kernel/thread.h>
32 #include <kernel/event.h>
33 #include <kernel/mutex.h>
34 #include <kernel/vm.h>
35 #include <lib/bio.h>
36 
37 #define LOCAL_TRACE 0
38 
/*
 * Device configuration space layout for a virtio block device, read via
 * dev->config_ptr. __PACKED because the field offsets must match the
 * device-defined layout exactly.
 * NOTE(review): this matches the legacy virtio-blk config layout; fields
 * past blk_size (topology, writeback) are omitted — presumably unused here.
 */
struct virtio_blk_config {
    uint64_t capacity;   /* device size in 512-byte sectors */
    uint32_t size_max;   /* max size of a single segment (VIRTIO_BLK_F_SIZE_MAX) */
    uint32_t seg_max;    /* max number of segments per request (VIRTIO_BLK_F_SEG_MAX) */
    struct virtio_blk_geometry {
        uint16_t cylinders;
        uint8_t heads;
        uint8_t sectors;
    } geometry;          /* legacy disk geometry (VIRTIO_BLK_F_GEOMETRY) */
    uint32_t blk_size;   /* block size of the device (VIRTIO_BLK_F_BLK_SIZE) */
} __PACKED;
50 
/*
 * Request header placed at the front of every virtio block transfer
 * descriptor chain. __PACKED: the device reads this structure directly
 * from guest memory, so the layout must be exact (16 bytes).
 */
struct virtio_blk_req {
    uint32_t type;    /* VIRTIO_BLK_T_IN / VIRTIO_BLK_T_OUT / VIRTIO_BLK_T_FLUSH */
    uint32_t ioprio;  /* request priority; this driver always sets 0 */
    uint64_t sector;  /* starting sector, in 512-byte units regardless of blk_size */
} __PACKED;
56 
57 #define VIRTIO_BLK_F_BARRIER  (1<<0)
58 #define VIRTIO_BLK_F_SIZE_MAX (1<<1)
59 #define VIRTIO_BLK_F_SEG_MAX  (1<<2)
60 #define VIRTIO_BLK_F_GEOMETRY (1<<4)
61 #define VIRTIO_BLK_F_RO       (1<<5)
62 #define VIRTIO_BLK_F_BLK_SIZE (1<<6)
63 #define VIRTIO_BLK_F_SCSI     (1<<7)
64 #define VIRTIO_BLK_F_FLUSH    (1<<9)
65 #define VIRTIO_BLK_F_TOPOLOGY (1<<10)
66 #define VIRTIO_BLK_F_CONFIG_WCE (1<<11)
67 
68 #define VIRTIO_BLK_T_IN         0
69 #define VIRTIO_BLK_T_OUT        1
70 #define VIRTIO_BLK_T_FLUSH      4
71 
72 #define VIRTIO_BLK_S_OK         0
73 #define VIRTIO_BLK_S_IOERR      1
74 #define VIRTIO_BLK_S_UNSUPP     2
75 
76 static enum handler_return virtio_block_irq_driver_callback(struct virtio_device *dev, uint ring, const struct vring_used_elem *e);
77 static ssize_t virtio_bdev_read_block(struct bdev *bdev, void *buf, bnum_t block, uint count);
78 static ssize_t virtio_bdev_write_block(struct bdev *bdev, const void *buf, bnum_t block, uint count);
79 
/* per-device driver state, hung off virtio_device::priv */
struct virtio_block_dev {
    struct virtio_device *dev;   /* underlying virtio transport device */

    mutex_t lock;                /* serializes I/O; only one request in flight */
    event_t io_event;            /* signaled from irq callback on completion */

    /* bio block device */
    bdev_t bdev;

    /* one blk_req structure for io, not crossing a page boundary */
    struct virtio_blk_req *blk_req;
    paddr_t blk_req_phys;        /* physical address handed to the device */

    /* one uint8_t response word */
    uint8_t blk_response;        /* device writes VIRTIO_BLK_S_* status here */
    paddr_t blk_response_phys;
};
97 
virtio_block_init(struct virtio_device * dev,uint32_t host_features)98 status_t virtio_block_init(struct virtio_device *dev, uint32_t host_features)
99 {
100     LTRACEF("dev %p, host_features 0x%x\n", dev, host_features);
101 
102     /* allocate a new block device */
103     struct virtio_block_dev *bdev = malloc(sizeof(struct virtio_block_dev));
104     if (!bdev)
105         return ERR_NO_MEMORY;
106 
107     mutex_init(&bdev->lock);
108     event_init(&bdev->io_event, false, EVENT_FLAG_AUTOUNSIGNAL);
109 
110     bdev->dev = dev;
111     dev->priv = bdev;
112 
113     bdev->blk_req = memalign(sizeof(struct virtio_blk_req), sizeof(struct virtio_blk_req));
114 #if WITH_KERNEL_VM
115     bdev->blk_req_phys = vaddr_to_paddr(bdev->blk_req);
116 #else
117     bdev->blk_freq_phys = (uint64_t)(uintptr_t)bdev->blk_req;
118 #endif
119     LTRACEF("blk_req structure at %p (0x%lx phys)\n", bdev->blk_req, bdev->blk_req_phys);
120 
121 #if WITH_KERNEL_VM
122     bdev->blk_response_phys = vaddr_to_paddr(&bdev->blk_response);
123 #else
124     bdev->blk_response_phys = (uint64_t)(uintptr_t)&bdev->blk_response;
125 #endif
126 
127     /* make sure the device is reset */
128     virtio_reset_device(dev);
129 
130     volatile struct virtio_blk_config *config = (struct virtio_blk_config *)dev->config_ptr;
131 
132     LTRACEF("capacity 0x%llx\n", config->capacity);
133     LTRACEF("size_max 0x%x\n", config->size_max);
134     LTRACEF("seg_max  0x%x\n", config->seg_max);
135     LTRACEF("blk_size 0x%x\n", config->blk_size);
136 
137     /* ack and set the driver status bit */
138     virtio_status_acknowledge_driver(dev);
139 
140     // XXX check features bits and ack/nak them
141 
142     /* allocate a virtio ring */
143     virtio_alloc_ring(dev, 0, 256);
144 
145     /* set our irq handler */
146     dev->irq_driver_callback = &virtio_block_irq_driver_callback;
147 
148     /* set DRIVER_OK */
149     virtio_status_driver_ok(dev);
150 
151     /* construct the block device */
152     static uint8_t found_index = 0;
153     char buf[16];
154     snprintf(buf, sizeof(buf), "virtio%u", found_index++);
155     bio_initialize_bdev(&bdev->bdev, buf,
156                         config->blk_size, config->capacity,
157                         0, NULL, BIO_FLAGS_NONE);
158 
159     /* override our block device hooks */
160     bdev->bdev.read_block = &virtio_bdev_read_block;
161     bdev->bdev.write_block = &virtio_bdev_write_block;
162 
163     bio_register_device(&bdev->bdev);
164 
165     printf("found virtio block device of size %lld\n", config->capacity * config->blk_size);
166 
167     return NO_ERROR;
168 }
169 
/*
 * Interrupt-time completion callback: the device has placed used-ring
 * element 'e' on queue 'ring'. Walk the descriptor chain starting at
 * e->id, return every descriptor to the free list, then wake the thread
 * blocked in virtio_block_read_write().
 */
static enum handler_return virtio_block_irq_driver_callback(struct virtio_device *dev, uint ring, const struct vring_used_elem *e)
{
    struct virtio_block_dev *bdev = (struct virtio_block_dev *)dev->priv;

    LTRACEF("dev %p, ring %u, e %p, id %u, len %u\n", dev, ring, e, e->id, e->len);

    /* walk the chain and hand each descriptor back to the free queue,
     * snapshotting flags/next before the descriptor is freed */
    uint16_t idx = e->id;
    bool chained = true;
    while (chained) {
        struct vring_desc *d = virtio_desc_index_to_desc(dev, ring, idx);

        //virtio_dump_desc(d);

        uint16_t next_idx = d->next;
        chained = (d->flags & VRING_DESC_F_NEXT) != 0;

        virtio_free_desc(dev, ring, idx);

        idx = next_idx;
    }

    /* wake the waiter in virtio_block_read_write() */
    event_signal(&bdev->io_event, false);

    return INT_RESCHEDULE;
}
203 
/*
 * Perform a single synchronous read or write against the device.
 *
 * dev    - virtio device previously initialized by virtio_block_init()
 * buf    - source (write) or destination (read) buffer
 * offset - byte offset on the device; must translate to whole 512-byte
 *          sectors (the division below truncates — assumes callers pass
 *          block-aligned offsets; the bio hooks below always do)
 * len    - transfer length in bytes
 * write  - true for a device write, false for a read
 *
 * Serialized by bdev->lock: only one request is in flight at a time, so
 * the single shared blk_req/blk_response pair is safe to reuse.
 *
 * Returns 0 on success in the WITH_KERNEL_VM build (len is decremented to
 * zero by the scatter-gather loop). NOTE(review): in the non-VM build len
 * is returned undecremented, which the callers treat as failure — verify
 * that configuration is actually used.
 */
ssize_t virtio_block_read_write(struct virtio_device *dev, void *buf, off_t offset, size_t len, bool write)
{
    struct virtio_block_dev *bdev = (struct virtio_block_dev *)dev->priv;

    uint16_t i;
    struct vring_desc *desc;
    paddr_t pa;
    vaddr_t va = (vaddr_t)buf;

    LTRACEF("dev %p, buf %p, offset 0x%llx, len %zu\n", dev, buf, offset, len);

    mutex_acquire(&bdev->lock);

    /* set up the request header (device reads it via blk_req_phys) */
    bdev->blk_req->type = write ? VIRTIO_BLK_T_OUT : VIRTIO_BLK_T_IN;
    bdev->blk_req->ioprio = 0;
    bdev->blk_req->sector = offset / 512;
    LTRACEF("blk_req type %u ioprio %u sector %llu\n",
            bdev->blk_req->type, bdev->blk_req->ioprio, bdev->blk_req->sector);

    /* put together a transfer: header desc -> data desc(s) -> status desc */
    desc = virtio_alloc_desc_chain(dev, 0, 3, &i);
    LTRACEF("after alloc chain desc %p, i %u\n", desc, i);
    /* NOTE(review): desc is not checked for NULL — assumes a 256-entry ring
     * under the lock can always supply 3 descriptors; confirm */

    // XXX not cache safe.
    // At the moment only tested on arm qemu, which doesn't emulate cache.

    /* set up the descriptor pointing to the head */
    desc->addr = bdev->blk_req_phys;
    desc->len = sizeof(struct virtio_blk_req);
    desc->flags |= VRING_DESC_F_NEXT;

    /* set up the descriptor pointing to the buffer */
    desc = virtio_desc_index_to_desc(dev, 0, desc->next);
#if WITH_KERNEL_VM
    /* translate the first buffer */
    pa = vaddr_to_paddr((void *)va);
    desc->addr = (uint64_t)pa;
    /* desc->len is filled in below */
#else
    desc->addr = (uint64_t)(uintptr_t)buf;
    desc->len = len;
#endif
    desc->flags |= write ? 0 : VRING_DESC_F_WRITE; /* mark buffer as write-only if its a block read */
    desc->flags |= VRING_DESC_F_NEXT;

#if WITH_KERNEL_VM
    /* see if we need to add more descriptors due to scatter gather:
     * the buffer is virtually contiguous but may not be physically, so
     * walk it page by page, extending the current descriptor while pages
     * stay physically contiguous and inserting a new one when they don't */
    paddr_t next_pa = page_align(pa + 1);
    desc->len = MIN(next_pa - pa, len); /* first fragment: up to the end of its page */
    LTRACEF("first descriptor va 0x%lx desc->addr 0x%llx desc->len %u\n", va, desc->addr, desc->len);
    len -= desc->len;
    while (len > 0) {
        /* amount of source buffer handled by this iteration of the loop */
        size_t len_tohandle = MIN(len, PAGE_SIZE);

        /* translate the next page in the buffer */
        va = page_align(va + 1);
        pa = vaddr_to_paddr((void *)va);
        LTRACEF("va now 0x%lx, pa 0x%lx, next_pa 0x%lx, remaining len %zu\n", va, pa, next_pa, len);

        /* is the new translated physical address contiguous to the last one? */
        if (next_pa == pa) {
            LTRACEF("extending last one by %zu bytes\n", len_tohandle);
            desc->len += len_tohandle;
        } else {
            uint16_t next_i = virtio_alloc_desc(dev, 0);
            struct vring_desc *next_desc = virtio_desc_index_to_desc(dev, 0, next_i);
            DEBUG_ASSERT(next_desc);

            LTRACEF("doesn't extend, need new desc, allocated desc %i (%p)\n", next_i, next_desc);

            /* fill this descriptor in and put it after the last one but before the response descriptor */
            next_desc->addr = (uint64_t)pa;
            next_desc->len = len_tohandle;
            next_desc->flags = write ? 0 : VRING_DESC_F_WRITE; /* mark buffer as write-only if its a block read */
            next_desc->flags |= VRING_DESC_F_NEXT;
            next_desc->next = desc->next;
            desc->next = next_i;

            desc = next_desc;
        }
        len -= len_tohandle;
        next_pa += PAGE_SIZE;
    }
#endif

    /* set up the descriptor pointing to the response; device writes one
     * VIRTIO_BLK_S_* status byte into blk_response */
    desc = virtio_desc_index_to_desc(dev, 0, desc->next);
    desc->addr = bdev->blk_response_phys;
    desc->len = 1;
    desc->flags = VRING_DESC_F_WRITE;

    /* submit the transfer */
    virtio_submit_chain(dev, 0, i);

    /* kick it off */
    virtio_kick(dev, 0);

    /* wait for the transfer to complete (signaled by the irq callback) */
    event_wait(&bdev->io_event);

    /* NOTE(review): blk_response is logged but never checked, so a device
     * I/O error (VIRTIO_BLK_S_IOERR) is still reported as success — confirm
     * whether this is intentional */
    LTRACEF("status 0x%hhx\n", bdev->blk_response);

    mutex_release(&bdev->lock);

    return len;
}
312 
virtio_bdev_read_block(struct bdev * bdev,void * buf,bnum_t block,uint count)313 static ssize_t virtio_bdev_read_block(struct bdev *bdev, void *buf, bnum_t block, uint count)
314 {
315     struct virtio_block_dev *dev = containerof(bdev, struct virtio_block_dev, bdev);
316 
317     LTRACEF("dev %p, buf %p, block 0x%x, count %u\n", bdev, buf, block, count);
318 
319     if (virtio_block_read_write(dev->dev, buf, (off_t)block * dev->bdev.block_size,
320                                 count * dev->bdev.block_size, false) == 0) {
321         return count * dev->bdev.block_size;
322     } else {
323         return ERR_IO;
324     }
325 }
326 
virtio_bdev_write_block(struct bdev * bdev,const void * buf,bnum_t block,uint count)327 static ssize_t virtio_bdev_write_block(struct bdev *bdev, const void *buf, bnum_t block, uint count)
328 {
329     struct virtio_block_dev *dev = containerof(bdev, struct virtio_block_dev, bdev);
330 
331     LTRACEF("dev %p, buf %p, block 0x%x, count %u\n", bdev, buf, block, count);
332 
333     if (virtio_block_read_write(dev->dev, (void *)buf, (off_t)block * dev->bdev.block_size,
334                                 count * dev->bdev.block_size, true) == 0) {
335         return count * dev->bdev.block_size;
336     } else {
337         return ERR_IO;
338     }
339 }
340 
341