/*
 * Copyright (c) 2014-2015 Travis Geiselbrecht
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files
 * (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge,
 * publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include <dev/virtio/block.h>

#include <debug.h>
#include <assert.h>
#include <trace.h>
#include <compiler.h>
#include <list.h>
#include <err.h>
#include <stdlib.h>
#include <stdio.h>
#include <kernel/thread.h>
#include <kernel/event.h>
#include <kernel/mutex.h>
#include <kernel/vm.h>
#include <lib/bio.h>

#define LOCAL_TRACE 0

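/* device config space layout, as defined by the legacy virtio-blk interface */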
struct virtio_blk_config {
    uint64_t capacity;
    uint32_t size_max;
    uint32_t seg_max;
    struct virtio_blk_geometry {
        uint16_t cylinders;
        uint8_t heads;
        uint8_t sectors;
    } geometry;
    uint32_t blk_size;
} __PACKED;

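/* request header; it is followed in the descriptor chain by the data
 * buffer(s) and a single device-written status byte */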
struct virtio_blk_req {
    uint32_t type;
    uint32_t ioprio;
    uint64_t sector;
} __PACKED;

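/* feature bits, from the legacy virtio-blk specification */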
#define VIRTIO_BLK_F_BARRIER    (1<<0)
#define VIRTIO_BLK_F_SIZE_MAX   (1<<1)
#define VIRTIO_BLK_F_SEG_MAX    (1<<2)
#define VIRTIO_BLK_F_GEOMETRY   (1<<4)
#define VIRTIO_BLK_F_RO         (1<<5)
#define VIRTIO_BLK_F_BLK_SIZE   (1<<6)
#define VIRTIO_BLK_F_SCSI       (1<<7)
#define VIRTIO_BLK_F_FLUSH      (1<<9)
#define VIRTIO_BLK_F_TOPOLOGY   (1<<10)
#define VIRTIO_BLK_F_CONFIG_WCE (1<<11)

#define VIRTIO_BLK_T_IN    0
#define VIRTIO_BLK_T_OUT   1
#define VIRTIO_BLK_T_FLUSH 4

#define VIRTIO_BLK_S_OK     0
#define VIRTIO_BLK_S_IOERR  1
#define VIRTIO_BLK_S_UNSUPP 2

static enum handler_return virtio_block_irq_driver_callback(struct virtio_device *dev, uint ring, const struct vring_used_elem *e);
static ssize_t virtio_bdev_read_block(struct bdev *bdev, void *buf, bnum_t block, uint count);
static ssize_t virtio_bdev_write_block(struct bdev *bdev, const void *buf, bnum_t block, uint count);

struct virtio_block_dev {
    struct virtio_device *dev;

    mutex_t lock;
    event_t io_event;

    /* bio block device */
    bdev_t bdev;

    /* one blk_req structure for io, not crossing a page boundary */
    struct virtio_blk_req *blk_req;
    paddr_t blk_req_phys;

    /* one uint8_t response word */
    uint8_t blk_response;
    paddr_t blk_response_phys;
};

status_t virtio_block_init(struct virtio_device *dev, uint32_t host_features)
{
    LTRACEF("dev %p, host_features 0x%x\n", dev, host_features);

    /* allocate a new block device */
    struct virtio_block_dev *bdev = malloc(sizeof(struct virtio_block_dev));
    if (!bdev)
        return ERR_NO_MEMORY;

    mutex_init(&bdev->lock);
    event_init(&bdev->io_event, false, EVENT_FLAG_AUTOUNSIGNAL);

    bdev->dev = dev;
    dev->priv = bdev;

    /* aligning the request header to its own (power of two) size guarantees
     * it cannot cross a page boundary */
    bdev->blk_req = memalign(sizeof(struct virtio_blk_req), sizeof(struct virtio_blk_req));
    if (!bdev->blk_req) {
        free(bdev);
        return ERR_NO_MEMORY;
    }
#if WITH_KERNEL_VM
    bdev->blk_req_phys = vaddr_to_paddr(bdev->blk_req);
#else
    bdev->blk_req_phys = (uint64_t)(uintptr_t)bdev->blk_req;
#endif
    LTRACEF("blk_req structure at %p (0x%lx phys)\n", bdev->blk_req, bdev->blk_req_phys);

#if WITH_KERNEL_VM
    bdev->blk_response_phys = vaddr_to_paddr(&bdev->blk_response);
#else
    bdev->blk_response_phys = (uint64_t)(uintptr_t)&bdev->blk_response;
#endif

    /* make sure the device is reset */
    virtio_reset_device(dev);

    volatile struct virtio_blk_config *config = (struct virtio_blk_config *)dev->config_ptr;

    LTRACEF("capacity 0x%llx\n", config->capacity);
    LTRACEF("size_max 0x%x\n", config->size_max);
    LTRACEF("seg_max 0x%x\n", config->seg_max);
    LTRACEF("blk_size 0x%x\n", config->blk_size);

    /* ack and set the driver status bit */
    virtio_status_acknowledge_driver(dev);

    // XXX check features bits and ack/nak them

    /* allocate a virtio ring */
    virtio_alloc_ring(dev, 0, 256);

    /* set our irq handler */
    dev->irq_driver_callback = &virtio_block_irq_driver_callback;

    /* set DRIVER_OK */
    virtio_status_driver_ok(dev);

    /* construct the block device */
    static uint8_t found_index = 0;
    char buf[16];
    snprintf(buf, sizeof(buf), "virtio%u", found_index++);

    /* NOTE: virtio-blk reports capacity in 512-byte sectors, so sizing the
     * bio device this way assumes blk_size == 512 (the qemu default) */
    bio_initialize_bdev(&bdev->bdev, buf,
                        config->blk_size, config->capacity,
                        0, NULL, BIO_FLAGS_NONE);

    /* override our block device hooks */
    bdev->bdev.read_block = &virtio_bdev_read_block;
    bdev->bdev.write_block = &virtio_bdev_write_block;

    bio_register_device(&bdev->bdev);

    printf("found virtio block device of size %lld\n", config->capacity * config->blk_size);

    return NO_ERROR;
}

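/* IRQ callback: walk the returned descriptor chain, hand each descriptor
 * back to the free list, and wake the thread blocked in virtio_block_read_write() */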
static enum handler_return virtio_block_irq_driver_callback(struct virtio_device *dev, uint ring, const struct vring_used_elem *e)
{
    struct virtio_block_dev *bdev = (struct virtio_block_dev *)dev->priv;

    LTRACEF("dev %p, ring %u, e %p, id %u, len %u\n", dev, ring, e, e->id, e->len);

    /* parse our descriptor chain, add back to the free queue */
    uint16_t i = e->id;
    for (;;) {
        int next;
        struct vring_desc *desc = virtio_desc_index_to_desc(dev, ring, i);

        //virtio_dump_desc(desc);

        if (desc->flags & VRING_DESC_F_NEXT) {
            next = desc->next;
        } else {
            /* end of chain */
            next = -1;
        }

        virtio_free_desc(dev, ring, i);

        if (next < 0)
            break;
        i = next;
    }

    /* signal our event */
    event_signal(&bdev->io_event, false);

    return INT_RESCHEDULE;
}

ssize_t virtio_block_read_write(struct virtio_device *dev, void *buf, off_t offset, size_t len, bool write)
{
    struct virtio_block_dev *bdev = (struct virtio_block_dev *)dev->priv;

    uint16_t i;
    struct vring_desc *desc;
    paddr_t pa;
    vaddr_t va = (vaddr_t)buf;

    LTRACEF("dev %p, buf %p, offset 0x%llx, len %zu\n", dev, buf, offset, len);

    mutex_acquire(&bdev->lock);

    /* set up the request; the sector field is always expressed in 512-byte
     * units, independent of the device's block size */
    bdev->blk_req->type = write ? VIRTIO_BLK_T_OUT : VIRTIO_BLK_T_IN;
    bdev->blk_req->ioprio = 0;
    bdev->blk_req->sector = offset / 512;
    LTRACEF("blk_req type %u ioprio %u sector %llu\n",
            bdev->blk_req->type, bdev->blk_req->ioprio, bdev->blk_req->sector);

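    /*
     * Each request is a single descriptor chain:
     *
     *   [ blk_req header, device-readable ]
     *     -> [ data buffer(s), device-writable on reads ]
     *     -> [ 1-byte status, device-writable ]
     */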
    /* put together a transfer */
    desc = virtio_alloc_desc_chain(dev, 0, 3, &i);
    LTRACEF("after alloc chain desc %p, i %u\n", desc, i);
    DEBUG_ASSERT(desc);

    // XXX not cache safe.
    // At the moment only tested on arm qemu, which doesn't emulate cache.

    /* set up the descriptor pointing to the head */
    desc->addr = bdev->blk_req_phys;
    desc->len = sizeof(struct virtio_blk_req);
    desc->flags |= VRING_DESC_F_NEXT;

    /* set up the descriptor pointing to the buffer */
    desc = virtio_desc_index_to_desc(dev, 0, desc->next);
#if WITH_KERNEL_VM
    /* translate the first buffer */
    pa = vaddr_to_paddr((void *)va);
    desc->addr = (uint64_t)pa;
    /* desc->len is filled in below */
#else
    desc->addr = (uint64_t)(uintptr_t)buf;
    desc->len = len;
    len = 0; /* the single descriptor covers the entire buffer */
#endif
    desc->flags |= write ? 0 : VRING_DESC_F_WRITE; /* mark buffer as device-writable if it's a block read */
    desc->flags |= VRING_DESC_F_NEXT;

#if WITH_KERNEL_VM
    /* see if we need to add more descriptors due to scatter gather */
    paddr_t next_pa = page_align(pa + 1);
    desc->len = MIN(next_pa - pa, len);
    LTRACEF("first descriptor va 0x%lx desc->addr 0x%llx desc->len %u\n", va, desc->addr, desc->len);
    len -= desc->len;
    while (len > 0) {
        /* amount of source buffer handled by this iteration of the loop */
        size_t len_tohandle = MIN(len, PAGE_SIZE);

        /* translate the next page in the buffer */
        va = page_align(va + 1);
        pa = vaddr_to_paddr((void *)va);
        LTRACEF("va now 0x%lx, pa 0x%lx, next_pa 0x%lx, remaining len %zu\n", va, pa, next_pa, len);

        /* is the new translated physical address contiguous to the last one? */
        if (next_pa == pa) {
            LTRACEF("extending last one by %zu bytes\n", len_tohandle);
            desc->len += len_tohandle;
        } else {
            uint16_t next_i = virtio_alloc_desc(dev, 0);
            struct vring_desc *next_desc = virtio_desc_index_to_desc(dev, 0, next_i);
            DEBUG_ASSERT(next_desc);

            LTRACEF("doesn't extend, need new desc, allocated desc %i (%p)\n", next_i, next_desc);

            /* fill this descriptor in and put it after the last one but before the response descriptor */
            next_desc->addr = (uint64_t)pa;
            next_desc->len = len_tohandle;
            next_desc->flags = write ? 0 : VRING_DESC_F_WRITE; /* mark buffer as device-writable if it's a block read */
            next_desc->flags |= VRING_DESC_F_NEXT;
            next_desc->next = desc->next;
            desc->next = next_i;

            desc = next_desc;
        }
        len -= len_tohandle;
        next_pa += PAGE_SIZE;
    }
#endif

    /* set up the descriptor pointing to the response */
    desc = virtio_desc_index_to_desc(dev, 0, desc->next);
    desc->addr = bdev->blk_response_phys;
    desc->len = 1;
    desc->flags = VRING_DESC_F_WRITE;

    /* submit the transfer */
    virtio_submit_chain(dev, 0, i);

    /* kick it off */
    virtio_kick(dev, 0);

    /* wait for the transfer to complete */
    event_wait(&bdev->io_event);

    LTRACEF("status 0x%hhx\n", bdev->blk_response);

    mutex_release(&bdev->lock);

    /* fail the operation if the device reported an error in the status byte */
    if (bdev->blk_response != VIRTIO_BLK_S_OK)
        return ERR_IO;

    /* len has been consumed down to 0 if the whole buffer was described */
    return len;
}

static ssize_t virtio_bdev_read_block(struct bdev *bdev, void *buf, bnum_t block, uint count)
{
    struct virtio_block_dev *dev = containerof(bdev, struct virtio_block_dev, bdev);

    LTRACEF("dev %p, buf %p, block 0x%x, count %u\n", bdev, buf, block, count);

    if (virtio_block_read_write(dev->dev, buf, (off_t)block * dev->bdev.block_size,
                                count * dev->bdev.block_size, false) == 0) {
        return count * dev->bdev.block_size;
    } else {
        return ERR_IO;
    }
}

static ssize_t virtio_bdev_write_block(struct bdev *bdev, const void *buf, bnum_t block, uint count)
{
    struct virtio_block_dev *dev = containerof(bdev, struct virtio_block_dev, bdev);

    LTRACEF("dev %p, buf %p, block 0x%x, count %u\n", bdev, buf, block, count);

    if (virtio_block_read_write(dev->dev, (void *)buf, (off_t)block * dev->bdev.block_size,
                                count * dev->bdev.block_size, true) == 0) {
        return count * dev->bdev.block_size;
    } else {
        return ERR_IO;
    }
}