#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "basefs_allocator.h"
#include "block_range.h"
#include "hashmap.h"
#include "base_fs.h"
8
/* Per-filesystem allocator state, installed as fs->priv_data. */
struct base_fs_allocator {
	/* Map from target path to struct basefs_entry, built by basefs_parse(). */
	struct ext2fs_hashmap *entries;
	/* Entry for the file currently being written, or NULL if none. */
	struct basefs_entry *cur_entry;
	/* The next expected logical block to allocate for cur_entry. */
	blk64_t next_lblk;
	/* Blocks which are definitely owned by a single inode in BaseFS. */
	ext2fs_block_bitmap exclusive_block_map;
	/* Blocks which are available to the first inode that requests it. */
	ext2fs_block_bitmap dedup_block_map;
};
19
20 static errcode_t basefs_block_allocator(ext2_filsys, blk64_t, blk64_t *,
21 struct blk_alloc_ctx *ctx);
22
23 /*
24 * Free any reserved, but unconsumed block ranges in the allocator. This both
25 * frees the block_range_list data structure and unreserves exclusive blocks
26 * from the block map.
27 */
fs_free_blocks_range(ext2_filsys fs,struct base_fs_allocator * allocator,struct block_range_list * list)28 static void fs_free_blocks_range(ext2_filsys fs,
29 struct base_fs_allocator *allocator,
30 struct block_range_list *list)
31 {
32 ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;
33
34 blk64_t block;
35 while (list->head) {
36 block = consume_next_block(list);
37 if (ext2fs_test_block_bitmap2(exclusive_map, block)) {
38 ext2fs_unmark_block_bitmap2(fs->block_map, block);
39 ext2fs_unmark_block_bitmap2(exclusive_map, block);
40 }
41 }
42 }
43
44 /*
45 * Free any blocks in the bitmap that were reserved but never used. This is
46 * needed to free dedup_block_map and ensure the free block bitmap is
47 * internally consistent.
48 */
fs_free_blocks_bitmap(ext2_filsys fs,ext2fs_block_bitmap bitmap)49 static void fs_free_blocks_bitmap(ext2_filsys fs, ext2fs_block_bitmap bitmap)
50 {
51 blk64_t block = 0;
52 blk64_t start = fs->super->s_first_data_block;
53 blk64_t end = ext2fs_blocks_count(fs->super) - 1;
54 errcode_t retval;
55
56 for (;;) {
57 retval = ext2fs_find_first_set_block_bitmap2(bitmap, start, end,
58 &block);
59 if (retval)
60 break;
61 ext2fs_unmark_block_bitmap2(fs->block_map, block);
62 start = block + 1;
63 }
64 }
65
/*
 * Tear down the allocator: release every reserved-but-unused block back to
 * the free pool, free the parsed BaseFS map, and free the bitmaps.
 *
 * This is reachable from base_fs_alloc_load()'s error path, where the entry
 * map or either bitmap may not have been allocated yet (e.g. basefs_parse()
 * or ext2fs_allocate_block_bitmap() failed).  The original dereferenced
 * dedup_block_map/exclusive_block_map unconditionally in the helpers, which
 * would crash on those paths; guard against NULL maps here.
 * (ext2fs_free_block_bitmap itself tolerates NULL.)
 */
static void basefs_allocator_free(ext2_filsys fs,
				  struct base_fs_allocator *allocator)
{
	struct basefs_entry *e;
	struct ext2fs_hashmap_entry *it = NULL;
	struct ext2fs_hashmap *entries;

	if (!allocator)
		return;

	entries = allocator->entries;
	if (entries) {
		while ((e = ext2fs_hashmap_iter_in_order(entries, &it))) {
			/* Only touch bitmaps if they were ever allocated. */
			if (allocator->exclusive_block_map)
				fs_free_blocks_range(fs, allocator,
						     &e->blocks);
			delete_block_ranges(&e->blocks);
		}
		ext2fs_hashmap_free(entries);
	}
	if (allocator->dedup_block_map)
		fs_free_blocks_bitmap(fs, allocator->dedup_block_map);
	ext2fs_free_block_bitmap(allocator->exclusive_block_map);
	ext2fs_free_block_bitmap(allocator->dedup_block_map);
	free(allocator);
}
85
86 /*
87 * Build a bitmap of which blocks are definitely owned by exactly one file in
88 * Base FS. Blocks which are not valid or are de-duplicated are skipped. This
89 * is called during allocator initialization, to ensure that libext2fs does
90 * not allocate which we want to re-use.
91 *
92 * If a block was allocated in the initial filesystem, it can never be re-used,
93 * so it will appear in neither the exclusive or dedup set. If a block is used
94 * by multiple files, it will be removed from the owned set and instead added
95 * to the dedup set.
96 *
97 * The dedup set is not removed from fs->block_map. This allows us to re-use
98 * dedup blocks separately and not have them be allocated outside of file data.
99 *
100 * This function returns non-zero if the block was owned, and 0 otherwise.
101 */
fs_reserve_block(ext2_filsys fs,struct base_fs_allocator * allocator,blk64_t block)102 static int fs_reserve_block(ext2_filsys fs,
103 struct base_fs_allocator *allocator,
104 blk64_t block)
105 {
106 ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;
107 ext2fs_block_bitmap dedup_map = allocator->dedup_block_map;
108
109 if (block >= ext2fs_blocks_count(fs->super))
110 return 0;
111
112 if (ext2fs_test_block_bitmap2(fs->block_map, block)) {
113 if (!ext2fs_test_block_bitmap2(exclusive_map, block))
114 return 0;
115 ext2fs_unmark_block_bitmap2(exclusive_map, block);
116 ext2fs_mark_block_bitmap2(dedup_map, block);
117 return 0;
118 } else {
119 ext2fs_mark_block_bitmap2(fs->block_map, block);
120 ext2fs_mark_block_bitmap2(exclusive_map, block);
121 return 1;
122 }
123 }
124
125 /*
126 * Walk the requested block list and reserve blocks, either into the owned
127 * pool or the dedup pool as appropriate. We stop once the file has enough
128 * owned blocks to satisfy |file_size|. This allows any extra blocks to be
129 * re-used, since otherwise a large block movement between files could
130 * trigger block allocation errors.
131 */
fs_reserve_blocks_range(ext2_filsys fs,struct base_fs_allocator * allocator,struct block_range_list * list,off_t file_size)132 static void fs_reserve_blocks_range(ext2_filsys fs,
133 struct base_fs_allocator *allocator,
134 struct block_range_list *list,
135 off_t file_size)
136 {
137 blk64_t block;
138 off_t blocks_needed;
139 off_t blocks_acquired = 0;
140 struct block_range *blocks = list->head;
141
142 blocks_needed = file_size + (fs->blocksize - 1);
143 blocks_needed /= fs->blocksize;
144
145 while (blocks) {
146 for (block = blocks->start; block <= blocks->end; block++) {
147 if (fs_reserve_block(fs, allocator, block))
148 blocks_acquired++;
149 if (blocks_acquired >= blocks_needed)
150 return;
151 }
152 blocks = blocks->next;
153 }
154 }
155
156 /*
157 * For each file in the base FS map, ensure that its blocks are reserved in
158 * the actual block map. This prevents libext2fs from allocating them for
159 * general purpose use, and ensures that if the file needs data blocks, they
160 * can be re-acquired exclusively for that file.
161 *
162 * If a file in the base map is missing, or not a regular file in the new
163 * filesystem, then it's skipped to ensure that its blocks are reusable.
164 */
fs_reserve_blocks(ext2_filsys fs,struct base_fs_allocator * allocator,const char * src_dir)165 static errcode_t fs_reserve_blocks(ext2_filsys fs,
166 struct base_fs_allocator *allocator,
167 const char *src_dir)
168 {
169 int nbytes;
170 char full_path[PATH_MAX];
171 const char *sep = "/";
172 struct stat st;
173 struct basefs_entry *e;
174 struct ext2fs_hashmap_entry *it = NULL;
175 struct ext2fs_hashmap *entries = allocator->entries;
176
177 if (strlen(src_dir) && src_dir[strlen(src_dir) - 1] == '/')
178 sep = "";
179
180 while ((e = ext2fs_hashmap_iter_in_order(entries, &it))) {
181 nbytes = snprintf(full_path, sizeof(full_path), "%s%s%s",
182 src_dir, sep, e->path);
183 if (nbytes >= sizeof(full_path))
184 return ENAMETOOLONG;
185 if (lstat(full_path, &st) || !S_ISREG(st.st_mode))
186 continue;
187 fs_reserve_blocks_range(fs, allocator, &e->blocks, st.st_size);
188 }
189 return 0;
190 }
191
base_fs_alloc_load(ext2_filsys fs,const char * file,const char * mountpoint,const char * src_dir)192 errcode_t base_fs_alloc_load(ext2_filsys fs, const char *file,
193 const char *mountpoint, const char *src_dir)
194 {
195 errcode_t retval = 0;
196 struct base_fs_allocator *allocator;
197
198 allocator = calloc(1, sizeof(*allocator));
199 if (!allocator) {
200 retval = ENOMEM;
201 goto out;
202 }
203
204 retval = ext2fs_read_bitmaps(fs);
205 if (retval)
206 goto err_load;
207
208 allocator->cur_entry = NULL;
209 allocator->entries = basefs_parse(file, mountpoint);
210 if (!allocator->entries) {
211 retval = EIO;
212 goto err_load;
213 }
214 retval = ext2fs_allocate_block_bitmap(fs, "exclusive map",
215 &allocator->exclusive_block_map);
216 if (retval)
217 goto err_load;
218 retval = ext2fs_allocate_block_bitmap(fs, "dedup map",
219 &allocator->dedup_block_map);
220 if (retval)
221 goto err_load;
222
223 retval = fs_reserve_blocks(fs, allocator, src_dir);
224 if (retval)
225 goto err_load;
226
227 /* Override the default allocator */
228 fs->get_alloc_block2 = basefs_block_allocator;
229 fs->priv_data = allocator;
230
231 goto out;
232
233 err_load:
234 basefs_allocator_free(fs, allocator);
235 out:
236 return retval;
237 }
238
239 /* Try and acquire the next usable block from the Base FS map. */
get_next_block(ext2_filsys fs,struct base_fs_allocator * allocator,struct block_range_list * list,blk64_t * ret)240 static errcode_t get_next_block(ext2_filsys fs, struct base_fs_allocator *allocator,
241 struct block_range_list* list, blk64_t *ret)
242 {
243 blk64_t block;
244 ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;
245 ext2fs_block_bitmap dedup_map = allocator->dedup_block_map;
246
247 if (!list->head)
248 return EXT2_ET_BLOCK_ALLOC_FAIL;
249
250 block = consume_next_block(list);
251 if (block >= ext2fs_blocks_count(fs->super))
252 return EXT2_ET_BLOCK_ALLOC_FAIL;
253 if (ext2fs_test_block_bitmap2(exclusive_map, block)) {
254 ext2fs_unmark_block_bitmap2(exclusive_map, block);
255 *ret = block;
256 return 0;
257 }
258 if (ext2fs_test_block_bitmap2(dedup_map, block)) {
259 ext2fs_unmark_block_bitmap2(dedup_map, block);
260 *ret = block;
261 return 0;
262 }
263 return EXT2_ET_BLOCK_ALLOC_FAIL;
264 }
265
266 /*
267 * BaseFS lists blocks in logical block order. However, the allocator hook is
268 * only called if a block needs to be allocated. In the case of a deduplicated
269 * block, or a hole, the hook is not invoked. This means the next block
270 * allocation request will be out of sequence. For example, consider if BaseFS
271 * specifies the following (0 being a hole):
272 * 1 2 3 0 4 5
273 *
274 * If the new file has a hole at logical block 0, we could accidentally
275 * shift the entire expected block list as follows:
276 * 0 1 2 0 3 4
277 *
278 * To account for this, we track the next expected logical block in the
279 * allocator. If the current request is for a later logical block, we skip and
280 * free the intermediate physical blocks that would have been allocated. This
281 * ensures the original block assignment is respected.
282 */
skip_blocks(ext2_filsys fs,struct base_fs_allocator * allocator,struct blk_alloc_ctx * ctx)283 static void skip_blocks(ext2_filsys fs, struct base_fs_allocator *allocator,
284 struct blk_alloc_ctx *ctx)
285 {
286 blk64_t block;
287 struct block_range_list *list = &allocator->cur_entry->blocks;
288 ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;
289
290 while (list->head && allocator->next_lblk < ctx->lblk) {
291 block = consume_next_block(list);
292 if (block >= ext2fs_blocks_count(fs->super))
293 continue;
294 if (ext2fs_test_block_bitmap2(exclusive_map, block)) {
295 ext2fs_unmark_block_bitmap2(exclusive_map, block);
296 ext2fs_unmark_block_bitmap2(fs->block_map, block);
297 }
298 allocator->next_lblk++;
299 }
300 }
301
basefs_block_allocator(ext2_filsys fs,blk64_t goal,blk64_t * ret,struct blk_alloc_ctx * ctx)302 static errcode_t basefs_block_allocator(ext2_filsys fs, blk64_t goal,
303 blk64_t *ret, struct blk_alloc_ctx *ctx)
304 {
305 errcode_t retval;
306 struct base_fs_allocator *allocator = fs->priv_data;
307 struct basefs_entry *e = allocator->cur_entry;
308 ext2fs_block_bitmap dedup_map = allocator->dedup_block_map;
309
310 if (e && ctx && (ctx->flags & BLOCK_ALLOC_DATA)) {
311 if (allocator->next_lblk < ctx->lblk)
312 skip_blocks(fs, allocator, ctx);
313 allocator->next_lblk = ctx->lblk + 1;
314
315 if (!get_next_block(fs, allocator, &e->blocks, ret))
316 return 0;
317 }
318
319 retval = ext2fs_new_block2(fs, goal, fs->block_map, ret);
320 if (!retval) {
321 ext2fs_mark_block_bitmap2(fs->block_map, *ret);
322 return 0;
323 }
324 if (retval != EXT2_ET_BLOCK_ALLOC_FAIL)
325 return retval;
326
327 /* Try to steal a block from the dedup pool. */
328 retval = ext2fs_find_first_set_block_bitmap2(dedup_map,
329 fs->super->s_first_data_block,
330 ext2fs_blocks_count(fs->super) - 1, ret);
331 if (!retval) {
332 ext2fs_unmark_block_bitmap2(dedup_map, *ret);
333 return 0;
334 }
335
336 /*
337 * As a last resort, take any block from our file's list. This
338 * risks bloating the diff, but means we are more likely to
339 * successfully build an image.
340 */
341 while (e->blocks.head) {
342 if (!get_next_block(fs, allocator, &e->blocks, ret))
343 return 0;
344 }
345 return EXT2_ET_BLOCK_ALLOC_FAIL;
346 }
347
/* Uninstall the BaseFS allocator hook and release all allocator state. */
void base_fs_alloc_cleanup(ext2_filsys fs)
{
	struct base_fs_allocator *allocator = fs->priv_data;

	/* Detach the hook before freeing so fs holds no stale pointers. */
	fs->priv_data = NULL;
	fs->get_alloc_block2 = NULL;
	basefs_allocator_free(fs, allocator);
}
354
base_fs_alloc_set_target(ext2_filsys fs,const char * target_path,const char * name EXT2FS_ATTR ((unused)),ext2_ino_t parent_ino EXT2FS_ATTR ((unused)),ext2_ino_t root EXT2FS_ATTR ((unused)),mode_t mode)355 errcode_t base_fs_alloc_set_target(ext2_filsys fs, const char *target_path,
356 const char *name EXT2FS_ATTR((unused)),
357 ext2_ino_t parent_ino EXT2FS_ATTR((unused)),
358 ext2_ino_t root EXT2FS_ATTR((unused)), mode_t mode)
359 {
360 struct base_fs_allocator *allocator = fs->priv_data;
361
362 if (mode != S_IFREG)
363 return 0;
364
365 if (allocator) {
366 allocator->cur_entry = ext2fs_hashmap_lookup(allocator->entries,
367 target_path,
368 strlen(target_path));
369 allocator->next_lblk = 0;
370 }
371 return 0;
372 }
373
base_fs_alloc_unset_target(ext2_filsys fs,const char * target_path EXT2FS_ATTR ((unused)),const char * name EXT2FS_ATTR ((unused)),ext2_ino_t parent_ino EXT2FS_ATTR ((unused)),ext2_ino_t root EXT2FS_ATTR ((unused)),mode_t mode)374 errcode_t base_fs_alloc_unset_target(ext2_filsys fs,
375 const char *target_path EXT2FS_ATTR((unused)),
376 const char *name EXT2FS_ATTR((unused)),
377 ext2_ino_t parent_ino EXT2FS_ATTR((unused)),
378 ext2_ino_t root EXT2FS_ATTR((unused)), mode_t mode)
379 {
380 struct base_fs_allocator *allocator = fs->priv_data;
381
382 if (!allocator || !allocator->cur_entry || mode != S_IFREG)
383 return 0;
384
385 fs_free_blocks_range(fs, allocator, &allocator->cur_entry->blocks);
386 delete_block_ranges(&allocator->cur_entry->blocks);
387 return 0;
388 }
389