| /* |
| * Copyright (C) 2008 Oracle. All rights reserved. |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public |
| * License v2 as published by the Free Software Foundation. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public |
| * License along with this program; if not, write to the |
| * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| * Boston, MA 021110-1307, USA. |
| * |
| * Based on jffs2 zlib code: |
| * Copyright © 2001-2007 Red Hat, Inc. |
| * Created by David Woodhouse <dwmw2@infradead.org> |
| */ |
| |
| #include <linux/kernel.h> |
| #include <linux/slab.h> |
| #include <linux/zlib.h> |
| #include <linux/zutil.h> |
| #include <linux/vmalloc.h> |
| #include <linux/init.h> |
| #include <linux/err.h> |
| #include <linux/sched.h> |
| #include <linux/pagemap.h> |
| #include <linux/bio.h> |
| |
| /* Plan: call deflate() with avail_in == *sourcelen, |
| avail_out = *dstlen - 12 and flush == Z_FINISH. |
| If it doesn't manage to finish, call it again with |
| avail_in == 0 and avail_out set to the remaining 12 |
| bytes for it to clean up. |
| Q: Is 12 bytes sufficient? |
| */ |
| #define STREAM_END_SPACE 12 |
| |
| struct workspace { |
| z_stream inf_strm; |
| z_stream def_strm; |
| char *buf; |
| struct list_head list; |
| }; |
| |
| static LIST_HEAD(idle_workspace); |
| static DEFINE_SPINLOCK(workspace_lock); |
| static unsigned long num_workspace; |
| static atomic_t alloc_workspace = ATOMIC_INIT(0); |
| static DECLARE_WAIT_QUEUE_HEAD(workspace_wait); |
| |
| /* |
| * this finds an available zlib workspace or allocates a new one |
| * NULL or an ERR_PTR is returned if things go bad. |
| */ |
| static struct workspace *find_zlib_workspace(void) |
| { |
| struct workspace *workspace; |
| int ret; |
| int cpus = num_online_cpus(); |
| |
| again: |
| spin_lock(&workspace_lock); |
| if (!list_empty(&idle_workspace)) { |
| workspace = list_entry(idle_workspace.next, struct workspace, |
| list); |
| list_del(&workspace->list); |
| num_workspace--; |
| spin_unlock(&workspace_lock); |
| return workspace; |
| |
| } |
| spin_unlock(&workspace_lock); |
| if (atomic_read(&alloc_workspace) > cpus) { |
| DEFINE_WAIT(wait); |
| prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); |
| if (atomic_read(&alloc_workspace) > cpus) |
| schedule(); |
| finish_wait(&workspace_wait, &wait); |
| goto again; |
| } |
| atomic_inc(&alloc_workspace); |
| workspace = kzalloc(sizeof(*workspace), GFP_NOFS); |
| if (!workspace) { |
| ret = -ENOMEM; |
| goto fail; |
| } |
| |
| workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); |
| if (!workspace->def_strm.workspace) { |
| ret = -ENOMEM; |
| goto fail; |
| } |
| workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); |
| if (!workspace->inf_strm.workspace) { |
| ret = -ENOMEM; |
| goto fail_inflate; |
| } |
| workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
| if (!workspace->buf) { |
| ret = -ENOMEM; |
| goto fail_kmalloc; |
| } |
| return workspace; |
| |
| fail_kmalloc: |
| vfree(workspace->inf_strm.workspace); |
| fail_inflate: |
| vfree(workspace->def_strm.workspace); |
| fail: |
| kfree(workspace); |
| atomic_dec(&alloc_workspace); |
| wake_up(&workspace_wait); |
| return ERR_PTR(ret); |
| } |
| |
| /* |
| * put a workspace struct back on the list or free it if we have enough |
| * idle ones sitting around |
| */ |
| static int free_workspace(struct workspace *workspace) |
| { |
| spin_lock(&workspace_lock); |
| if (num_workspace < num_online_cpus()) { |
| list_add_tail(&workspace->list, &idle_workspace); |
| num_workspace++; |
| spin_unlock(&workspace_lock); |
| if (waitqueue_active(&workspace_wait)) |
| wake_up(&workspace_wait); |
| return 0; |
| } |
| spin_unlock(&workspace_lock); |
| vfree(workspace->def_strm.workspace); |
| vfree(workspace->inf_strm.workspace); |
| kfree(workspace->buf); |
| kfree(workspace); |
| |
| atomic_dec(&alloc_workspace); |
| if (waitqueue_active(&workspace_wait)) |
| wake_up(&workspace_wait); |
| return 0; |
| } |
| |
| /* |
| * cleanup function for module exit |
| */ |
| static void free_workspaces(void) |
| { |
| struct workspace *workspace; |
| while(!list_empty(&idle_workspace)) { |
| workspace = list_entry(idle_workspace.next, struct workspace, |
| list); |
| list_del(&workspace->list); |
| vfree(workspace->def_strm.workspace); |
| vfree(workspace->inf_strm.workspace); |
| kfree(workspace->buf); |
| kfree(workspace); |
| atomic_dec(&alloc_workspace); |
| } |
| } |
| |
| /* |
| * given an address space and start/len, compress the bytes. |
| * |
| * pages are allocated to hold the compressed result and stored |
| * in 'pages' |
| * |
| * out_pages is used to return the number of pages allocated. There |
| * may be pages allocated even if we return an error |
| * |
| * total_in is used to return the number of bytes actually read. It |
| * may be smaller then len if we had to exit early because we |
| * ran out of room in the pages array or because we cross the |
| * max_out threshold. |
| * |
| * total_out is used to return the total number of compressed bytes |
| * |
| * max_out tells us the max number of bytes that we're allowed to |
| * stuff into pages |
| */ |
| int btrfs_zlib_compress_pages(struct address_space *mapping, |
| u64 start, unsigned long len, |
| struct page **pages, |
| unsigned long nr_dest_pages, |
| unsigned long *out_pages, |
| unsigned long *total_in, |
| unsigned long *total_out, |
| unsigned long max_out) |
| { |
| int ret; |
| struct workspace *workspace; |
| char *data_in; |
| char *cpage_out; |
| int nr_pages = 0; |
| struct page *in_page = NULL; |
| struct page *out_page = NULL; |
| int out_written = 0; |
| int in_read = 0; |
| unsigned long bytes_left; |
| |
| *out_pages = 0; |
| *total_out = 0; |
| *total_in = 0; |
| |
| workspace = find_zlib_workspace(); |
| if (!workspace) |
| return -1; |
| |
| if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { |
| printk(KERN_WARNING "deflateInit failed\n"); |
| ret = -1; |
| goto out; |
| } |
| |
| workspace->def_strm.total_in = 0; |
| workspace->def_strm.total_out = 0; |
| |
| in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); |
| data_in = kmap(in_page); |
| |
| out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
| cpage_out = kmap(out_page); |
| pages[0] = out_page; |
| nr_pages = 1; |
| |
| workspace->def_strm.next_in = data_in; |
| workspace->def_strm.next_out = cpage_out; |
| workspace->def_strm.avail_out = PAGE_CACHE_SIZE; |
| workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); |
| |
| out_written = 0; |
| in_read = 0; |
| |
| while (workspace->def_strm.total_in < len) { |
| ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); |
| if (ret != Z_OK) { |
| printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", |
| ret); |
| zlib_deflateEnd(&workspace->def_strm); |
| ret = -1; |
| goto out; |
| } |
| |
| /* we're making it bigger, give up */ |
| if (workspace->def_strm.total_in > 8192 && |
| workspace->def_strm.total_in < |
| workspace->def_strm.total_out) { |
| ret = -1; |
| goto out; |
| } |
| /* we need another page for writing out. Test this |
| * before the total_in so we will pull in a new page for |
| * the stream end if required |
| */ |
| if (workspace->def_strm.avail_out == 0) { |
| kunmap(out_page); |
| if (nr_pages == nr_dest_pages) { |
| out_page = NULL; |
| ret = -1; |
| goto out; |
| } |
| out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
| cpage_out = kmap(out_page); |
| pages[nr_pages] = out_page; |
| nr_pages++; |
| workspace->def_strm.avail_out = PAGE_CACHE_SIZE; |
| workspace->def_strm.next_out = cpage_out; |
| } |
| /* we're all done */ |
| if (workspace->def_strm.total_in >= len) |
| break; |
| |
| /* we've read in a full page, get a new one */ |
| if (workspace->def_strm.avail_in == 0) { |
| if (workspace->def_strm.total_out > max_out) |
| break; |
| |
| bytes_left = len - workspace->def_strm.total_in; |
| kunmap(in_page); |
| page_cache_release(in_page); |
| |
| start += PAGE_CACHE_SIZE; |
| in_page = find_get_page(mapping, |
| start >> PAGE_CACHE_SHIFT); |
| data_in = kmap(in_page); |
| workspace->def_strm.avail_in = min(bytes_left, |
| PAGE_CACHE_SIZE); |
| workspace->def_strm.next_in = data_in; |
| } |
| } |
| workspace->def_strm.avail_in = 0; |
| ret = zlib_deflate(&workspace->def_strm, Z_FINISH); |
| zlib_deflateEnd(&workspace->def_strm); |
| |
| if (ret != Z_STREAM_END) { |
| ret = -1; |
| goto out; |
| } |
| |
| if (workspace->def_strm.total_out >= workspace->def_strm.total_in) { |
| ret = -1; |
| goto out; |
| } |
| |
| ret = 0; |
| *total_out = workspace->def_strm.total_out; |
| *total_in = workspace->def_strm.total_in; |
| out: |
| *out_pages = nr_pages; |
| if (out_page) |
| kunmap(out_page); |
| |
| if (in_page) { |
| kunmap(in_page); |
| page_cache_release(in_page); |
| } |
| free_workspace(workspace); |
| return ret; |
| } |
| |
| /* |
| * pages_in is an array of pages with compressed data. |
| * |
| * disk_start is the starting logical offset of this array in the file |
| * |
| * bvec is a bio_vec of pages from the file that we want to decompress into |
| * |
| * vcnt is the count of pages in the biovec |
| * |
| * srclen is the number of bytes in pages_in |
| * |
| * The basic idea is that we have a bio that was created by readpages. |
| * The pages in the bio are for the uncompressed data, and they may not |
| * be contiguous. They all correspond to the range of bytes covered by |
| * the compressed extent. |
| */ |
| int btrfs_zlib_decompress_biovec(struct page **pages_in, |
| u64 disk_start, |
| struct bio_vec *bvec, |
| int vcnt, |
| size_t srclen) |
| { |
| int ret = 0; |
| int wbits = MAX_WBITS; |
| struct workspace *workspace; |
| char *data_in; |
| size_t total_out = 0; |
| unsigned long page_bytes_left; |
| unsigned long page_in_index = 0; |
| unsigned long page_out_index = 0; |
| struct page *page_out; |
| unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / |
| PAGE_CACHE_SIZE; |
| unsigned long buf_start; |
| unsigned long buf_offset; |
| unsigned long bytes; |
| unsigned long working_bytes; |
| unsigned long pg_offset; |
| unsigned long start_byte; |
| unsigned long current_buf_start; |
| char *kaddr; |
| |
| workspace = find_zlib_workspace(); |
| if (!workspace) |
| return -ENOMEM; |
| |
| data_in = kmap(pages_in[page_in_index]); |
| workspace->inf_strm.next_in = data_in; |
| workspace->inf_strm.avail_in = min(srclen, PAGE_CACHE_SIZE); |
| workspace->inf_strm.total_in = 0; |
| |
| workspace->inf_strm.total_out = 0; |
| workspace->inf_strm.next_out = workspace->buf; |
| workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
| page_out = bvec[page_out_index].bv_page; |
| page_bytes_left = PAGE_CACHE_SIZE; |
| pg_offset = 0; |
| |
| /* If it's deflate, and it's got no preset dictionary, then |
| we can tell zlib to skip the adler32 check. */ |
| if (srclen > 2 && !(data_in[1] & PRESET_DICT) && |
| ((data_in[0] & 0x0f) == Z_DEFLATED) && |
| !(((data_in[0]<<8) + data_in[1]) % 31)) { |
| |
| wbits = -((data_in[0] >> 4) + 8); |
| workspace->inf_strm.next_in += 2; |
| workspace->inf_strm.avail_in -= 2; |
| } |
| |
| if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
| printk(KERN_WARNING "inflateInit failed\n"); |
| ret = -1; |
| goto out; |
| } |
| while(workspace->inf_strm.total_in < srclen) { |
| ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); |
| if (ret != Z_OK && ret != Z_STREAM_END) { |
| break; |
| } |
| |
| /* |
| * buf start is the byte offset we're of the start of |
| * our workspace buffer |
| */ |
| buf_start = total_out; |
| |
| /* total_out is the last byte of the workspace buffer */ |
| total_out = workspace->inf_strm.total_out; |
| |
| working_bytes = total_out - buf_start; |
| |
| /* |
| * start byte is the first byte of the page we're currently |
| * copying into relative to the start of the compressed data. |
| */ |
| start_byte = page_offset(page_out) - disk_start; |
| |
| if (working_bytes == 0) { |
| /* we didn't make progress in this inflate |
| * call, we're done |
| */ |
| if (ret != Z_STREAM_END) { |
| ret = -1; |
| } |
| break; |
| } |
| |
| /* we haven't yet hit data corresponding to this page */ |
| if (total_out <= start_byte) { |
| goto next; |
| } |
| |
| /* |
| * the start of the data we care about is offset into |
| * the middle of our working buffer |
| */ |
| if (total_out > start_byte && buf_start < start_byte) { |
| buf_offset = start_byte - buf_start; |
| working_bytes -= buf_offset; |
| } else { |
| buf_offset = 0; |
| } |
| current_buf_start = buf_start; |
| |
| /* copy bytes from the working buffer into the pages */ |
| while(working_bytes > 0) { |
| bytes = min(PAGE_CACHE_SIZE - pg_offset, |
| PAGE_CACHE_SIZE - buf_offset); |
| bytes = min(bytes, working_bytes); |
| kaddr = kmap_atomic(page_out, KM_USER0); |
| memcpy(kaddr + pg_offset, workspace->buf + buf_offset, |
| bytes); |
| kunmap_atomic(kaddr, KM_USER0); |
| flush_dcache_page(page_out); |
| |
| pg_offset += bytes; |
| page_bytes_left -= bytes; |
| buf_offset += bytes; |
| working_bytes -= bytes; |
| current_buf_start += bytes; |
| |
| /* check if we need to pick another page */ |
| if (page_bytes_left == 0) { |
| page_out_index++; |
| if (page_out_index >= vcnt) { |
| ret = 0; |
| goto done; |
| } |
| page_out = bvec[page_out_index].bv_page; |
| pg_offset = 0; |
| page_bytes_left = PAGE_CACHE_SIZE; |
| start_byte = page_offset(page_out) - disk_start; |
| |
| /* |
| * make sure our new page is covered by this |
| * working buffer |
| */ |
| if (total_out <= start_byte) { |
| goto next; |
| } |
| |
| /* the next page in the biovec might not |
| * be adjacent to the last page, but it |
| * might still be found inside this working |
| * buffer. bump our offset pointer |
| */ |
| if (total_out > start_byte && |
| current_buf_start < start_byte) { |
| buf_offset = start_byte - buf_start; |
| working_bytes = total_out - start_byte; |
| current_buf_start = buf_start + |
| buf_offset; |
| } |
| } |
| } |
| next: |
| workspace->inf_strm.next_out = workspace->buf; |
| workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
| |
| if (workspace->inf_strm.avail_in == 0) { |
| unsigned long tmp; |
| kunmap(pages_in[page_in_index]); |
| page_in_index++; |
| if (page_in_index >= total_pages_in) { |
| data_in = NULL; |
| break; |
| } |
| data_in = kmap(pages_in[page_in_index]); |
| workspace->inf_strm.next_in = data_in; |
| tmp = srclen - workspace->inf_strm.total_in; |
| workspace->inf_strm.avail_in = min(tmp, |
| PAGE_CACHE_SIZE); |
| } |
| } |
| if (ret != Z_STREAM_END) { |
| ret = -1; |
| } else { |
| ret = 0; |
| } |
| done: |
| zlib_inflateEnd(&workspace->inf_strm); |
| if (data_in) |
| kunmap(pages_in[page_in_index]); |
| out: |
| free_workspace(workspace); |
| return ret; |
| } |
| |
| /* |
| * a less complex decompression routine. Our compressed data fits in a |
| * single page, and we want to read a single page out of it. |
| * start_byte tells us the offset into the compressed data we're interested in |
| */ |
| int btrfs_zlib_decompress(unsigned char *data_in, |
| struct page *dest_page, |
| unsigned long start_byte, |
| size_t srclen, size_t destlen) |
| { |
| int ret = 0; |
| int wbits = MAX_WBITS; |
| struct workspace *workspace; |
| unsigned long bytes_left = destlen; |
| unsigned long total_out = 0; |
| char *kaddr; |
| |
| if (destlen > PAGE_CACHE_SIZE) |
| return -ENOMEM; |
| |
| workspace = find_zlib_workspace(); |
| if (!workspace) |
| return -ENOMEM; |
| |
| workspace->inf_strm.next_in = data_in; |
| workspace->inf_strm.avail_in = srclen; |
| workspace->inf_strm.total_in = 0; |
| |
| workspace->inf_strm.next_out = workspace->buf; |
| workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
| workspace->inf_strm.total_out = 0; |
| /* If it's deflate, and it's got no preset dictionary, then |
| we can tell zlib to skip the adler32 check. */ |
| if (srclen > 2 && !(data_in[1] & PRESET_DICT) && |
| ((data_in[0] & 0x0f) == Z_DEFLATED) && |
| !(((data_in[0]<<8) + data_in[1]) % 31)) { |
| |
| wbits = -((data_in[0] >> 4) + 8); |
| workspace->inf_strm.next_in += 2; |
| workspace->inf_strm.avail_in -= 2; |
| } |
| |
| if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
| printk(KERN_WARNING "inflateInit failed\n"); |
| ret = -1; |
| goto out; |
| } |
| |
| while(bytes_left > 0) { |
| unsigned long buf_start; |
| unsigned long buf_offset; |
| unsigned long bytes; |
| unsigned long pg_offset = 0; |
| |
| ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); |
| if (ret != Z_OK && ret != Z_STREAM_END) { |
| break; |
| } |
| |
| buf_start = total_out; |
| total_out = workspace->inf_strm.total_out; |
| |
| if (total_out == buf_start) { |
| ret = -1; |
| break; |
| } |
| |
| if (total_out <= start_byte) { |
| goto next; |
| } |
| |
| if (total_out > start_byte && buf_start < start_byte) { |
| buf_offset = start_byte - buf_start; |
| } else { |
| buf_offset = 0; |
| } |
| |
| bytes = min(PAGE_CACHE_SIZE - pg_offset, |
| PAGE_CACHE_SIZE - buf_offset); |
| bytes = min(bytes, bytes_left); |
| |
| kaddr = kmap_atomic(dest_page, KM_USER0); |
| memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes); |
| kunmap_atomic(kaddr, KM_USER0); |
| |
| pg_offset += bytes; |
| bytes_left -= bytes; |
| next: |
| workspace->inf_strm.next_out = workspace->buf; |
| workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
| } |
| if (ret != Z_STREAM_END && bytes_left != 0) { |
| ret = -1; |
| } else { |
| ret = 0; |
| } |
| zlib_inflateEnd(&workspace->inf_strm); |
| out: |
| free_workspace(workspace); |
| return ret; |
| } |
| |
| void btrfs_zlib_exit(void) |
| { |
| free_workspaces(); |
| } |