diff -Naur linux-2.6.0-udm3/drivers/md/Kconfig linux-2.6.0-evms-2.2.2/drivers/md/Kconfig --- linux-2.6.0-udm3/drivers/md/Kconfig 2004-01-05 12:12:23.000000000 -0600 +++ linux-2.6.0-evms-2.2.2/drivers/md/Kconfig 2004-01-05 12:12:00.000000000 -0600 @@ -191,5 +191,20 @@ If unsure, say N. +config BLK_DEV_DM_SPARSE + tristate "Sparse Device Target (EXPERIMENTAL)" + depends on BLK_DEV_DM && EXPERIMENTAL + ---help--- + Support for sparse devices. Allows for creation of objects much + larger than physical space you actually have by storing in sparse + format. + + *** This module is for testing purposes only! *** + + To compile this as a module, choose M here: the module will be + called dm-sparse. + + If unsure, say N. + endmenu diff -Naur linux-2.6.0-udm3/drivers/md/Makefile linux-2.6.0-evms-2.2.2/drivers/md/Makefile --- linux-2.6.0-udm3/drivers/md/Makefile 2004-01-05 12:12:23.000000000 -0600 +++ linux-2.6.0-evms-2.2.2/drivers/md/Makefile 2004-01-05 12:12:00.000000000 -0600 @@ -29,3 +29,4 @@ obj-$(CONFIG_DM_FLAKEY) += dm-flakey.o obj-$(CONFIG_DM_CRYPT) += dm-crypt.o obj-$(CONFIG_BLK_DEV_DM_BBR) += dm-bbr.o +obj-$(CONFIG_BLK_DEV_DM_SPARSE) += dm-sparse.o diff -Naur linux-2.6.0-udm3/drivers/md/dm-sparse.c linux-2.6.0-evms-2.2.2/drivers/md/dm-sparse.c --- linux-2.6.0-udm3/drivers/md/dm-sparse.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.0-evms-2.2.2/drivers/md/dm-sparse.c 2004-01-05 12:12:00.000000000 -0600 @@ -0,0 +1,717 @@ +/* -*- linux-c -*- */ + +/* + * Copyright (c) International Business Machines Corp., 2002 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * linux/drivers/md/dm-sparse.c
+ *
+ * Sparse target for device-mapper.
+ *
+ * This target provides the ability to create a sparse device. This
+ * allows a device to pretend to be larger than it really is.
+ */
+
+/*
+ * NOTE(review): the header names of the six system includes were missing
+ * from the submitted patch text. The list below covers what this file
+ * uses (module macros, bio/block types, kmalloc/slab, mempool, vmalloc);
+ * confirm it against the original file.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/mempool.h>
+#include <linux/vmalloc.h>
+
+#include "dm.h"
+#include "dm-io.h"
+
+#define MAX_HASH_CHAIN_ENTRIES	10
+#define NAME_SIZE		127
+#define SECTOR_SIZE		(1 << SECTOR_SHIFT)
+
+/*
+ * Target arguments, in the order sparse_ctr() parses them:
+ *	device
+ *	start
+ *	chunk_size
+ *	chunks
+ */
+
+/*
+ * One entry in the sparse remapping structure. Entries live on a
+ * doubly-linked hash chain that search_sparse_hash_chain() walks in
+ * ascending org_chunk order.
+ */
+struct sparse_hash_entry {
+	u64 org_chunk;			/* Chunk number, not LBA. */
+	u64 sparse_chunk;		/* Chunk number, not LBA. */
+	struct sparse_hash_entry *next;
+	struct sparse_hash_entry *prev;
+};
+
+/* Private data of one sparse target instance. */
+struct sparse_volume {
+	struct dm_dev *dev;
+	struct rw_semaphore sparse_semaphore;	/* Guards the map and COW state. */
+	struct sparse_hash_entry **sparse_map;	/* Hash table of remappings. */
+	struct sparse_hash_entry *free_hash_list; /* Preallocated, unused entries. */
+	kmem_cache_t *hash_slab;
+	mempool_t *hash_pool;
+	u32 dm_io_flag;			/* Non-zero once dm_io_get() succeeded. */
+	u32 chunk_size;			/* Sectors. */
+	u32 chunk_shift;		/* Shift value for chunk size. */
+	u32 num_chunks;			/* In this volume. */
+	u32 next_cow_entry;		/* Index into current COW table. */
+	u64 current_cow_sector;		/* LOGICAL sector of current COW table. */
+	u32 next_free_chunk;		/* Index of next free chunk (not LBA!). */
+	u32 hash_table_size;		/* Size of the hash table for the remap. */
+	u64 start;
+	u64 cow_table[64];		/* One sector's worth of COW tables. */
+};
+
+/*************************** OLD SERVICES ****************************/
+
+/*
+ * Compute log base 2 of value.
+ *
+ * Returns the exponent when value is an exact power of two, -1 when
+ * value is 0 and -2 when value is not a power of two.
+ *
+ * Kept file-local: a global symbol named log2 clashes with the C
+ * library / compiler built-in of the same name.
+ */
+static inline int log2(u32 value)
+{
+	int result = 0;
+
+	if (!value)
+		return -1;
+
+	/* Strip trailing zero bits; a power of two leaves exactly 1. */
+	while (!(value & 1)) {
+		result++;
+		value >>= 1;
+	}
+
+	return (value == 1) ? result : -2;
+}
+
+/********************************* Functions *********************************/
+
+/***************************** Hash Functions *****************************/
+
+/*
+ * Take an entry from the volume's free hash list and initialize it with
+ * the given chunk numbers and NULL links.
+ *
+ * Returns the entry, or NULL when the free list is empty. sparse_ctr()
+ * preallocates one entry per chunk, so NULL is not expected during
+ * normal operation.
+ */
+static struct sparse_hash_entry *
+allocate_sparse_hash_entry(struct sparse_volume *volume,
+			   u64 org_chunk,
+			   u64 sparse_chunk)
+{
+	struct sparse_hash_entry *hash_entry = volume->free_hash_list;
+
+	if (!hash_entry)
+		return NULL;
+
+	volume->free_hash_list = hash_entry->next;
+	hash_entry->org_chunk = org_chunk;
+	hash_entry->sparse_chunk = sparse_chunk;
+	hash_entry->next = NULL;
+	hash_entry->prev = NULL;
+
+	return hash_entry;
+}
+
+/*
+ * This function inserts a new entry into a sparse hash chain, immediately
+ * following the specified entry. This function should not be used to add
+ * an entry into an empty list, or as the first entry in an existing list.
+ * For that case, use insert_sparse_hash_entry_at_head().
+ *
+ * Always returns 0.
+ */
+static int insert_sparse_hash_entry(struct sparse_hash_entry *entry,
+				    struct sparse_hash_entry *base)
+{
+	entry->next = base->next;
+	entry->prev = base;
+	base->next = entry;
+	if (entry->next)
+		entry->next->prev = entry;
+
+	return 0;
+}
+
+/*
+ * This function inserts a new entry into a sparse chain as the first
+ * entry in the chain. *head is updated to point at the new entry.
+ *
+ * Always returns 0.
+ */
+static int insert_sparse_hash_entry_at_head(struct sparse_hash_entry *entry,
+					    struct sparse_hash_entry **head)
+{
+	entry->next = *head;
+	entry->prev = NULL;
+	*head = entry;
+	if (entry->next)
+		entry->next->prev = entry;
+
+	return 0;
+}
+
+/*
+ * Delete all items in a single chain in the hash table.
+ * Every entry on the chain is handed back to vol->hash_pool with
+ * mempool_free(). Always returns 0.
+ */
+static int delete_sparse_hash_chain(struct sparse_volume *vol,
+				    struct sparse_hash_entry *head)
+{
+	while (head) {
+		struct sparse_hash_entry *next = head->next;
+
+		mempool_free(head, vol->hash_pool);
+		head = next;
+	}
+
+	return 0;
+}
+
+/*
+ * This function will search the hash chain that is anchored at the
+ * specified head pointer. If the chunk number is found, a pointer to that
+ * entry in the chain is set, and a 1 is returned. If the chunk is not
+ * found, a pointer to the previous entry is set and 0 is returned. If the
+ * return pointer is NULL, this means either the list is empty, or the
+ * specified sector should become the first list item.
+ *
+ * The walk stops at the first entry whose org_chunk is not smaller than
+ * chunk, so it relies on the chain being kept in ascending order.
+ */
+static int search_sparse_hash_chain(u64 chunk,
+				    struct sparse_hash_entry *head,
+				    struct sparse_hash_entry **result)
+{
+	struct sparse_hash_entry *curr = head;
+	struct sparse_hash_entry *prev = head;
+
+	while (curr && curr->org_chunk < chunk) {
+		prev = curr;
+		curr = curr->next;
+	}
+
+	if (!curr) {
+		/* Either an empty chain or went off the end of the chain. */
+		*result = prev;
+		return 0;
+	}
+
+	if (curr->org_chunk != chunk) {
+		/*
+		 * curr->prev rather than prev: when curr is still the head,
+		 * prev equals head but the insertion point is "before head",
+		 * which is reported as NULL.
+		 */
+		*result = curr->prev;
+		return 0;
+	}
+
+	*result = curr;
+	return 1;
+}
+
+/*
+ * This function takes a cow table entry (from the on-disk data), and
+ * converts it into an appropriate entry for the sparse map, and
+ * inserts it into the appropriate map for the specified volume.
+ *
+ * Returns 0 on success, -EINVAL when org_chunk is already mapped and
+ * -ENOMEM when the free hash list is exhausted.
+ */
+static int add_cow_entry_to_sparse_map(u64 org_chunk,
+				       u64 sparse_chunk,
+				       struct sparse_volume *volume)
+{
+	struct sparse_hash_entry *new_entry;
+	struct sparse_hash_entry *target_entry;
+	u32 hash_value;
+
+	/*
+	 * Hash exactly as sparse_remap_chunk() and sparse_cow_write() do
+	 * (truncate to 32 bits first); otherwise entries loaded from disk
+	 * can land in a bucket the I/O path never looks at.
+	 */
+	hash_value = ((u32)org_chunk) % volume->hash_table_size;
+
+	/*
+	 * Look before taking an entry off the free list, so that a
+	 * duplicate on-disk entry does not lose a preallocated entry.
+	 */
+	if (search_sparse_hash_chain(org_chunk,
+				     volume->sparse_map[hash_value],
+				     &target_entry))
+		return -EINVAL;
+
+	new_entry = allocate_sparse_hash_entry(volume, org_chunk, sparse_chunk);
+	if (!new_entry)
+		return -ENOMEM;
+
+	if (target_entry)
+		insert_sparse_hash_entry(new_entry, target_entry);
+	else
+		insert_sparse_hash_entry_at_head(new_entry,
+					&volume->sparse_map[hash_value]);
+
+	return 0;
+}
+
+/*
+ * Construct the initial hash table state based on
+ * existing COW tables on the disk.
+ *
+ * Starting at volume->current_cow_sector, COW table sectors are read one
+ * at a time into volume->cow_table. Each entry that is not all ones maps
+ * the original chunk it names to the next free chunk. Reading stops at
+ * the first all-ones entry; on return volume->next_cow_entry,
+ * volume->current_cow_sector and volume->next_free_chunk describe the
+ * first unused slot.
+ *
+ * Returns 0 on success or the error from dm_io_sync() /
+ * add_cow_entry_to_sparse_map().
+ */
+static int build_sparse_maps(struct sparse_volume *volume)
+{
+	const u32 entries_per_sector = SECTOR_SIZE / sizeof(u64);
+	struct io_region job;
+	struct page *page;
+	unsigned long error, offset;
+	int rc;
+
+	for (;;) {
+		/* Read in one sector's worth of COW tables. */
+		job.bdev = volume->dev->bdev;
+		job.sector = volume->current_cow_sector;
+		job.count = 1;
+		page = virt_to_page(volume->cow_table);
+		offset = (unsigned long)volume->cow_table & ~PAGE_MASK;
+		rc = dm_io_sync(1, &job, READ, page, offset, &error);
+		if (rc)
+			return rc;
+
+		/*
+		 * Translate every valid COW table entry into
+		 * a sparse map entry.
+		 */
+		for (volume->next_cow_entry = 0;
+		     volume->next_cow_entry < entries_per_sector &&
+		     volume->cow_table[volume->next_cow_entry] !=
+					0xffffffffffffffffULL;
+		     volume->next_cow_entry++, volume->next_free_chunk++) {
+			rc = add_cow_entry_to_sparse_map(
+				le64_to_cpu(volume->cow_table[volume->next_cow_entry]),
+				volume->next_free_chunk, volume);
+			if (rc)
+				return rc;
+		}
+
+		/* A partially filled sector is the end of the table. */
+		if (volume->next_cow_entry != entries_per_sector)
+			break;
+
+		/* Sector was full: move on to the next one. */
+		volume->current_cow_sector++;
+	}
+
+	return 0;
+}
+
+/************************* Other Functions ************************/
+
+/*
+ * Function: sparse_remap_chunk
+ *
+ * This function performs a sector remap on a sparse volume. This should
+ * be called from the I/O path. It first determines the base sector
+ * of the chunk containing the specified sector, and saves the remainder.
+ * Then it performs a search through the sparse map for the specified
+ * volume. If a match is found, the sector number is changed to the new
+ * value. If no match is found, the value is left the same, meaning the
+ * chunk has not been remapped.
+ *
+ * Returns 0 when *sector was remapped and 1 when no mapping exists.
+ */
+static int sparse_remap_chunk(struct sparse_volume *sparse_volume,
+			      u64 *sector)
+{
+	struct sparse_hash_entry *result;
+	u64 chunk;
+	u32 hash_value;
+	u32 remainder;
+	int rc = 1;
+
+	down_read(&sparse_volume->sparse_semaphore);
+
+	remainder = *sector & (u64)(sparse_volume->chunk_size - 1);
+	chunk = *sector >> sparse_volume->chunk_shift;
+	hash_value = ((u32)chunk) % sparse_volume->hash_table_size;
+
+	if (search_sparse_hash_chain(chunk,
+				     sparse_volume->sparse_map[hash_value],
+				     &result)) {
+		*sector = (result->sparse_chunk << sparse_volume->chunk_shift)
+			  + remainder;
+		rc = 0;
+	}
+
+	up_read(&sparse_volume->sparse_semaphore);
+	return rc;
+}
+
+/*
+ * Function: sparse_cow_write
+ *
+ * Check this sparse node to see if the given sector/chunk has been
+ * remapped yet. If it hasn't, record the new mapping in the in-memory
+ * COW table, write that table sector to disk and, once the write has
+ * succeeded, add the mapping to the hash table.
+ *
+ * Returns 0 with *sector remapped, -ENOSPC when no free chunk is left,
+ * -ENOMEM when no hash entry or I/O buffer is available, or the error
+ * from dm_io_sync() when the COW table cannot be written. On any error
+ * the in-memory state is left as it was before the call.
+ */
+static int sparse_cow_write(struct sparse_volume *sparse_volume,
+			    u64 *sector)
+{
+	struct sparse_hash_entry *target_entry, *new_map_entry;
+	struct io_region job;
+	struct page *page;
+	char *cow = NULL;
+	unsigned long error, offset;
+	u64 chunk, saved_cow_entry;
+	u32 hash_value;
+	u32 remainder;
+	u32 cow_index;
+	int rc;
+
+	down_write(&sparse_volume->sparse_semaphore);
+
+	remainder = *sector & (u64)(sparse_volume->chunk_size - 1);
+	chunk = *sector >> sparse_volume->chunk_shift;
+	hash_value = ((u32)chunk) % sparse_volume->hash_table_size;
+
+	/*
+	 * Look again under the write lock: the caller's lookup in
+	 * sparse_remap_chunk() only held the read lock and has dropped it.
+	 */
+	if (search_sparse_hash_chain(chunk,
+				     sparse_volume->sparse_map[hash_value],
+				     &target_entry)) {
+		*sector = (target_entry->sparse_chunk
+			   << sparse_volume->chunk_shift) + remainder;
+		rc = 0;
+		goto out;
+	}
+
+	/* Is there enough room left on this sparse to remap this chunk? */
+	if (sparse_volume->next_free_chunk >= sparse_volume->num_chunks) {
+		DMERR("dm-sparse: full no new remaps allowed\n");
+		rc = -ENOSPC;
+		goto out;
+	}
+
+	/* Create and initialize a new hash table entry for the new remap. */
+	new_map_entry = allocate_sparse_hash_entry(sparse_volume, chunk,
+					sparse_volume->next_free_chunk);
+	if (!new_map_entry) {
+		DMERR("dm-sparse: memory error allocating hash entry\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Always write the COW table so it is safe. This runs in the
+	 * block I/O path, so the allocation must not recurse into I/O.
+	 */
+	cow = kmalloc(SECTOR_SIZE, GFP_NOIO);
+	if (!cow) {
+		DMERR("dm-sparse: memory error allocating COW table buffer");
+		rc = -ENOMEM;
+		goto out_release;
+	}
+
+	/* Update the appropriate entry in the COW table. */
+	cow_index = sparse_volume->next_cow_entry;
+	saved_cow_entry = sparse_volume->cow_table[cow_index];
+	sparse_volume->cow_table[cow_index] = cpu_to_le64(chunk);
+	memcpy(cow, sparse_volume->cow_table, SECTOR_SIZE);
+
+	/* Because of ordering issues this needs to be synchronous. */
+	job.bdev = sparse_volume->dev->bdev;
+	job.sector = sparse_volume->current_cow_sector;
+	job.count = 1;
+	page = virt_to_page(cow);
+	offset = (unsigned long)cow & ~PAGE_MASK;
+	rc = dm_io_sync(1, &job, WRITE, page, offset, &error);
+	if (rc) {
+		/*
+		 * The mapping never reached the disk: undo the table update
+		 * instead of handing out a remap that would be lost the
+		 * next time the table is read back.
+		 */
+		DMERR("dm-sparse: error writing COW table");
+		sparse_volume->cow_table[cow_index] = saved_cow_entry;
+		goto out_release;
+	}
+
+	/* The mapping is on disk; add the entry to the hash table. */
+	if (target_entry)
+		insert_sparse_hash_entry(new_map_entry, target_entry);
+	else
+		insert_sparse_hash_entry_at_head(new_map_entry,
+				&sparse_volume->sparse_map[hash_value]);
+
+	sparse_volume->next_free_chunk++;
+	sparse_volume->next_cow_entry++;
+
+	/* Start a fresh, all-ones COW table once this sector is full. */
+	if (sparse_volume->next_cow_entry >= (SECTOR_SIZE / sizeof(u64))) {
+		sparse_volume->next_cow_entry = 0;
+		sparse_volume->current_cow_sector++;
+		memset(sparse_volume->cow_table, 0xff, SECTOR_SIZE);
+	}
+
+	*sector = (new_map_entry->sparse_chunk << sparse_volume->chunk_shift)
+		  + remainder;
+	rc = 0;
+	goto out;
+
+out_release:
+	/* Hand the unused entry back to the free list. */
+	new_map_entry->next = sparse_volume->free_hash_list;
+	sparse_volume->free_hash_list = new_map_entry;
+out:
+	up_write(&sparse_volume->sparse_semaphore);
+	kfree(cow);
+
+	return rc;
+}
+
+/************************ EXPORT FUNCTIONS ************************/
+
+/*
+ * Function: sparse_dtr
+ *
+ * Release everything hanging off ti->private. Every resource is checked
+ * before it is released, so sparse_ctr() can also use this to unwind a
+ * partially constructed volume; it only requires that vol->dev is set.
+ */
+static void sparse_dtr(struct dm_target *ti)
+{
+	struct sparse_volume *vol = (struct sparse_volume *)ti->private;
+	u32 i;
+
+	if (!vol)
+		return;
+
+	/* Entries must go back to the pool before the pool is destroyed. */
+	if (vol->sparse_map) {
+		for (i = 0; i < vol->hash_table_size; i++)
+			delete_sparse_hash_chain(vol, vol->sparse_map[i]);
+		vfree(vol->sparse_map);
+	}
+	delete_sparse_hash_chain(vol, vol->free_hash_list);
+
+	if (vol->hash_pool)
+		mempool_destroy(vol->hash_pool);
+
+	if (vol->hash_slab)
+		kmem_cache_destroy(vol->hash_slab);
+
+	dm_put_device(ti, vol->dev);
+
+	if (vol->dm_io_flag)
+		dm_io_put(1);
+
+	kfree(vol);
+}
+
+/*
+ * Function: sparse_ctr
+ *
+ * Build a sparse volume from the four target arguments:
+ *	argv[0]  device
+ *	argv[1]  start       (decimal; added to every remapped sector)
+ *	argv[2]  chunk_size  (decimal, sectors; a non-zero power of two)
+ *	argv[3]  chunks      (decimal, non-zero)
+ *
+ * Returns 0 on success. On failure ti->error is set, everything acquired
+ * so far is released and a negative errno is returned.
+ */
+static int sparse_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+	struct sparse_hash_entry *new_entry;
+	struct sparse_volume *vol;
+	struct dm_dev *dev;
+	u32 chunk_size, chunks, i;
+	u64 start;
+	char *end, slab_name[NAME_SIZE + 1];
+	int shift;
+	int rc;
+
+	if (argc != 4) {
+		ti->error = "dm-sparse: wrong number of arguments";
+		return -EINVAL;
+	}
+
+	start = simple_strtoull(argv[1], &end, 10);
+	if (*end) {
+		ti->error = "dm-sparse: Invalid first chunk lba";
+		return -EINVAL;
+	}
+
+	/*
+	 * chunk_size is used both as a mask (chunk_size - 1) and as a
+	 * shift count, so it has to be a non-zero power of two; log2()
+	 * returns a negative value for anything else.
+	 */
+	chunk_size = simple_strtoul(argv[2], &end, 10);
+	shift = log2(chunk_size);
+	if (*end || shift < 0) {
+		ti->error = "dm-sparse: Invalid chunk_size";
+		return -EINVAL;
+	}
+
+	/* sparse_status() divides by the chunk count. */
+	chunks = simple_strtoul(argv[3], &end, 10);
+	if (*end || !chunks) {
+		ti->error = "dm-sparse: Invalid number of chunks";
+		return -EINVAL;
+	}
+
+	/* Widen before multiplying: chunks * chunk_size may not fit in 32 bits. */
+	if (dm_get_device(ti, argv[0], ti->begin,
+			  start + (u64)chunks * chunk_size,
+			  dm_table_get_mode(ti->table), &dev)) {
+		ti->error = "dm-sparse: Device lookup failed";
+		return -EINVAL;
+	}
+
+	vol = kmalloc(sizeof(struct sparse_volume), GFP_KERNEL);
+	if (!vol) {
+		ti->error = "dm-sparse: Memory allocation for private-data failed";
+		dm_put_device(ti, dev);
+		return -ENOMEM;
+	}
+	memset(vol, 0, sizeof(struct sparse_volume));
+
+	/*
+	 * Publish the volume right away: sparse_dtr() finds it through
+	 * ti->private, and the error path below depends on that.
+	 */
+	vol->dev = dev;
+	ti->private = vol;
+
+	rc = dm_io_get(1);
+	if (rc) {
+		ti->error = "dm-sparse: failed to initialize dm-io.";
+		goto bad;
+	}
+
+	/* Initialize */
+	vol->dm_io_flag = 1;
+	vol->chunk_size = chunk_size;
+	vol->chunk_shift = shift;
+	vol->num_chunks = chunks;
+	vol->current_cow_sector = 1;
+	vol->hash_table_size = chunks / MAX_HASH_CHAIN_ENTRIES + 1;
+	vol->start = start;
+	init_rwsem(&vol->sparse_semaphore);
+
+	/*
+	 * NOTE(review): slab_name lives on this stack frame. If
+	 * kmem_cache_create() keeps the pointer instead of copying the
+	 * string, the cache name dangles once this function returns;
+	 * confirm, and move the buffer into struct sparse_volume if so.
+	 */
+	snprintf(slab_name, NAME_SIZE, "sparse-%p", vol);
+	vol->hash_slab = kmem_cache_create(slab_name,
+					   sizeof(struct sparse_hash_entry),
+					   0, SLAB_HWCACHE_ALIGN,
+					   NULL, NULL);
+	if (!vol->hash_slab) {
+		ti->error = "dm-sparse: memory allocation error in hash slab create";
+		rc = -ENOMEM;
+		goto bad;
+	}
+
+	vol->hash_pool = mempool_create(1, mempool_alloc_slab,
+					mempool_free_slab,
+					vol->hash_slab);
+	if (!vol->hash_pool) {
+		ti->error = "dm-sparse: memory allocation error in hash pool create";
+		rc = -ENOMEM;
+		goto bad;
+	}
+
+	/* Sparse hash table */
+	vol->sparse_map = vmalloc(vol->hash_table_size *
+				  sizeof(struct sparse_hash_entry *));
+	if (!vol->sparse_map) {
+		ti->error = "dm-sparse: Memory allocation error in sparse_map create";
+		rc = -ENOMEM;
+		goto bad;
+	}
+	memset(vol->sparse_map, 0, vol->hash_table_size *
+				   sizeof(struct sparse_hash_entry *));
+
+	/* Preallocate one hash entry per chunk onto the free list. */
+	for (i = 0; i < chunks; i++) {
+		new_entry = mempool_alloc(vol->hash_pool, GFP_KERNEL);
+		if (!new_entry) {
+			ti->error = "dm-sparse: memory allocation error in hash table setup";
+			rc = -ENOMEM;
+			goto bad;
+		}
+
+		new_entry->next = vol->free_hash_list;
+		vol->free_hash_list = new_entry;
+	}
+
+	rc = build_sparse_maps(vol);
+	if (rc) {
+		ti->error = "dm-sparse: error building hash tables";
+		goto bad;
+	}
+
+	ti->split_io = chunk_size;
+	return 0;
+
+bad:
+	sparse_dtr(ti);
+	ti->private = NULL;
+	return rc;
+}
+
+/*
+ * Function: sparse_map
+ *
+ * Route one bio:
+ *  - chunk already remapped: point the bio at the backing device and
+ *    return 1;
+ *  - write to an unmapped chunk: allocate a remap with
+ *    sparse_cow_write(), then point the bio at the device and return 1;
+ *  - read of an unmapped chunk: fill the bio with zeros, complete it
+ *    here and return 0.
+ * A negative errno is returned when a remap cannot be created.
+ */
+static int sparse_map(struct dm_target *ti, struct bio *bio,
+		      union map_info *map_context)
+{
+	struct sparse_volume *volume = (struct sparse_volume *)ti->private;
+	struct bio_vec *vec;
+	u64 sector = bio->bi_sector;
+	int i, rc;
+
+	/* Check if this sector has been remapped. */
+	rc = sparse_remap_chunk(volume, &sector);
+	if (rc < 0)
+		return rc;
+
+	if (rc == 0) {
+		/* Remapped I/O: read or write, same logic. */
+		bio->bi_sector = volume->start + sector;
+		bio->bi_bdev = volume->dev->bdev;
+		return 1;
+	}
+
+	/* (Previously) un-mapped: read and write use different logic. */
+
+	if (bio_data_dir(bio) == WRITE) {
+		rc = sparse_cow_write(volume, &sector);
+		if (rc < 0)
+			return rc;
+
+		/* Send the write on. */
+		bio->bi_sector = volume->start + sector;
+		bio->bi_bdev = volume->dev->bdev;
+		return 1;
+	}
+
+	/*
+	 * Reading something that was never written:
+	 * return zeros and indicate complete.
+	 *
+	 * NOTE(review): page_address() presumably only yields a usable
+	 * address for pages that have a kernel mapping; confirm whether
+	 * bio pages reaching this target can be unmapped highmem pages
+	 * and need kmap here.
+	 */
+	bio_for_each_segment(vec, bio, i) {
+		memset((page_address(vec->bv_page) + vec->bv_offset),
+		       0x0, vec->bv_len);
+	}
+	bio_endio(bio, bio->bi_size, 0);
+	return 0;
+}
+
+/*
+ * Function: sparse_status
+ *
+ * STATUSTYPE_INFO reports the percentage of chunks already handed out;
+ * STATUSTYPE_TABLE reports the device, start, chunk_size and chunk
+ * count in the same order sparse_ctr() takes them. Always returns 0.
+ */
+static int sparse_status(struct dm_target *ti, status_type_t type,
+			 char *result, unsigned int maxlen)
+{
+	struct sparse_volume *vol = (struct sparse_volume *)ti->private;
+	char b[BDEVNAME_SIZE];
+
+	switch (type) {
+
+	case STATUSTYPE_INFO:
+		/* num_chunks is non-zero: sparse_ctr() rejects 0 chunks. */
+		snprintf(result, maxlen, "%d%%",
+			 (int)((vol->next_free_chunk * 100) / vol->num_chunks));
+		break;
+
+	case STATUSTYPE_TABLE:
+		/* u64 is not "unsigned long long" on every arch; cast for %Lu. */
+		snprintf(result, maxlen, "%s %Lu %u %u",
+			 format_dev_t(b, vol->dev->bdev->bd_dev),
+			 (unsigned long long)vol->start,
+			 vol->chunk_size, vol->num_chunks);
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/****************** FUNCTION TABLE **********************/
+
+static struct target_type sparse_target = {
+	.name    = "sparse",
+	.version = {1, 0, 1},
+	.module  = THIS_MODULE,
+	.ctr     = sparse_ctr,
+	.dtr     = sparse_dtr,
+	.map     = sparse_map,
+	.status  = sparse_status,
+};
+
+/********************* REGISTRATION *****************/
+
+/* Register the "sparse" target with device-mapper at module load. */
+static int __init sparse_init(void)
+{
+	int rc = dm_register_target(&sparse_target);
+
+	if (rc < 0)
+		DMWARN("sparse target registration failed");
+
+	return rc;
+}
+
+/* Unregister the "sparse" target at module unload. */
+static void __exit sparse_exit(void)
+{
+	if (dm_unregister_target(&sparse_target))
+		DMWARN("sparse target unregistration failed");
+}
+
+module_init(sparse_init);
+module_exit(sparse_exit);
+MODULE_LICENSE("GPL");