diff -Naur linux-2.4.20-dm-10/drivers/md/Config.in linux-2.4.20-evms-2.0.1/drivers/md/Config.in --- linux-2.4.20-dm-10/drivers/md/Config.in 2003-04-28 12:01:54.000000000 -0500 +++ linux-2.4.20-evms-2.0.1/drivers/md/Config.in 2003-04-28 12:01:17.000000000 -0500 @@ -17,6 +17,7 @@ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then dep_tristate ' Device-mapper support (EXPERIMENTAL)' CONFIG_BLK_DEV_DM $CONFIG_MD dep_tristate ' Bad Block Relocation Device Target' CONFIG_BLK_DEV_DM_BBR $CONFIG_BLK_DEV_DM + dep_tristate ' Sparse Device Target' CONFIG_BLK_DEV_DM_SPARSE $CONFIG_BLK_DEV_DM fi endmenu diff -Naur linux-2.4.20-dm-10/drivers/md/Makefile linux-2.4.20-evms-2.0.1/drivers/md/Makefile --- linux-2.4.20-dm-10/drivers/md/Makefile 2003-04-28 12:01:54.000000000 -0500 +++ linux-2.4.20-evms-2.0.1/drivers/md/Makefile 2003-04-28 12:01:17.000000000 -0500 @@ -26,6 +26,7 @@ obj-$(CONFIG_BLK_DEV_LVM) += lvm-mod.o obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o obj-$(CONFIG_BLK_DEV_DM_BBR) += dm-bbr.o dm-io.o +obj-$(CONFIG_BLK_DEV_DM_SPARSE) += dm-sparse.o dm-io.o include $(TOPDIR)/Rules.make diff -Naur linux-2.4.20-dm-10/drivers/md/dm-sparse.c linux-2.4.20-evms-2.0.1/drivers/md/dm-sparse.c --- linux-2.4.20-dm-10/drivers/md/dm-sparse.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.4.20-evms-2.0.1/drivers/md/dm-sparse.c 2003-04-28 12:01:17.000000000 -0500 @@ -0,0 +1,713 @@ +/* -*- linux-c -*- */ + +/* + * Copyright (c) International Business Machines Corp., 2002 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * linux/drivers/md/dm-sparse.c
+ *
+ * Sparse target for device-mapper.
+ *
+ * This target provides the ability to create a sparse device. This
+ * allows a device to pretend to be larger than it really is.
+ */
+
+#include "dm.h"
+#include "syncio.h"
+
+/*
+ * NOTE(review): the header names on the six includes below were missing and
+ * have been reconstructed from the symbols this file uses -- confirm.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/mempool.h>
+#include <linux/vmalloc.h>
+
+#define MAX_HASH_CHAIN_ENTRIES 10
+#define NAME_SIZE 127
+
+/*
+ * Sparse target arguments, in the order sparse_ctr() parses them:
+ *	device
+ *	start
+ *	chunk_size
+ *	chunks
+ */
+
+/* One remapping: an entry in a sparse hash chain or on the free list. */
+struct sparse_hash_entry {
+	u64 org_chunk;		/* Chunk number, not LBA. */
+	u64 sparse_chunk;	/* Chunk number, not LBA. */
+	struct sparse_hash_entry *next;
+	struct sparse_hash_entry *prev;
+};
+
+/* Private data of one sparse target instance. */
+struct sparse_volume {
+	struct dm_dev *dev;
+	struct sync_io_handle *syncio_handle;
+	struct rw_semaphore sparse_semaphore;
+	struct sparse_hash_entry **sparse_map;	/* Hash table of remappings. */
+	struct sparse_hash_entry *free_hash_list; /* Preallocated, unused entries. */
+	kmem_cache_t *hash_slab;
+	mempool_t *hash_pool;
+	u32 chunk_size;		/* Sectors. */
+	u32 chunk_shift;	/* Shift value for chunk size. */
+	u32 num_chunks;		/* In this volume. */
+	u32 next_cow_entry;	/* Index into current COW table. */
+	u64 current_cow_sector;	/* LOGICAL sector of current COW table. */
+	u32 next_free_chunk;	/* Index of next free chunk (not LBA!). */
+	u32 hash_table_size;	/* Size of the hash table for the remap. */
+	u64 start;		/* Added to every remapped sector by sparse_map(). */
+	u64 cow_table[64];	/* One sector's worth of COW tables. */
+};
+
+/*************************** OLD SERVICES ****************************/
+
+/*
+ * Compute log base 2 of value.
+ *
+ * Returns the exponent when value is an exact power of two, -1 when value
+ * is zero, and -2 when value is non-zero but not a power of two.
+ */
+static inline int log2(u32 value)
+{
+	int result = 0;
+
+	if (!value)
+		return -1;
+
+	/* Strip and count the trailing zero bits. */
+	while (!(value & 1)) {
+		result++;
+		value >>= 1;
+	}
+
+	/* Anything left above the lowest bit means more than one bit was set. */
+	return (value == 1) ? result : -2;
+}
+
+/********************************* Functions *********************************/
+
+/***************************** Hash Functions *****************************/
+
+/*
+ * Take an entry off the volume's free hash list and initialize it with
+ * the given chunk numbers. The entry is returned unlinked (next and prev
+ * are NULL). Returns NULL when the free list is empty.
+ */
+static struct sparse_hash_entry *
+allocate_sparse_hash_entry(struct sparse_volume *volume,
+			   u64 org_chunk,
+			   u64 sparse_chunk)
+{
+	struct sparse_hash_entry *hash_entry = volume->free_hash_list;
+
+	/* Should always be non-NULL because the entries are preallocated. */
+	if (hash_entry) {
+		volume->free_hash_list = hash_entry->next;
+		hash_entry->org_chunk = org_chunk;
+		hash_entry->sparse_chunk = sparse_chunk;
+		hash_entry->next = NULL;
+		hash_entry->prev = NULL;
+	}
+
+	return hash_entry;
+}
+
+/*
+ * This function inserts a new entry into a sparse hash chain, immediately
+ * following the specified entry. This function should not be used to add
+ * an entry into an empty list, or as the first entry in an existing list.
+ * For that case, use insert_sparse_hash_entry_at_head().
+ *
+ * Always returns 0.
+ */
+static int insert_sparse_hash_entry(struct sparse_hash_entry *entry,
+				    struct sparse_hash_entry *base)
+{
+	entry->next = base->next;
+	entry->prev = base;
+	base->next = entry;
+	if (entry->next)
+		entry->next->prev = entry;
+	return 0;
+}
+
+/*
+ * This function inserts a new entry into a sparse chain as the first
+ * entry in the chain.
+ *
+ * Always returns 0.
+ */
+static int insert_sparse_hash_entry_at_head(struct sparse_hash_entry *entry,
+					    struct sparse_hash_entry **head)
+{
+	entry->next = *head;
+	entry->prev = NULL;
+	*head = entry;
+	if (entry->next)
+		entry->next->prev = entry;
+	return 0;
+}
+
+/*
+ * Delete all items in a single chain in the hash table.
+ * Every entry on the chain is handed back to vol->hash_pool.
+ * Always returns 0.
+ */
+static int delete_sparse_hash_chain(struct sparse_volume *vol,
+				    struct sparse_hash_entry *head)
+{
+	struct sparse_hash_entry *next;
+
+	while (head) {
+		next = head->next;
+		mempool_free(head, vol->hash_pool);
+		head = next;
+	}
+	return 0;
+}
+
+/*
+ * This function will search the hash chain that is anchored at the
+ * specified head pointer. If the chunk number is found, a pointer to that
+ * entry in the chain is set, and a 1 is returned. If the chunk is not
+ * found, a pointer to the previous entry is set and 0 is returned. If the
+ * return pointer is NULL, this means either the list is empty, or the
+ * specified sector should become the first list item.
+ *
+ * The walk stops at the first entry whose org_chunk is not smaller than
+ * the requested chunk, so it relies on chains being kept in ascending
+ * org_chunk order.
+ */
+static int search_sparse_hash_chain(u64 chunk,
+				    struct sparse_hash_entry *head,
+				    struct sparse_hash_entry **result)
+{
+	struct sparse_hash_entry *curr = head;
+	struct sparse_hash_entry *prev = head;
+
+	while (curr && curr->org_chunk < chunk) {
+		prev = curr;
+		curr = curr->next;
+	}
+
+	if (!curr) {
+		/* Either an empty chain or went off the end of the chain. */
+		*result = prev;
+		return 0;
+	}
+
+	if (curr->org_chunk != chunk) {
+		/* Stopped on a larger chunk: the new one belongs before it. */
+		*result = curr->prev;
+		return 0;
+	}
+
+	*result = curr;
+	return 1;
+}
+
+/*
+ * This function takes a cow table entry (from the on-disk data), and
+ * converts it into an appropriate entry for the sparse map, and
+ * inserts it into the appropriate map for the specified volume.
+ *
+ * Returns 0 on success, -ENOMEM when the free hash list is empty, and
+ * -EINVAL when org_chunk already has an entry in the map.
+ */
+static int add_cow_entry_to_sparse_map(u64 org_chunk,
+				       u64 sparse_chunk,
+				       struct sparse_volume *volume)
+{
+	struct sparse_hash_entry *new_entry;
+	struct sparse_hash_entry *target_entry;
+	u32 hash_value;
+
+	new_entry = allocate_sparse_hash_entry(volume, org_chunk, sparse_chunk);
+	if (!new_entry)
+		return -ENOMEM;
+
+	/*
+	 * The bucket must be computed exactly as the lookup paths
+	 * (sparse_remap_chunk, sparse_cow_write) compute it, i.e. from the
+	 * low 32 bits of the chunk number; otherwise an entry loaded here
+	 * could land in a bucket the lookups never search.
+	 */
+	hash_value = ((u32)org_chunk) % volume->hash_table_size;
+
+	if (search_sparse_hash_chain(org_chunk,
+				     volume->sparse_map[hash_value],
+				     &target_entry)) {
+		/*
+		 * Duplicate chunk in the COW table. Give the unused entry
+		 * back to the free list instead of losing it.
+		 */
+		new_entry->next = volume->free_hash_list;
+		volume->free_hash_list = new_entry;
+		return -EINVAL;
+	}
+
+	/* Should always take this path. */
+	if (target_entry)
+		insert_sparse_hash_entry(new_entry, target_entry);
+	else
+		insert_sparse_hash_entry_at_head
+			(new_entry, &(volume->sparse_map[hash_value]));
+
+	return 0;
+}
+
+/*
+ * Construct the initial hash table state based on
+ * existing COW tables on the disk.
+ *
+ * COW sectors are read one at a time starting at
+ * volume->current_cow_sector. Within a sector, entries are consumed until
+ * the first all-ones value; a completely used sector makes the scan move
+ * on to the following sector. On return, current_cow_sector,
+ * next_cow_entry and next_free_chunk identify the first unused slot.
+ *
+ * Returns 0 on success, -EIO when a COW sector cannot be read, or the
+ * error from add_cow_entry_to_sparse_map().
+ */
+static int build_sparse_maps(struct sparse_volume *volume)
+{
+	int rc = 0, done = 0;
+	struct sync_io_job job;
+
+	while (!done) {
+
+		/* Read in one sector's worth of COW tables. */
+		job.handle = volume->syncio_handle;
+		job.dev = volume->dev->dev;
+		job.rw = 0;
+		job.start_lsn = volume->current_cow_sector;
+		job.num_lsns = 1;
+		job.data = volume->cow_table;
+		if (sync_io(&job))
+			return -EIO;
+
+		/*
+		 * Translate every valid COW table entry into a sparse map
+		 * entry. Each on-disk entry consumes the next free chunk.
+		 */
+		for (volume->next_cow_entry = 0;
+		     volume->next_cow_entry < (SECTOR_SIZE / sizeof(u64)) &&
+		     volume->cow_table[volume->next_cow_entry] !=
+			0xffffffffffffffffULL;
+		     volume->next_cow_entry++, volume->next_free_chunk++) {
+
+			rc = add_cow_entry_to_sparse_map
+				(le64_to_cpu(volume->cow_table[volume->next_cow_entry]),
+				 volume->next_free_chunk, volume);
+			if (rc)
+				return rc;
+		}
+
+		/* Move on to the next sector if necessary. */
+		if (volume->next_cow_entry == (SECTOR_SIZE / sizeof(u64)))
+			volume->current_cow_sector++;
+		else
+			done = 1;
+	}
+	return 0;
+}
+
+/************************* Other Functions ************************/
+
+/*
+ * Function: sparse_remap_chunk
+ *
+ * This function performs a sector remap on a sparse volume. This should
+ * be called from the I/O path. It first determines the base sector
+ * of the chunk containing the specified sector, and saves the remainder.
+ * Then it performs a search through the sparse map for the specified
+ * volume. If a match is found, the sector number is changed to the new
+ * value.
+ * If no match is found, the value is left the same, meaning the
+ * chunk has not been remapped.
+ *
+ * Returns 0 when *sector was remapped and 1 when no mapping exists.
+ * The lookup runs under the read side of the volume semaphore.
+ */
+static int sparse_remap_chunk(struct sparse_volume *sparse_volume,
+			      u64 *sector)
+{
+	struct sparse_hash_entry *result;
+	u64 chunk;
+	u32 hash_value;
+	u32 remainder;
+	int rc = 1;
+
+	down_read(&sparse_volume->sparse_semaphore);
+
+	/* Split the sector into chunk number and offset within the chunk. */
+	remainder = *sector & (u64)(sparse_volume->chunk_size - 1);
+	chunk = *sector >> sparse_volume->chunk_shift;
+	hash_value = ((u32)chunk) % sparse_volume->hash_table_size;
+
+	if (search_sparse_hash_chain(chunk,
+				     sparse_volume->sparse_map[hash_value],
+				     &result)) {
+		*sector = (result->sparse_chunk << sparse_volume->chunk_shift)
+			+ remainder;
+		rc = 0;
+	}
+
+	up_read(&sparse_volume->sparse_semaphore);
+	return rc;
+}
+
+/*
+ * Function: sparse_cow_write
+ *
+ * Check this sparse node to see if the given sector/chunk has been
+ * remapped yet. If it hasn't, create a new hash table entry, update the
+ * in-memory COW table, write the COW table to disk.
+ *
+ * The new mapping only becomes visible in the hash table after the COW
+ * sector has been written successfully; if the write fails, the staged
+ * COW slot and the hash entry are released again.
+ *
+ * Returns 0 with *sector remapped on success, -ENOSPC when every chunk
+ * is in use, -ENOMEM when no buffer or hash entry is available, and
+ * -EIO when the COW table cannot be written.
+ */
+static int sparse_cow_write(struct sparse_volume *sparse_volume,
+			    u64 *sector)
+{
+	struct sparse_hash_entry *target_entry, *new_map_entry;
+	struct sync_io_job job;
+	char *cow = NULL;
+	u64 chunk;
+	u32 hash_value;
+	u32 remainder;
+	int rc;
+
+	down_write(&sparse_volume->sparse_semaphore);
+
+	remainder = *sector & (u64)(sparse_volume->chunk_size - 1);
+	chunk = *sector >> sparse_volume->chunk_shift;
+	hash_value = ((u32)chunk) % sparse_volume->hash_table_size;
+
+	/*
+	 * Look again now that the write lock is held: the chunk may have
+	 * been remapped since the caller's read-locked lookup.
+	 */
+	if (search_sparse_hash_chain(chunk,
+				     sparse_volume->sparse_map[hash_value],
+				     &target_entry)) {
+		*sector = (target_entry->sparse_chunk <<
+			   sparse_volume->chunk_shift) + remainder;
+		rc = 0;
+		goto out;
+	}
+
+	/* Is there enough room left on this sparse to remap this chunk? */
+	if (sparse_volume->next_free_chunk >= sparse_volume->num_chunks) {
+		DMERR("dm-sparse: full no new remaps allowed\n");
+		rc = -ENOSPC;
+		goto out;
+	}
+
+	/*
+	 * Always write the cow table so it is safe. The buffer is obtained
+	 * before a hash entry is taken so that a failed allocation has
+	 * nothing to undo, and with GFP_NOIO because this runs in the I/O
+	 * path with the volume semaphore held.
+	 */
+	cow = kmalloc(SECTOR_SIZE, GFP_NOIO);
+	if (!cow) {
+		DMERR("dm-sparse: memory error allocating COW table buffer");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Create and initialize a new hash table entry for the new remap. */
+	new_map_entry = allocate_sparse_hash_entry
+		(sparse_volume, chunk, sparse_volume->next_free_chunk);
+	if (!new_map_entry) {
+		DMERR("dm-sparse: memory error allocating hash entry\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Stage the new entry in the in-memory COW table. */
+	sparse_volume->cow_table[sparse_volume->next_cow_entry] =
+		cpu_to_le64(chunk);
+	memcpy(cow, sparse_volume->cow_table, SECTOR_SIZE);
+
+	/* Because of ordering issues this needs to be synchronous. */
+	job.handle = sparse_volume->syncio_handle;
+	job.dev = sparse_volume->dev->dev;
+	job.rw = 1;
+	job.start_lsn = sparse_volume->current_cow_sector;
+	job.num_lsns = 1;
+	job.data = (void *)cow;
+	if (sync_io(&job)) {
+		DMERR("dm-sparse: error writing COW table");
+		/* Undo the staging: mark the slot unused, recycle the entry. */
+		sparse_volume->cow_table[sparse_volume->next_cow_entry] =
+			0xffffffffffffffffULL;
+		new_map_entry->next = sparse_volume->free_hash_list;
+		sparse_volume->free_hash_list = new_map_entry;
+		rc = -EIO;
+		goto out;
+	}
+
+	/* The mapping is on disk; add the entry to the hash table. */
+	if (target_entry)
+		insert_sparse_hash_entry(new_map_entry, target_entry);
+	else
+		insert_sparse_hash_entry_at_head
+			(new_map_entry,
+			 &(sparse_volume->sparse_map[hash_value]));
+
+	sparse_volume->next_free_chunk++;
+	sparse_volume->next_cow_entry++;
+
+	/* Update the in-memory COW table values when this sector is full. */
+	if (sparse_volume->next_cow_entry >= (SECTOR_SIZE / sizeof(u64))) {
+		sparse_volume->next_cow_entry = 0;
+		sparse_volume->current_cow_sector++;
+		memset(sparse_volume->cow_table, 0xff, SECTOR_SIZE);
+	}
+
+	*sector = (new_map_entry->sparse_chunk << sparse_volume->chunk_shift)
+		+ remainder;
+
+	rc = 0;
+
+ out:
+	up_write(&sparse_volume->sparse_semaphore);
+	if (cow)
+		kfree(cow);
+
+	return rc;
+}
+
+/************************ EXPORT FUNCTIONS ************************/
+
+/*
+ * Function: sparse_dtr
+ *
+ * Release everything hanging off ti->private. Every member is checked
+ * before it is released, so this is also used by sparse_ctr() to unwind
+ * a partly constructed volume. ti->private is cleared afterwards.
+ */
+static void sparse_dtr(struct dm_target *ti)
+{
+	struct sparse_volume *vol = (struct sparse_volume *)ti->private;
+	u32 i;
+
+	if (!vol)
+		return;
+
+	if (vol->sparse_map) {
+		/* Hash entries go back to the pool before it is destroyed. */
+		for (i = 0; i < vol->hash_table_size; i++)
+			delete_sparse_hash_chain(vol, vol->sparse_map[i]);
+		delete_sparse_hash_chain(vol, vol->free_hash_list);
+		vfree(vol->sparse_map);
+	}
+
+	if (vol->hash_pool)
+		mempool_destroy(vol->hash_pool);
+
+	if (vol->hash_slab)
+		kmem_cache_destroy(vol->hash_slab);
+
+	if (vol->dev)
+		dm_put_device(ti, vol->dev);
+
+	if (vol->syncio_handle)
+		sync_io_cleanup(vol->syncio_handle);
+
+	kfree(vol);
+	ti->private = NULL;
+}
+
+/*
+ * Function: sparse_ctr
+ *
+ * Build a sparse volume from the table arguments
+ *	argv[0] device, argv[1] start, argv[2] chunk_size, argv[3] chunks
+ * and load the existing remappings from the COW tables on the device.
+ *
+ * chunk_size must be a non-zero power of two (it is turned into a shift
+ * count) and chunks must be non-zero.
+ *
+ * Returns 0 on success. On failure ti->error is set, everything acquired
+ * so far is released, and -EINVAL, -ENOMEM or the error from
+ * build_sparse_maps() is returned.
+ */
+static int sparse_ctr(struct dm_target *ti, int argc, char **argv)
+{
+	int rc = -EINVAL;
+	u32 i;
+	struct sparse_hash_entry *new_entry;
+	struct sparse_volume *vol;
+	struct dm_dev *dev;
+	u32 chunk_size, chunks;
+	u64 start;
+	char *end, slab_name[NAME_SIZE + 1];
+
+	if (argc != 4) {
+		ti->error = "dm-sparse: wrong number of arguments";
+		return rc;
+	}
+
+	start = simple_strtoull(argv[1], &end, 10);
+	if (*end) {
+		ti->error = "dm-sparse: Invalid first chunk lba";
+		return rc;
+	}
+
+	chunk_size = simple_strtoul(argv[2], &end, 10);
+	if (*end) {
+		ti->error = "dm-sparse: Invalid chunk_size";
+		return rc;
+	}
+
+	/* log2() is negative for zero and for anything not a power of two. */
+	if (log2(chunk_size) < 0) {
+		ti->error = "dm-sparse: chunk_size must be a power of 2";
+		return rc;
+	}
+
+	chunks = simple_strtoul(argv[3], &end, 10);
+	if (*end || !chunks) {
+		ti->error = "dm-sparse: Invalid number of chunks";
+		return rc;
+	}
+
+	DMWARN("dm-sparse: %llu %u %u\n", start, chunk_size, chunks);
+
+	/*
+	 * Widen before multiplying so the size cannot wrap at 32 bits.
+	 * NOTE(review): ti->begin is passed as the start of the device
+	 * area, as in the original code -- confirm this is the intended
+	 * offset on the underlying device.
+	 */
+	if (dm_get_device(ti, argv[0], ti->begin,
+			  start + (u64)chunks * chunk_size,
+			  dm_table_get_mode(ti->table), &dev)) {
+		ti->error = "dm-sparse: Device lookup failed";
+		return rc;
+	}
+
+	rc = -ENOMEM;
+
+	vol = kmalloc(sizeof(struct sparse_volume), GFP_KERNEL);
+	if (!vol) {
+		ti->error = "dm-sparse: Memory allocation for private-data failed";
+		dm_put_device(ti, dev);
+		return rc;
+	}
+
+	memset(vol, 0, sizeof(struct sparse_volume));
+
+	/*
+	 * Hook the volume up right away: sparse_dtr() finds it through
+	 * ti->private and releases whatever has been set up by then.
+	 */
+	vol->dev = dev;
+	ti->private = vol;
+
+	/*
+	 * NOTE(review): both names built below depend only on the target
+	 * type name, so every sparse instance uses the same strings --
+	 * confirm the slab layer accepts repeated cache names.
+	 */
+	strcpy(slab_name, "sparse-syncio-");
+	strcat(slab_name, ti->type->name);
+	if (sync_io_setup(&vol->syncio_handle, slab_name)) {
+		ti->error = "dm-sparse: failed to initialize syncio";
+		goto bad;
+	}
+
+	/* Initialize */
+	vol->chunk_size = chunk_size;
+	vol->chunk_shift = log2(chunk_size);
+	vol->num_chunks = chunks;
+	vol->current_cow_sector = 1;
+	vol->hash_table_size = chunks / MAX_HASH_CHAIN_ENTRIES + 1;
+	vol->start = start;
+	init_rwsem(&vol->sparse_semaphore);
+
+	strcpy(slab_name, "sparse-");
+	strcat(slab_name, ti->type->name);
+	vol->hash_slab = kmem_cache_create(slab_name,
+					   sizeof(struct sparse_hash_entry),
+					   0, SLAB_HWCACHE_ALIGN,
+					   NULL, NULL);
+	if (!vol->hash_slab) {
+		ti->error = "dm-sparse: memory allocation error in hash slab create";
+		goto bad;
+	}
+
+	vol->hash_pool = mempool_create(1, mempool_alloc_slab,
+					mempool_free_slab,
+					vol->hash_slab);
+	if (!vol->hash_pool) {
+		ti->error = "dm-sparse: memory allocation error in hash pool create";
+		goto bad;
+	}
+
+	/* Sparse hash table */
+	vol->sparse_map = vmalloc(vol->hash_table_size *
+				  sizeof(struct sparse_hash_entry *));
+	if (!vol->sparse_map) {
+		ti->error = "dm-sparse: Memory allocation error in sparse_map create";
+		goto bad;
+	}
+
+	memset(vol->sparse_map, 0, vol->hash_table_size *
+	       sizeof(struct sparse_hash_entry *));
+
+	/* Preallocate one hash entry per chunk onto the free list. */
+	for (i = 0; i < chunks; i++) {
+
+		new_entry = mempool_alloc(vol->hash_pool, GFP_KERNEL);
+		if (!new_entry) {
+			ti->error = "dm-sparse: memory allocation error in hash table setup";
+			goto bad;
+		}
+
+		new_entry->next = vol->free_hash_list;
+		vol->free_hash_list = new_entry;
+	}
+
+	rc = build_sparse_maps(vol);
+	if (rc) {
+		ti->error = "dm-sparse: error building hash tables";
+		goto bad;
+	}
+
+	return 0;
+
+ bad:
+	sparse_dtr(ti);
+	return rc;
+}
+
+/*
+ * Function: sparse_map
+ *
+ * Map one buffer head.
+ *  - Chunk already remapped: redirect the buffer to the backing device
+ *    and return 1.
+ *  - Write to an unmapped chunk: allocate a chunk through
+ *    sparse_cow_write(), redirect the buffer and return 1.
+ *  - Any other access to an unmapped chunk: fill the buffer with zeros,
+ *    complete it here and return 0.
+ * When sparse_cow_write() fails, the buffer is ended with an error and
+ * the negative error code is returned.
+ */
+static int sparse_map(struct dm_target *ti, struct buffer_head *bh, int rw,
+		      void **map_context)
+{
+	struct sparse_volume *volume = (struct sparse_volume *)ti->private;
+	u64 sector = bh->b_rsector;
+	int rc;
+
+	/* Check if this sector has been remapped. */
+	rc = sparse_remap_chunk(volume, &sector);
+
+	if (rc < 0) {	/* Error */
+		bh->b_end_io(bh, 0);
+		return rc;
+	}
+
+	if (rc == 0) {	/* Remapped I/O: read or write, same logic. */
+		bh->b_rsector = volume->start + sector;
+		bh->b_rdev = volume->dev->dev;
+		return 1;
+	}
+
+	/* (Previously) un-mapped: read and write take different paths. */
+
+	/*
+	 * Only a real write may allocate a chunk; read-ahead requests also
+	 * arrive with a non-zero rw and must be treated as reads.
+	 */
+	if (rw == WRITE) {
+		rc = sparse_cow_write(volume, &sector);
+
+		if (rc < 0) {	/* Error */
+			/*
+			 * NOTE(review): the buffer is completed here and the
+			 * error is returned as well -- confirm the dm core
+			 * does not complete it a second time.
+			 */
+			bh->b_end_io(bh, 0);
+			return rc;
+		}
+
+		/* Send the write on. */
+		bh->b_rsector = volume->start + sector;
+		bh->b_rdev = volume->dev->dev;
+		return 1;
+	}
+
+	/*
+	 * Reading something that was never written:
+	 * return zeros and indicate complete.
+	 */
+	memset(bh->b_data, 0x0, bh->b_size);
+	bh->b_end_io(bh, 1);
+	return 0;
+}
+
+/*
+ * Function: sparse_status
+ *
+ * STATUSTYPE_INFO reports the share of chunks in use as "N%".
+ * STATUSTYPE_TABLE reports "device start chunk_size chunks".
+ * Always returns 0.
+ */
+static int sparse_status(struct dm_target *ti, status_type_t type,
+			 char *result, int maxlen)
+{
+	struct sparse_volume *vol = (struct sparse_volume *)ti->private;
+	u32 used, total, percent;
+
+	switch (type) {
+
+	case STATUSTYPE_INFO:
+		used = vol->next_free_chunk;
+		total = vol->num_chunks;
+		percent = 0;
+
+		/*
+		 * used * 100 no longer fits in 32 bits for very large
+		 * volumes; scale the divisor instead in that case.
+		 */
+		if (total >= 100 && used > 0xffffffffU / 100)
+			percent = used / (total / 100);
+		else if (total)
+			percent = (used * 100) / total;
+
+		snprintf(result, maxlen, "%u%%", percent);
+		break;
+
+	case STATUSTYPE_TABLE:
+		snprintf(result, maxlen, "%s %Lu %u %u",
+			 kdevname(to_kdev_t(vol->dev->bdev->bd_dev)), vol->start,
+			 vol->chunk_size, vol->num_chunks);
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/****************** FUNCTION TABLE **********************/
+
+/* Entry points registered with device-mapper under the name "sparse". */
+static struct target_type sparse_target = {
+	.name = "sparse",
+	.module = THIS_MODULE,
+	.ctr = sparse_ctr,
+	.dtr = sparse_dtr,
+	.map = sparse_map,
+	.status = sparse_status,
+};
+
+/********************* REGISTRATION *****************/
+
+/*
+ * Module entry point: register the sparse target.
+ * Returns the result of dm_register_target(); a negative value is logged.
+ */
+static int __init sparse_init(void)
+{
+	int rc = dm_register_target(&sparse_target);
+
+	if (rc < 0)
+		DMWARN("sparse target registration failed");
+
+	return rc;
+}
+
+/*
+ * Module exit point: unregister the sparse target. A failure can only be
+ * logged, since there is no way to report it from here.
+ */
+static void __exit sparse_exit(void)
+{
+	if (dm_unregister_target(&sparse_target))
+		DMWARN("sparse target unregistration failed");
+}
+
+module_init(sparse_init);
+module_exit(sparse_exit);
+MODULE_LICENSE("GPL");