diff -Naur linux-2.4.20-dm-10/drivers/md/Makefile linux-2.4.20-evms-2.0.1/drivers/md/Makefile
--- linux-2.4.20-dm-10/drivers/md/Makefile	2003-04-28 11:59:00.000000000 -0500
+++ linux-2.4.20-evms-2.0.1/drivers/md/Makefile	2003-04-28 11:58:43.000000000 -0500
@@ -10,6 +10,7 @@
 dm-mod-objs	:= dm.o dm-table.o dm-target.o dm-ioctl.o \
 		   dm-linear.o dm-stripe.o dm-snapshot.o dm-exception-store.o \
 		   kcopyd.o
+dm-io-objs	:= syncio.o
 
 # Note: link order is important.  All raid personalities
 # and xor.o must come before md.o, as they each initialise
@@ -32,3 +33,6 @@
 
 dm-mod.o: $(dm-mod-objs)
 	$(LD) -r -o $@ $(dm-mod-objs)
+
+dm-io.o: $(dm-io-objs)
+	$(LD) -r -o $@ $(dm-io-objs)
diff -Naur linux-2.4.20-dm-10/drivers/md/syncio.c linux-2.4.20-evms-2.0.1/drivers/md/syncio.c
--- linux-2.4.20-dm-10/drivers/md/syncio.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-2.4.20-evms-2.0.1/drivers/md/syncio.c	2003-04-28 11:58:29.000000000 -0500
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2002
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * linux/drivers/md/syncio.c
+ *
+ * Provides synchronous I/O support
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+
+#include "dm.h"
+#include "syncio.h"
+
+static LIST_HEAD(syncio_instances);
+
+/**
+ * struct sync_io_cb
+ * @rc:		Return code
+ * @count:	Number of outstanding requests
+ * @wait:	Wait queue for the issuing thread
+ * @handle:	Owning syncio handle
+ **/
+struct sync_io_cb {
+	int rc;
+	atomic_t count;
+	wait_queue_head_t wait;
+	struct sync_io_handle *handle;
+};
+
+static struct buffer_head *__allocate_bh(struct sync_io_handle *handle,
+					 int can_wait)
+{
+	struct buffer_head *bh;
+
+	while (1) {
+		bh = kmem_cache_alloc(handle->cachep, SLAB_NOIO);
+		if (bh || !can_wait)
+			break;
+		/* block and wait for a bh; __deallocate_bh() clears
+		 * the waiters count and wakes us up
+		 */
+		atomic_inc(&handle->waiters);
+		wait_event(handle->cache_wait_queue,
+			   !atomic_read(&handle->waiters));
+	}
+	if (bh) {
+		memset(bh, 0, sizeof(*bh));
+		init_waitqueue_head(&bh->b_wait);
+	}
+	return bh;
+}
+
+static void __deallocate_bh(struct sync_io_handle *handle,
+			    struct buffer_head *bh)
+{
+	kmem_cache_free(handle->cachep, bh);
+	atomic_set(&handle->waiters, 0);
+	if (waitqueue_active(&handle->cache_wait_queue))
+		wake_up(&handle->cache_wait_queue);
+}
+
+static void __end_sync_io(struct buffer_head *bh, int uptodate)
+{
+	struct sync_io_cb *cb = (struct sync_io_cb *) bh->b_private;
+
+	if (!uptodate)
+		cb->rc = -EIO;
+	mark_buffer_uptodate(bh, uptodate);
+	unlock_buffer(bh);
+
+	__deallocate_bh(cb->handle, bh);
+	if (atomic_dec_and_test(&cb->count))
+		if (waitqueue_active(&cb->wait))
+			wake_up(&cb->wait);
+}
+
+static void __wait_on_sync_io(struct sync_io_cb *cb)
+{
+	struct task_struct *tsk = current;
+	DECLARE_WAITQUEUE(wait, tsk);
+
+	add_wait_queue(&cb->wait, &wait);
+	do {
+		run_task_queue(&tq_disk);
+		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+		if (!atomic_read(&cb->count))
+			break;
+		schedule();
+	} while (atomic_read(&cb->count));
+	tsk->state = TASK_RUNNING;
+	remove_wait_queue(&cb->wait, &wait);
+}
+
+/*
+ * Function: __partial_sector_sync_io
+ *
+ * Support function for sync_io() that handles I/O to only part
+ * of a non-standard-sized hardsector.  It is not designed to be
+ * called directly, only via sync_io().
+ */
+static int
+__partial_sector_sync_io(struct sync_io_job *partial_job,
+			 u64 sector_lsn,
+			 struct sync_io_cb *cb,
+			 unsigned char **sector_buf)
+{
+	int rc = 0;
+	int hardsector_size = get_hardsect_size(partial_job->dev);
+	struct buffer_head *bh;
+
+	if (*sector_buf == NULL) {
+		/* the caller did not supply a buffer;
+		 * allocate one for the incoming sector
+		 */
+		*sector_buf = kmalloc(hardsector_size, GFP_KERNEL);
+		if (!*sector_buf)
+			return -ENOMEM;
+	}
+
+	/* read the whole hardsector into the sector buffer */
+	bh = __allocate_bh(partial_job->handle, 1);
+
+	bh->b_end_io = __end_sync_io;
+	bh->b_size = hardsector_size;
+	bh->b_rdev = partial_job->dev;
+	bh->b_rsector = partial_job->start_lsn - sector_lsn;
+	bh->b_data = *sector_buf;
+	bh->b_page = virt_to_page(*sector_buf);
+	bh->b_state = 0;
+	set_bit(BH_Dirty, &bh->b_state);
+	set_bit(BH_Lock, &bh->b_state);
+	set_bit(BH_Req, &bh->b_state);
+	set_bit(BH_Mapped, &bh->b_state);
+	bh->b_private = (void *) cb;
+	atomic_inc(&cb->count);
+
+	generic_make_request(READ, bh);
+
+	__wait_on_sync_io(cb);
+
+	/* copy data to/from the caller */
+	if (partial_job->rw != WRITE) {
+		/* READ */
+		memcpy(partial_job->data,
+		       *sector_buf + (sector_lsn << SECTOR_SHIFT),
+		       partial_job->num_lsns << SECTOR_SHIFT);
+	} else {
+		/* WRITE: merge the caller's data into the sector
+		 * buffer and write the full sector back out
+		 */
+		memcpy(*sector_buf + (sector_lsn << SECTOR_SHIFT),
+		       partial_job->data,
+		       partial_job->num_lsns << SECTOR_SHIFT);
+
+		/* allocate a buffer head from the pool */
+		bh = __allocate_bh(partial_job->handle, 1);
+
+		/* set up the buffer head for this sector */
+		bh->b_end_io = __end_sync_io;
+		bh->b_size = hardsector_size;
+		bh->b_rdev = partial_job->dev;
+		bh->b_rsector = partial_job->start_lsn - sector_lsn;
+		bh->b_data = *sector_buf;
+		bh->b_page = virt_to_page(*sector_buf);
+		bh->b_state = 0;
+		set_bit(BH_Dirty, &bh->b_state);
+		set_bit(BH_Lock, &bh->b_state);
+		set_bit(BH_Req, &bh->b_state);
+		set_bit(BH_Mapped, &bh->b_state);
+		bh->b_private = (void *) cb;
+		atomic_inc(&cb->count);
+
+		generic_make_request(WRITE, bh);
+
+		__wait_on_sync_io(cb);
+	}
+	return rc;
+}
+
+int __verify_sync_io_handle(struct sync_io_handle *handle)
+{
+	struct list_head *tmp;
+	struct sync_io_handle *entry;
+
+	/* the handle is valid only if it is on the instance list */
+	list_for_each(tmp, &syncio_instances) {
+		entry = list_entry(tmp, struct sync_io_handle, syncio_list);
+		if (entry == handle)
+			return 0;
+	}
+	return -EINVAL;
+}
+
+int sync_io(struct sync_io_job *job)
+{
+	int rc = 0;
+	struct sync_io_cb cb;
+	struct sync_io_job partial_job;
+	int hardsector_size = get_hardsect_size(job->dev);
+	int blocksize = block_size(job->dev);
+	int lsns_per_hardsector, lsns_per_blocksize;
+	u64 next_lsn, remaining_lsns, sector_lsn;
+	unchar *sector_buf = NULL, *cur_bufptr;
+
+	if (__verify_sync_io_handle(job->handle)) {
+		DMWARN("%s: invalid handle", __FUNCTION__);
+		return -EINVAL;
+	}
+
+	/* compute some per-device info once up front */
+	lsns_per_hardsector = hardsector_size / SECTOR_SIZE;
+	lsns_per_blocksize = blocksize / SECTOR_SIZE;
+
+	/* initialize the syncio control block */
+	memset(&cb, 0, sizeof(cb));
+	init_waitqueue_head(&cb.wait);
+	atomic_set(&cb.count, 0);
+	cb.handle = job->handle;
+
+	/* only update the local copies of these variables */
+	cur_bufptr = job->data;
+	next_lsn = job->start_lsn;
+	remaining_lsns = job->num_lsns;
+
+	/* check for a mid-sector starting offset;
+	 * if found, perform I/O on just that part
+	 * of the sector
+	 */
+	sector_lsn = next_lsn & (lsns_per_hardsector - 1);
+	if (sector_lsn) {
+		u64 num_lsns;
+
+		/* determine the lsns of this I/O that fall in the sector */
+		num_lsns = lsns_per_hardsector - sector_lsn;
+		if (num_lsns > remaining_lsns)
+			num_lsns = remaining_lsns;
+
+		/* perform the partial-sector I/O */
+		partial_job.handle = job->handle;
+		partial_job.dev = job->dev;
+		partial_job.rw = job->rw;
+		partial_job.start_lsn = next_lsn;
+		partial_job.num_lsns = num_lsns;
+		partial_job.data = cur_bufptr;
+		rc = __partial_sector_sync_io(&partial_job, sector_lsn, &cb,
+					      &sector_buf);
+		if (!rc) {
+			/* update progress in local variables */
+			cur_bufptr += num_lsns << SECTOR_SHIFT;
+			next_lsn += num_lsns;
+			remaining_lsns -= num_lsns;
+		}
+	}
+
+	/* continue if no errors were found */
+	if (!rc) {
+		/* perform I/O on all the complete sectors in this
+		 * request, looping until there are no more complete
+		 * sectors to process.
+		 */
+		while (remaining_lsns >= lsns_per_hardsector) {
+			/* this inner loop attempts to drive as many
+			 * bytes (in sector-size multiples) down to the
+			 * device as possible using the available buffer
+			 * heads in the pool.
+			 */
+			while (remaining_lsns >= lsns_per_hardsector) {
+				struct buffer_head *bh;
+				int io_bytes;
+
+				/* allocate a buffer head from the pool */
+				bh = __allocate_bh(job->handle, 0);
+				if (bh == NULL)
+					break;
+
+				/* issue a full block when the position and
+				 * the remaining length allow it, otherwise
+				 * fall back to a single hardsector
+				 */
+				if ((next_lsn & (lsns_per_blocksize - 1)) ||
+				    (remaining_lsns < lsns_per_blocksize))
+					io_bytes = hardsector_size;
+				else
+					io_bytes = blocksize;
+
+				/* set up the buffer head for this I/O */
+				bh->b_end_io = __end_sync_io;
+				bh->b_size = io_bytes;
+				bh->b_data = cur_bufptr;
+				bh->b_rdev = job->dev;
+				bh->b_rsector = next_lsn;
+				bh->b_page = virt_to_page(cur_bufptr);
+				bh->b_state = 0;
+				set_bit(BH_Dirty, &bh->b_state);
+				set_bit(BH_Lock, &bh->b_state);
+				set_bit(BH_Req, &bh->b_state);
+				set_bit(BH_Mapped, &bh->b_state);
+				bh->b_private = (void *) &cb;
+				atomic_inc(&cb.count);
+
+				generic_make_request(job->rw, bh);
+
+				/* update progress in local variables */
+				cur_bufptr += io_bytes;
+				next_lsn += io_bytes >> SECTOR_SHIFT;
+				remaining_lsns -= io_bytes >> SECTOR_SHIFT;
+			}
+			/* wait for all the bhs' I/Os to end */
+			__wait_on_sync_io(&cb);
+		}
+	}
+
+	/* continue if no errors were found:
+	 * check for a mid-sector ending offset;
+	 * if found, perform I/O on just that part
+	 * of the sector
+	 */
+	if (!rc && remaining_lsns) {
+		/* perform the partial-sector I/O */
+		partial_job.handle = job->handle;
+		partial_job.dev = job->dev;
+		partial_job.rw = job->rw;
+		partial_job.start_lsn = next_lsn;
+		partial_job.num_lsns = remaining_lsns;
+		partial_job.data = cur_bufptr;
+		rc = __partial_sector_sync_io(&partial_job, 0, &cb,
+					      &sector_buf);
+	}
+
+	/* free the sector buffer if one was allocated */
+	if (sector_buf)
+		kfree(sector_buf);
+
+	/* coalesce return codes */
+	rc |= cb.rc;
+	if (rc) {
+		DMINFO("%s: FAILED dev(%d:%d) hardsect_size=%d rw=%d "
+		       "start_lsn=%lu num_lsns=%lu",
+		       __FUNCTION__, MAJOR(job->dev), MINOR(job->dev),
+		       hardsector_size, job->rw,
+		       (unsigned long) job->start_lsn,
+		       (unsigned long) job->num_lsns);
+	}
+	return rc;
+}
+
+int sync_io_setup(struct sync_io_handle **handle, u8 *name)
+{
+	*handle = kmalloc(sizeof(struct sync_io_handle), GFP_KERNEL);
+	if (*handle == NULL)
+		return -ENOMEM;
+	memset(*handle, 0, sizeof(struct sync_io_handle));
+	atomic_set(&(*handle)->waiters, 0);
+	init_waitqueue_head(&(*handle)->cache_wait_queue);
+	(*handle)->cachep = kmem_cache_create(name, sizeof(struct buffer_head),
+					      0, SLAB_HWCACHE_ALIGN,
+					      NULL, NULL);
+	if ((*handle)->cachep == NULL) {
+		DMERR("cannot create %s SLAB cache", name);
+		kfree(*handle);
+		return -ENOMEM;
+	}
+	INIT_LIST_HEAD(&(*handle)->syncio_list);
+	list_add(&(*handle)->syncio_list, &syncio_instances);
+	return 0;
+}
+
+void sync_io_cleanup(struct sync_io_handle *handle)
+{
+	if (atomic_read(&handle->waiters)) {
+		DMERR("someone is still trying to allocate from the SLAB cache");
+		return;
+	}
+	kmem_cache_destroy(handle->cachep);
+	list_del(&handle->syncio_list);
+	kfree(handle);
+}
+
+MODULE_LICENSE("GPL");
+
diff -Naur linux-2.4.20-dm-10/drivers/md/syncio.h linux-2.4.20-evms-2.0.1/drivers/md/syncio.h
--- linux-2.4.20-dm-10/drivers/md/syncio.h	1969-12-31 18:00:00.000000000 -0600
+++ linux-2.4.20-evms-2.0.1/drivers/md/syncio.h	2003-04-28 11:58:29.000000000 -0500
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2002
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * linux/drivers/md/syncio.h
+ *
+ * Provides synchronous I/O support
+ */
+
+#ifndef __SYNCIO_H_
+#define __SYNCIO_H_
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+/*
+ * The syncio component allows users to call sync_io_setup() to obtain a
+ * handle.  Attached to the syncio handle is a private slab cache for the
+ * structures related to I/O.  On 2.4.x kernels, buffer heads are
+ * allocated from this private slab cache.
+ *
+ * A user of the syncio service can share a single syncio handle among
+ * its instances.  For example, on the creation of the first BBR device,
+ * BBR can obtain a syncio handle and use it for all future BBR instances.
+ * Of course, the code that handles the destruction of the last BBR
+ * instance should then call sync_io_cleanup().
+ */
+
+struct sync_io_handle {
+	struct list_head syncio_list;
+	kmem_cache_t *cachep;
+	atomic_t waiters;
+	wait_queue_head_t cache_wait_queue;
+};
+
+struct sync_io_job {
+	struct sync_io_handle *handle;
+	kdev_t dev;
+	int rw;
+	u64 start_lsn;
+	u64 num_lsns;
+	void *data;
+};
+
+extern int sync_io_setup(struct sync_io_handle **handle, u8 *name);
+extern void sync_io_cleanup(struct sync_io_handle *handle);
+extern int sync_io(struct sync_io_job *job);
+
+#endif
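
For reference, the header comment above describes the intended calling pattern: obtain a handle once with sync_io_setup(), describe each transfer as a sync_io_job in 512-byte logical sectors, and tear the handle down with sync_io_cleanup() when the last user goes away. The sketch below illustrates that pattern against this API; it is not part of the patch, and everything named example_* is a hypothetical caller invented for illustration.

/* Hypothetical usage sketch -- not part of the patch.  The names
 * example_handle and example_read_metadata are invented here; the
 * syncio calls themselves are the ones declared in syncio.h.
 */
#include <linux/fs.h>	/* READ/WRITE */
#include "syncio.h"

static struct sync_io_handle *example_handle;

/* read 4KB of metadata from the start of a device, synchronously */
static int example_read_metadata(kdev_t dev, void *buf)
{
	struct sync_io_job job;
	int rc;

	/* one handle can be shared by all instances of a target;
	 * set it up on first use
	 */
	if (!example_handle) {
		rc = sync_io_setup(&example_handle, (u8 *) "example-syncio");
		if (rc)
			return rc;
	}

	/* a job describes one transfer in 512-byte logical sectors */
	job.handle = example_handle;
	job.dev = dev;
	job.rw = READ;		/* or WRITE */
	job.start_lsn = 0;	/* starting logical sector */
	job.num_lsns = 8;	/* 8 x 512 bytes = 4KB */
	job.data = buf;

	/* blocks until all of the I/O completes; returns 0 on success */
	return sync_io(&job);
}

When the last instance sharing the handle is destroyed, the caller is expected to release it with sync_io_cleanup(example_handle), as the syncio.h comment notes.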