#include <SPAD/DEV_KRNL.H>
#include <TIME.H>
#include <STDLIB.H>
#include <VALUES.H>

#include "VFS.H"

extern IO_STUB FNODE_PAGE_RELEASE;

static void PAGE_WRITE(VMENTITY *e, PROC *proc, int trashing);
static int PAGE_SWAPOUT(VMENTITY *e);
static int PAGE_CHECKMAP(VMENTITY *e);
static WQ *FULFIL_FROM_BUFFER_CACHE(PAGEINRQ *pgin, int prefetch);
static WQ *FNODE_PAGE_LOCKDOWN(PAGE *p, int lock);

static void VFS_READ_FNODE_SPAGE(PAGEINRQ *pgin, int prefetch);

/* VM entity type id that this file stores into the e.type field of every
   page it sets up; where the id itself is assigned is not visible in this
   part of the file. */
static int page_vm_entity;

/* Callback table for page VM entities. NOTE(review): the field order is
   presumably check-map, write, swap-out, name - confirm against VMENTITY_T. */
static __const__ VMENTITY_T page_calls = { PAGE_CHECKMAP, PAGE_WRITE, PAGE_SWAPOUT, "PAGE" };

/*
 * Map a file offset to a bucket of a per-fnode page hash.
 *
 * off  - file offset (only its value truncated to unsigned is used)
 * mask - hash mask of the table being indexed
 *
 * Returns the page-cluster index of the truncated offset, reduced by mask.
 */
static __finline__ unsigned PAGE_HASH(_u_off_t off, unsigned mask)
{
	unsigned cluster_index = (unsigned)off >> __PAGE_CLUSTER_BITS;

	return cluster_index & mask;
}

/*
 * Look up the page of fnode f whose id equals pos rounded down to a page
 * cluster boundary.
 *
 * pos  - file offset anywhere inside the wanted page cluster
 * f    - fnode whose page hash (f->u.h) is searched
 * proc - if non-NULL, the found page's VM entity is touched on behalf of
 *        this process (done at SPL_CACHE, then SPL is dropped to SPL_FS)
 *
 * Returns the page, or NULL when no page with that id is hashed.
 */
static PAGE *FIND_PAGE(_u_off_t pos, FNODE *f, PROC *proc)
{
	PAGE *pg;
	unsigned bucket = PAGE_HASH(pos, f->u.h.hash_mask);

	/* page ids are compared against the cluster-aligned offset */
	pos &= ~(_u_off_t)__PAGE_CLUSTER_SIZE_MINUS_1;

	XLIST_FOR_EACH(pg, &f->u.h.hash[bucket], PAGE, hash_entry) {
#if __DEBUG >= 1
		if (__unlikely(pg->release != FNODE_PAGE_RELEASE))
			KERNEL$SUICIDE("FIND_PAGE: %s: FOUND A NON-FILESYSTEM PAGE (RELEASE %p)", VFS$FNODE_DESCRIPTION(f), pg->release);
		if (__unlikely(pg->fnode != f))
			KERNEL$SUICIDE("FIND_PAGE: %s: PAGE ON DIFFERENT FNODE %p", VFS$FNODE_DESCRIPTION(f), pg->fnode);
#endif
		if (__likely(pg->id == pos)) goto found;
	}
	/* chain exhausted without a match */
	return NULL;

	found:
	CHECK_PAGE(pg, 0);
	if (__likely(proc != NULL)) {
#if __DEBUG >= 1
		if (__unlikely(KERNEL$SPL != SPL_X(SPL_FS)))
			KERNEL$SUICIDE("FIND_PAGE WITH NON-ZERO PROC AT SPL %08X", KERNEL$SPL);
#endif
		RAISE_SPL(SPL_CACHE);
		KERNEL$CACHE_TOUCH_VM_ENTITY(&pg->e, proc);
		LOWER_SPL(SPL_FS);
	}
	return pg;
}

/*
 * Write callback of the page VM entity (listed in page_calls).
 *
 * Lowers SPL to SPL_FS, recovers the PAGE that embeds entity e and, if the
 * page has a non-zero dirty_to, forwards the request to FNODE_WRITE on the
 * owning fnode's entity with the same proc/trashing arguments.
 */
static void PAGE_WRITE(VMENTITY *e, PROC *proc, int trashing)
{
	PAGE *pg;
	FNODE *owner;

	LOWER_SPL(SPL_FS);
	pg = LIST_STRUCT(e, PAGE, e);
	if (__likely(!pg->dirty_to)) {
		/* nothing recorded as dirty on this page */
		return;
	}
	owner = (FNODE *)pg->fnode;
	FNODE_WRITE(&owner->e, proc, trashing);
}

/*
 * Swap-out callback of the page VM entity (listed in page_calls).
 *
 * Returns 2 when the filesystem's page zone has nothing allocated and
 * no_need_to_free_fnodes(fs) holds; otherwise returns 1 if VFS$FREE_PAGE
 * gave back a non-null result and 0 if it returned null.
 * NOTE(review): the meaning the cache layer attaches to 0/1/2 is defined
 * outside this file - confirm before relying on it.
 */
static int PAGE_SWAPOUT(VMENTITY *e)
{
	PAGE *pg;
	FNODE *owner;
	FS *fs;
	int result;

	LOWER_SPL(SPL_FS);
	pg = LIST_STRUCT(e, PAGE, e);
	owner = (FNODE *)pg->fnode;
	fs = owner->fs;
	/* fnodes can depend on pages, so a page is held back only when
	   fnodes may be held back as well */
	if (__unlikely(!fs->z.allocated)) {
		if (no_need_to_free_fnodes(fs)) return 2;
	}
	result = VFS$FREE_PAGE(pg) ? 1 : 0;
	LOWER_SPL(SPL_FS);
	return result;
}

/*
 * Check-map callback of the page VM entity (listed in page_calls).
 *
 * Lowers SPL to SPL_FS and returns whatever KERNEL$VM_SCAN_PAGE reports for
 * the PAGE that embeds entity e.
 */
static int PAGE_CHECKMAP(VMENTITY *e)
{
	PAGE *pg;
	int result;

	LOWER_SPL(SPL_FS);
	pg = LIST_STRUCT(e, PAGE, e);
	result = KERNEL$VM_SCAN_PAGE(pg);
	return result;
}

/*
 * Release handler installed in p->release for filesystem pages.
 *
 * The request asks that physical page RQ->pg be given up. The handler
 * allocates a replacement page, copies the page's metadata, cache queue
 * state and contents to it, links the replacement into the fnode's lists
 * and hash in place of the old page, and then releases the old page.
 *
 * Outcomes visible here:
 *  - request re-dispatched through KERNEL$WAKE_PAGE_RELEASE (page is not /
 *    no longer handled by this function, or after a free attempt),
 *  - request parked on a wait queue (page wants free, VFS$FREE_PAGE returned
 *    a queue, or memory is short),
 *  - status -ENOMEM when KERNEL$OOM reports true,
 *  - status 0 after a successful move.
 */
DECL_IOCALL(FNODE_PAGE_RELEASE, SPL_FS, PAGE_RELEASE_REQUEST)
{
	FNODE *f;
	PAGE *p1, *p2;	/* p1: page being released, p2: its replacement */
	void *v1, *v2;
	p1 = RQ->pg;
	/*__debug_printf(".");*/
	/*__debug_printf("fnode page release\n");*/
	/* the page's release handler is not this function: re-dispatch */
	if (__unlikely(p1->release != FNODE_PAGE_RELEASE)) call_again: RETURN_IORQ_LSTAT(RQ, KERNEL$WAKE_PAGE_RELEASE);
	if (p1->flags & PAGE_WANTFREE) {
		WQ_WAIT(&p1->wait, RQ, KERNEL$WAKE_PAGE_RELEASE);
		RETURN;
	}
	f = p1->fnode;
	/* unmap returned non-NULL: do not move the page, try to free it instead */
	if (__unlikely((KERNEL$VM_UNMAP_PAGE(p1)) != NULL)) {
		WQ *wq;
		LOWER_SPL(SPL_FS);
		if ((wq = VFS$FREE_PAGE(p1))) {
			WQ_WAIT(wq, RQ, KERNEL$WAKE_PAGE_RELEASE);
			RETURN;
		}
		goto call_again;
	}
	if (!(p2 = KERNEL$ALLOC_USER_PAGE(VM_TYPE_WIRED_UNMAPPED))) {
		if (KERNEL$OOM(VM_TYPE_WIRED_UNMAPPED)) {
			RQ->status = -ENOMEM;
			RETURN_AST(RQ);
		}
		LOWER_SPL(SPL_FS);
		WQ_WAIT(&KERNEL$FREEMEM_WAIT, RQ, KERNEL$WAKE_PAGE_RELEASE);
		RETURN;
	}
	/* take the old page off the fnode's clean/dirty list and hash chain */
	DEL_FROM_LIST(&p1->node_entry);
	DEL_FROM_LIST(&p1->hash_entry);
	LOWER_SPL(SPL_FS);
	p2->id = p1->id;
	p2->release = FNODE_PAGE_RELEASE;
	p2->lockdown = FNODE_PAGE_LOCKDOWN;
	p2->fnode = f;
	p2->flags = p1->flags;
	WQ_WAKE_ALL(&p1->wait);
	WQ_INIT(&p2->wait, "VFS$PAGE_WAIT(rel)");
	/* register the new page with the cache and hand over p1's queue state */
	CACHE_CONSTRUCT_VM_ENTITY(&p2->e);
	RAISE_SPL(SPL_CACHE);
	p2->e.type = page_vm_entity;
	KERNEL$CACHE_INSERT_VM_ENTITY(&p2->e, &KERNEL$PROC_KERNEL, 0);
	TEST_SPL(SPL_FS, SPL_CACHE);
	KERNEL$CACHE_TRANSFER_QUEUE_STATE(&p2->e, &p1->e);
	TEST_SPL(SPL_FS, SPL_CACHE);
	KERNEL$CACHE_REMOVE_VM_ENTITY(&p1->e);
	LOWER_SPL(SPL_FS);
	p2->valid_from = p1->valid_from;
	p2->valid_to = p1->valid_to;
	p2->dirty_from = p1->dirty_from;
	p2->dirty_to = p1->dirty_to;
	p2->io_to = p1->io_to;	/* not needed as busy pages can't be moved */
	p2->prefetch_hint = p1->prefetch_hint;
	/* copy the whole page cluster's contents */
	v1 = KERNEL$MAP_PHYSICAL_PAGE(p1);
	v2 = KERNEL$MAP_PHYSICAL_PAGE(p2);
	memcpy(v2, v1, __PAGE_CLUSTER_SIZE);
	KERNEL$UNMAP_PHYSICAL_BANK(v1);
	KERNEL$UNMAP_PHYSICAL_BANK(v2);
	INIT_XLIST(&p2->mapping);
	/* link the replacement: clean list unless the page is writeable */
	RAISE_SPL(SPL_VSPACE);
	if (__unlikely(!(p2->flags & PAGE_WRITEABLE))) ADD_TO_LIST_END(&f->u.h.clean, &p2->node_entry);
	else ADD_TO_LIST_END(&f->u.h.dirty, &p2->node_entry);
	ADD_TO_XLIST(&f->u.h.hash[PAGE_HASH(p2->id, f->u.h.hash_mask)], &p2->hash_entry);
	LOWER_SPL(SPL_FS);
	KERNEL$PAGE_RELEASE(p1, VM_TYPE_WIRED_UNMAPPED);
	RQ->status = 0;
	RETURN_AST(RQ);
}

/*
 * Initialise page p as an empty cache page of fnode f and publish it.
 *
 * f    - owning fnode; must already have its hash and clean list set up
 * p    - page to initialise
 * pos  - file offset inside the page cluster this page will cover
 * proc - process passed to KERNEL$CACHE_INSERT_VM_ENTITY
 *
 * The page starts with empty valid and dirty ranges (from == cluster size,
 * to == 0), is appended to the fnode's clean list and hash chain at
 * SPL_VSPACE, and is then inserted into the cache at SPL_CACHE. SPL is
 * SPL_FS on return.
 */
static void SETUP_FNODE_PAGE(FNODE *f, PAGE *p, _u_off_t pos, PROC *proc)
{
	_u_off_t cluster_base = pos & ~(_u_off_t)__PAGE_CLUSTER_SIZE_MINUS_1;

	/* identity and handlers */
	p->fnode = f;
	p->id = cluster_base;
	p->release = FNODE_PAGE_RELEASE;
	p->lockdown = FNODE_PAGE_LOCKDOWN;

	/* state: no flags, nothing valid, nothing dirty, no prefetch hint */
	p->flags = 0;
	p->valid_to = 0;
	p->valid_from = __PAGE_CLUSTER_SIZE;
	p->dirty_to = 0;
	p->dirty_from = __PAGE_CLUSTER_SIZE;
	p->prefetch_hint = 0;
	WQ_INIT(&p->wait, "VFS$PAGE_WAIT");

	/* make the page reachable from the fnode */
	RAISE_SPL(SPL_VSPACE);
	ADD_TO_LIST_END(&f->u.h.clean, &p->node_entry);
	ADD_TO_XLIST(&f->u.h.hash[PAGE_HASH(pos, f->u.h.hash_mask)], &p->hash_entry);
	INIT_XLIST(&p->mapping);
	LOWER_SPL(SPL_FS);

	/* make the page known to the cache */
	CACHE_CONSTRUCT_VM_ENTITY(&p->e);
	RAISE_SPL(SPL_CACHE);
	p->e.type = page_vm_entity;
	KERNEL$CACHE_INSERT_VM_ENTITY(&p->e, proc, 0);
	LOWER_SPL(SPL_FS);
}

/* BMAP_TEST: true when block offset off lies inside the extent cached in
   the fnode, i.e. in [file_blk, file_blk + run_length).
   BMAP_TEST_0: true when the cached extent starts at block 0 and is
   non-empty. Both evaluate f more than once. */
#define BMAP_TEST(f, off)	(__likely((off) >= (f)->file_blk) && __likely((off) < (f)->file_blk + (f)->run_length))
#define BMAP_TEST_0(f)		(__likely(!(f)->file_blk) && __likely((f)->run_length != 0))

/*
 * Function form of BMAP_TEST: returns non-zero when block offset off falls
 * inside the extent currently cached in fnode f.
 */
int VFS$BMAP_OK(FNODE *f, __d_off off)
{
	int covered = BMAP_TEST(f, off);

	return covered;
}

/*
 * Zero bytes [z, t) of page p.
 *
 * p - page to modify; it is mapped for the duration of the call
 * z - first byte offset to clear
 * t - end offset (exclusive)
 *
 * With __DEBUG >= 1 the kernel is halted if t exceeds the page cluster
 * size or z > t.
 */
static void zero_end(PAGE *p, unsigned z, unsigned t)
{
	char *base;
	unsigned span;
#if __DEBUG >= 1
	if (__unlikely(t > __PAGE_CLUSTER_SIZE) || __unlikely(z > t))
		KERNEL$SUICIDE("zero_end: INVALID PARAMS %X, %X", z, t);
#endif
	span = t - z;
	base = KERNEL$MAP_PHYSICAL_PAGE(p);
	memset(base + z, 0, span);
	KERNEL$UNMAP_PHYSICAL_BANK(base);
}

static void __NORET_ATTR__ io_uncommitted_suicide(PAGE *p, unsigned from);
static void __NORET_ATTR__ io_out_of_disk_suicide(PAGE *p, unsigned from);
static void __NORET_ATTR__ zero_left_sectors_suicide(PAGEINRQ *pgin);
static void __NORET_ATTR__ unaligned_left_sectors_suicide(PAGEINRQ *pgin, unsigned long left);

/*
 * Start reading data into pgin->page of pgin->fnode so that byte offset
 * `from` (within the page cluster) becomes valid.
 *
 * The function either finishes with the request right here - failing the
 * caller with -EIO, parking the caller on a wait queue, or re-dispatching
 * the caller because nothing needs to be read - and frees the pagein
 * request, or it marks the page busy (PAGE_BUSY_1), bumps f->readers and
 * continues with block mapping (VFS$BMAP_DONE if the cached extent already
 * covers the sector, fsops->bmap otherwise).
 *
 * pgin->wr is set to the read direction relative to already valid data:
 * 0 (no adjustment), -1 (read ends where valid data starts) or 1 (read
 * starts where valid / in-flight data ends).
 */
static void VFS_READ_FNODE_PAGE(PAGEINRQ *pgin, unsigned from)
{
	FNODE *f = pgin->fnode;
	PAGE *p;
#if __DEBUG >= 1
	if (__unlikely(from >= __PAGE_CLUSTER_SIZE))
		KERNEL$SUICIDE("VFS_READ_FNODE_PAGE: PAGE OFFSET %u", from);
#endif
	/*__debug_printf("read_fnode_page(%d); page(%Ld,%d-%d)\n", from, pgin->page->id, pgin->page->valid_from, pgin->page->valid_to);*/
	/* a recorded write error fails reads unless the fs ignores write errors */
	if (__unlikely(f->fs->write_error) && !f->fs->ignore_write_errors) {
		eio:
		pgin->caller->status = -EIO;
		CALL_AST(pgin->caller);
		free_ret:
		VFS$FREE_EMPTY_PAGEIN(pgin);
		return;
	}
	/* fnode is write-locked or has direct-IO writers: non-prefetch callers
	   wait on the fnode, prefetch requests are simply dropped */
	if (__unlikely(f->flags & FNODE_WRITELOCK) || __unlikely(f->dio_writers)) {
		if (__likely(!PAGEIN_IS_PREFETCH(pgin, pgin->caller))) {
			WQ_WAIT_F(&f->wait, pgin->caller);
			VFS$FREE_FNODE(f);
		}
		goto free_ret;
	}
	p = pgin->page;
	/* page wants to be freed, or has busy bits other than PAGE_BUSY_1 */
	if (__unlikely(p->flags & (PAGE_WANTFREE | (PAGE_BUSY & ~PAGE_BUSY_1)))) {
		wq_page_ret:
		if (__likely(!PAGEIN_IS_PREFETCH(pgin, pgin->caller))) WQ_WAIT_F(&p->wait, pgin->caller);
		goto free_ret;
	}
	/* requested offset is already valid: just re-dispatch the caller */
	if (__unlikely(from >= p->valid_from) && __unlikely(from < p->valid_to)) goto ret;
	pgin->wr = 0;
	if (p->valid_to) {
		/* page already holds some valid data */
		if (__unlikely(p->flags & PAGE_BUSY_1)) goto wq_page_ret;
		if (__unlikely(from < p->valid_from)) {
			pgin->vdesc.len = 0;
			from = p->valid_from - 1, pgin->wr = -1;
		} else if (__likely(from >= p->valid_to)) {
			/* widen the length hint by the gap (in sectors) between
			   the end of valid data and the requested offset */
			if (__likely((long)pgin->vdesc.len >= 0)) pgin->vdesc.len += (from - p->valid_to) >> BIO_SECTOR_SIZE_BITS;
			else pgin->vdesc.len -= (from - p->valid_to) >> BIO_SECTOR_SIZE_BITS;
			from = p->valid_to, pgin->wr = 1;
		}
		if (__unlikely((p->id | from) >= f->size)) {
			unsigned z;
			clr:
			/* read position is at or past end of file: zero the
			   rest of the page instead of doing IO */
			z = (unsigned long)f->size & __PAGE_CLUSTER_SIZE_MINUS_1;
			zero_end(p, z, __PAGE_CLUSTER_SIZE);
			RAISE_SPL(SPL_VSPACE);
			p->valid_to = __PAGE_CLUSTER_SIZE;
			if (p->valid_from > z) p->valid_from = z;
			LOWER_SPL(SPL_FS);
			ret:
			CALL_IORQ_LSTAT_EXPR(pgin->caller, (IO_STUB *)pgin->caller->tmp1);
			goto free_ret;
		}
	} else {
		/* page holds no valid data yet */
		if (__unlikely(p->flags & PAGE_BUSY_1)) {
			/* a read is in flight up to io_to: wait if it covers
			   `from`, otherwise continue after it */
			if (__unlikely(from < p->io_to)) goto wq_page_ret;
			from = p->io_to, pgin->wr = 1;
		}
		/* past end of file: if the file end is pageio-aligned there is
		   nothing to read, otherwise read from the file end's block */
		if (__unlikely((p->id | from) >= f->size)) if (__unlikely(!((from = (unsigned long)f->size & __PAGE_CLUSTER_SIZE_MINUS_1) & f->fs->pageio_mask))) goto clr;
	}
	/* align the start down to the filesystem's page IO granularity and
	   convert it to a sector offset within the file */
	from &= ~f->fs->pageio_mask;
	pgin->off = (p->id | from) >> BIO_SECTOR_SIZE_BITS;

	if (__unlikely(f->flags & (FNODE_UNCOMMITTED | FNODE_SRC_UNCOMMITTED | FNODE_KILLED))) {
		if (__likely(f->flags & FNODE_KILLED)) {
			goto eio;
		}
		io_uncommitted_suicide(p, from);
	}
	if (__unlikely(pgin->off << BIO_SECTOR_SIZE_BITS >= f->disk_size)) {
		io_out_of_disk_suicide(p, from);
	}
	pgin->status = 0;
	f->readers++;
	p->io_to = __PAGE_CLUSTER_SIZE;
	RAISE_SPL(SPL_VSPACE);
	p->flags += PAGE_BUSY_1;
	LOWER_SPL(SPL_FS);
	/*__debug_printf("off=%Ld, p->id=%Ld, from=%d, mask=%d\n", pgin->off, p->id, from, f->fs->pageio_mask);*/

	if (__likely(BMAP_TEST(f, pgin->off))) {
		/*bd:*/
		VFS$BMAP_DONE(pgin);
	} else {
		/* these two lines were just for testing, they present no
		speed-up (only icache pollution) because bmap is as fast as
		sync_bmap
		f->fs->fsops->sync_bmap(f, pgin->off, 1);
		if (__likely(BMAP_TEST(f, pgin->off))) goto bd;
		*/
		f->fs->fsops->bmap(pgin);
	}
}

static void DIRECT_SIO(SIORQ *rq, FNODE *f, int wr, _u_off_t pos);
static void DIRECT_AIO(AIORQ *rq, FNODE *f, int wr);

extern AST_STUB PAGES_READ;

/*
 * Continuation of a pagein request once block mapping has finished (also
 * called directly when the fnode's cached extent already covers pgin->off).
 *
 * pgin->status != 0: mapping did not yield a block. Status 1 is handled as
 *   a hole: for a page request the affected block is zero-filled and marked
 *   valid and the caller is re-dispatched; for direct IO (pgin->page ==
 *   NULL) CAP_DIRECT is cleared on the handle and the caller re-dispatched.
 *   Any other status is passed to the caller as an error.
 * pgin->status == 0: the extent in f->file_blk / f->disk_blk /
 *   f->run_length is used to build a read of the first page plus optional
 *   read-ahead into following pages. The data is taken from the buffer
 *   cache when possible, otherwise a block IO request is submitted with
 *   PAGES_READ as completion handler.
 *
 * pgin->vdesc.len is a length hint in sectors: >= 0 is advisory, < 0 means
 * "exactly -len" (see the comment further down).
 */
void VFS$BMAP_DONE(PAGEINRQ *pgin)
{
	FNODE *f;
	PAGE *p;
	IORQ *caller;
	__sec_t blk;
	__d_off leftx;
	unsigned long sector_mask;
	unsigned long left;
	unsigned long l;
	unsigned o;
	_u_off_t sz;
	BRQ *next;
	WQ *a;
	/*__debug_printf("BMAP_DONE: %d\n", (int)pgin->status);*/
	if (__unlikely(pgin->status != 0)) {
		p = pgin->page;
		if (__unlikely(!p)) {
			if (__unlikely(pgin->status == 1)) {
				/* direct IO on file with holes ... turn off
				   direct IO for that file. It can only happen
				   on Ext2 and I don't care about Ext2
				   performance */
				HANDLE *h;
				caller = pgin->caller;
				/* caller->tmp1 identifies the request kind and thus
				   which request structure holds the handle */
				if (caller->tmp1 == (unsigned long)KERNEL$WAKE_READ || caller->tmp1 == (unsigned long)KERNEL$WAKE_WRITE) h = ((SIORQ *)caller)->handle;
				else if (caller->tmp1 == (unsigned long)KERNEL$WAKE_AREAD || caller->tmp1 == (unsigned long)KERNEL$WAKE_AWRITE) h = ((AIORQ *)caller)->handle;
				else KERNEL$SUICIDE("VFS$BMAP_DONE: %s: INVALID DIRECT IO REQUEST (HOLE), FUNCTION %lX", VFS$FNODE_DESCRIPTION(pgin->fnode), caller->tmp1);
				if (__likely(h->op == &VFS_FILE_OPERATIONS)) h->flags2 &= ~CAP_DIRECT;
			}
			goto dio_error;
		}
		if (__likely(pgin->status == 1)) {
			/* hole under a cached page: zero one page-IO unit
			   starting at the requested sector and mark it valid,
			   clipping valid_to to the file size */
			unsigned so = ((unsigned long)pgin->off << BIO_SECTOR_SIZE_BITS) & __PAGE_CLUSTER_SIZE_MINUS_1;
			unsigned eo = so + pgin->fs->pageio_mask + 1;
			zero_end(p, so, eo);
			if (p->valid_from > so)
				p->valid_from = so;
			if (p->valid_to < eo) {
				p->valid_to = eo;
				if (__unlikely((p->id + p->valid_to) > ((FNODE *)p->fnode)->size)) {
					p->valid_to = (unsigned long)((FNODE *)p->fnode)->size - (unsigned long)p->id;
				}
			}
		}
		/* drop the busy reference taken when the read was started */
		RAISE_SPL(SPL_VSPACE);
		if (__unlikely(!(p->flags & PAGE_BUSY)))
			KERNEL$SUICIDE("VFS$BMAP_DONE: %s: IO ON NON-BUSY PAGE %"__64_format"X, FLAGS %X", VFS$FNODE_DESCRIPTION(pgin->fnode), p->id, p->flags);
		p->flags -= PAGE_BUSY_1;
		WQ_WAKE_ALL(&p->wait);
		CHECK_PAGE(p, 0);
		LOWER_SPL(SPL_FS);
		dio_error:
		if (__likely(pgin->status == 1)) {
			caller = VFS$FREE_PAGEIN(pgin);
			CALL_IORQ_LSTAT_EXPR(caller, (IO_STUB *)caller->tmp1);
		} else {
			pgin->caller->status = pgin->status;
			caller = VFS$FREE_PAGEIN(pgin);
			CALL_AST(caller);
		}
		return;
	}
	f = pgin->fnode;
	/* the cached extent does not cover the sector: map again */
	if (__unlikely(!BMAP_TEST(f, pgin->off))) {
		f->fs->fsops->bmap(pgin);
		return;
	}
	if (__unlikely(!pgin->page)) {
		__d_off off;
		/* direct IO restart */
		off = pgin->off;
		caller = VFS$FREE_PAGEIN(pgin);
		if (__likely(caller->tmp1 == (unsigned long)KERNEL$WAKE_AREAD)) DIRECT_AIO((AIORQ *)caller, f, 0);
		else if (__likely(caller->tmp1 == (unsigned long)KERNEL$WAKE_AWRITE)) DIRECT_AIO((AIORQ *)caller, f, 1);
		else if (__likely(caller->tmp1 == (unsigned long)KERNEL$WAKE_READ)) DIRECT_SIO((SIORQ *)caller, f, 0, (_u_off_t)off << BIO_SECTOR_SIZE_BITS);
		else if (__likely(caller->tmp1 == (unsigned long)KERNEL$WAKE_WRITE)) DIRECT_SIO((SIORQ *)caller, f, 1, (_u_off_t)off << BIO_SECTOR_SIZE_BITS);
		else KERNEL$SUICIDE("VFS$BMAP_DONE: %s: INVALID DIRECT IO REQUEST, FUNCTION %lX", VFS$FNODE_DESCRIPTION(f), caller->tmp1);
		return;
	}

	/* unless the read must start after existing data (wr == 1) or the
	   length is exact, move the start back towards the beginning of the
	   page cluster as far as the extent allows and lengthen the hint */
	if (pgin->wr != 1 && __likely((long)pgin->vdesc.len >= 0)) {
		blk = pgin->off - f->file_blk;
		if (blk > ((unsigned long)pgin->off & __SECTORS_PER_PAGE_CLUSTER_MINUS_1))
			blk = (unsigned long)pgin->off & __SECTORS_PER_PAGE_CLUSTER_MINUS_1;
		pgin->off -= blk; 
		pgin->vdesc.len += blk;
	}

	/* blk: first disk sector; left: sectors remaining in the extent
	   (clamped to what fits into unsigned long) */
	blk = f->disk_blk + (pgin->off - f->file_blk);
	leftx = f->run_length - (pgin->off - f->file_blk);
	if (__unlikely(leftx > MAXULONG)) left = MAXULONG & ~((unsigned long)pgin->fs->pageio_mask >> BIO_SECTOR_SIZE_BITS);
	else left = leftx;

	sector_mask = pgin->fs->pageio_mask >> BIO_SECTOR_SIZE_BITS;
	if (__unlikely(left & sector_mask))
		unaligned_left_sectors_suicide(pgin, left);

	/* do not read past min(size, disk_size), rounded up to page-IO units */
	sz = __unlikely(f->disk_size > f->size) ? f->size : f->disk_size;
	if (__unlikely(pgin->off + left > (sz + BIO_SECTOR_SIZE - 1) >> BIO_SECTOR_SIZE_BITS))
		left = ((unsigned long)(((sz + BIO_SECTOR_SIZE - 1) >> BIO_SECTOR_SIZE_BITS) - pgin->off) + sector_mask) & ~sector_mask;

	/* o: sector offset of the read within the first page cluster */
	o = (unsigned long)pgin->off & __SECTORS_PER_PAGE_CLUSTER_MINUS_1;

	/* pgin->vdesc.len ... 
		>= 0 ... advice
		< 0  ... exactly */
	if (__likely((long)pgin->vdesc.len >= 0)) {
		unsigned long p_sec = (pgin->vdesc.len + sector_mask) & ~sector_mask;
		if (left > pgin->fs->page_readahead && left > p_sec) {
			left = pgin->fs->page_readahead & ~sector_mask;
			if (__unlikely(left < p_sec)) {
				left = p_sec;
			}
			if (__unlikely(!left)) left = sector_mask + 1;
		}
	} else {
		unsigned long max_sec = (-pgin->vdesc.len + sector_mask) & ~sector_mask;
		if (__likely(left > max_sec)) {
			left = max_sec;
		}
	}
	if (__unlikely(left > (VFS_MAX_PAGE_READAHEAD >> BIO_SECTOR_SIZE_BITS)))
		left = (VFS_MAX_PAGE_READAHEAD >> BIO_SECTOR_SIZE_BITS);

	if (__unlikely(!left)) {
		zero_left_sectors_suicide(pgin);
	}

	/* l: sectors going into the first page; left: sectors of the whole
	   request including read-ahead pages */
	l = left;

	p = pgin->page;
	if (__unlikely(pgin->wr == -1)) {
		/* reading in front of valid data: stop where it begins and do
		   no read-ahead */
		if (o + l > (p->valid_from >> BIO_SECTOR_SIZE_BITS))
			l = (p->valid_from >> BIO_SECTOR_SIZE_BITS) - o;
		left = l;
	} else {
		if (o + l > __SECTORS_PER_PAGE_CLUSTER)
			l = __SECTORS_PER_PAGE_CLUSTER - o;
	}

	p->io_to = (o + l) << BIO_SECTOR_SIZE_BITS;

	/*__debug_printf("o=%Ld, l=%ld, valid_from=%d, left=%ld\n", pgin->off, l, p->valid_from, left);*/

	/*pgin->brq.rrq.h = ... in constructor */
	pgin->brq.rrq.fn = PAGES_READ;
	pgin->brq.rrq.sec = blk;
	pgin->brq.rrq.nsec = l;
	pgin->brq.rrq.flags = BIO_READ;
	pgin->brq.rrq.desc = &pgin->brq.rdesc;
	pgin->brq.rrq.proc = pgin->tag.proc;
	pgin->brq.rrq.fault_sec = -1;
	/*pgin->brq.rdesc.v.vspace ... in constructor */
	pgin->brq.rdesc.v.ptr = KERNEL$PAGE_2_PHYS(p) | (o << BIO_SECTOR_SIZE_BITS);
	pgin->brq.rdesc.v.len = l << BIO_SECTOR_SIZE_BITS;
	pgin->brq.rdesc.next = NULL;
	pgin->brq.next = NULL;
	next = &pgin->brq;
	/* from here on pgin->wr holds the total sector count of the request;
	   PAGES_READ reads it back as the number of sectors to account */
	pgin->wr = pgin->brq.rrq.nsec;

	/* NULL: fully served from the buffer cache; (WQ *)1: go to disk;
	   anything else: a wait queue to park the caller on */
	a = FULFIL_FROM_BUFFER_CACHE(pgin, 0);
	if (__unlikely(!a)) {
		pgin->brq.rrq.status = 0;
		CALL_AST(&pgin->brq.rrq);
		goto do_prefetch;
	}
	if (__unlikely(a != (WQ *)1)) {
		RAISE_SPL(SPL_VSPACE);
		p->flags -= PAGE_BUSY_1;
		WQ_WAKE_ALL(&p->wait);
		CHECK_PAGE(p, 0);
		LOWER_SPL(SPL_FS);
		caller = VFS$FREE_PAGEIN(pgin);
		WQ_WAIT_F(a, caller);
		return;
	}

	/* read-ahead: chain following page clusters onto the same request
	   while sectors remain and pages/descriptors can be obtained */
	while (left -= l) {
		BRQ *brq;
		PAGE *pp;
		_u_off_t pos;
		unsigned ws;
		pos = p->id + __PAGE_CLUSTER_SIZE;
		pp = FIND_PAGE(pos, f, NULL);
		if (__likely(!pp)) {
			if (__unlikely(KERNEL$MAY_ALLOC(pgin->tag.proc, __PAGE_CLUSTER_SIZE) != NULL)) break;
			brq = __slalloc(&pgin->fs->brq);
			if (__unlikely(!brq)) break;
			pp = PAGEZONE_ALLOC(&f->fs->z, KERNEL$ALLOC_IO_PAGE, 0);
			if (__unlikely(!pp)) {
				__slow_slfree(brq);
				break;
			}
			INIT_PAGE(pp, 0xa3);
			SETUP_FNODE_PAGE(f, pp, pos, pgin->tag.proc);
		} else {
			/* existing page: stop if it is busy, if the read would
			   not reach its valid data, or if it is valid from 0 */
			if (__unlikely((pp->flags & PAGE_BUSY) != 0) || (pp->valid_from < __PAGE_CLUSTER_SIZE && pp->valid_from > left << BIO_SECTOR_SIZE_BITS) || !pp->valid_from) break;
			brq = __slalloc(&pgin->fs->brq);
			if (__unlikely(!brq)) break;
		}
		p = pp;
		RAISE_SPL(SPL_VSPACE);
		p->flags |= PAGE_BUSY_1;
		LOWER_SPL(SPL_FS);
		/* ws: sectors to read into this page - up to its valid data,
		   limited by what is left */
		ws = p->valid_from >> BIO_SECTOR_SIZE_BITS;
		if (ws > left) ws = left;
		p->io_to = ws << BIO_SECTOR_SIZE_BITS;
		next->next = brq;
		next->rdesc.next = &brq->rdesc;
		brq->rdesc.v.ptr = KERNEL$PAGE_2_PHYS(p);
		brq->rdesc.v.len = ws << BIO_SECTOR_SIZE_BITS;
		brq->rdesc.next = NULL;
		brq->next = NULL;
		pgin->brq.rrq.nsec += ws;
		pgin->wr += ws;
		next = brq;
		l = ws;
		/* a partially read page ends the chain */
		if (l != __SECTORS_PER_PAGE_CLUSTER) break;
	}

	/*__debug_printf(" bio: %Ld,%d ", pgin->brq.rrq.sec, pgin->brq.rrq.nsec);*/
	CALL_IORQ_CANCELABLE(&pgin->brq.rrq, KERNEL$BIO, pgin->caller);
	do_prefetch:
	/* p is the last page of the request here */
	if (!PAGEIN_IS_PREFETCH(pgin, pgin->caller)) {
		if (__likely((long)pgin->vdesc.len >= 0))
			VFS$PREFETCH_FILE(p->id + p->io_to, f, pgin->tag.proc);
		return;
	} else {
		/* record on the first page how far (in bytes from its start)
		   this prefetch reaches */
		pgin->page->prefetch_hint = (unsigned long)p->id - (unsigned long)pgin->page->id + p->io_to;
		return;
	}
}

DECL_AST(PAGES_READ, SPL_FS, BIORQ)
{
	BRQ *brq;
	PAGE *p;
	PAGEINRQ *pgin = GET_STRUCT(RQ, PAGEINRQ, brq.rrq);
	unsigned pos, nsec;
	IO_DISABLE_CHAIN_CANCEL(SPL_FS, pgin->caller);
	/*__debug_printf(" biodone ");*/
	brq = pgin->brq.next;
	while (brq) {
		BRQ *new = brq->next;
		__slfree(brq);
		brq = new;
	}
	p = pgin->page;
	pos = (unsigned long)pgin->off & __SECTORS_PER_PAGE_CLUSTER_MINUS_1;
	nsec = pgin->wr;
	do nextpg: {
		unsigned es = pos + nsec;
		if (es > __SECTORS_PER_PAGE_CLUSTER) es = __SECTORS_PER_PAGE_CLUSTER;
		RAISE_SPL(SPL_VSPACE);
		if (__likely(pgin->brq.rrq.status >= 0)) {
			if (p->valid_from > pos << BIO_SECTOR_SIZE_BITS)
				p->valid_from = pos << BIO_SECTOR_SIZE_BITS;
			if (p->valid_to < es << BIO_SECTOR_SIZE_BITS) {
				p->valid_to = es << BIO_SECTOR_SIZE_BITS;
				if (p->valid_to == p->io_to) p->io_to = __PAGE_CLUSTER_SIZE;
				if (__unlikely((p->id + p->valid_to) > ((FNODE *)p->fnode)->size)) {
					p->valid_to = (unsigned long)((FNODE *)p->fnode)->size - (unsigned long)p->id;
				}
			}
		}
		if (__unlikely(!(p->flags & PAGE_BUSY)))
			KERNEL$SUICIDE("PAGES_READ: %s: IO ON NON-BUSY PAGE %"__64_format"X, FLAGS %X", VFS$FNODE_DESCRIPTION(pgin->fnode), p->id, p->flags);
		p->flags -= PAGE_BUSY_1;
		WQ_WAKE_ALL(&p->wait);
		CHECK_PAGE(p, 0);
		LOWER_SPL(SPL_FS);
		if ((nsec -= es - pos)) {
			pos = 0;
			p = FIND_PAGE(p->id + __PAGE_CLUSTER_SIZE, pgin->fnode, NULL);
			if (__unlikely(!p))
				KERNEL$SUICIDE("PAGES_READ: %s: CAN'T FIND PAGE %"__64_format"X", VFS$FNODE_DESCRIPTION(pgin->fnode), (__u64)p->id + __PAGE_CLUSTER_SIZE);
			goto nextpg;
		}
	} while (0);
	if (__unlikely(pgin->brq.rrq.status < 0)) {
		IORQ *caller;
		pgin->caller->status = pgin->brq.rrq.status;
		caller = VFS$FREE_PAGEIN(pgin);
		/*__debug_printf(" fail: %d ", (int)caller->status);*/
		RETURN_AST(caller);
	} else {
		IORQ *caller;
		caller = VFS$FREE_PAGEIN(pgin);
		/*__debug_printf(" success ");*/
		RETURN_IORQ_LSTAT(caller, (IO_STUB *)caller->tmp1);
	}
}

/*
 * Move the pages of fnode f from its small hash table to a newly allocated
 * big one.
 *
 * Must be called at SPL_FS with f using the small hash (both are enforced
 * when __DEBUG >= 1). If the big table cannot be allocated the fnode is
 * left unchanged. Each page is re-linked at SPL_VSPACE; the table pointer
 * and mask are switched at SPL_VSPACE as well, after which the old table
 * is freed.
 */
static void REHASH_FNODE(FNODE *f)
{
	unsigned slot;
	XLIST_HEAD *big_table, *small_table;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_FS)))
		KERNEL$SUICIDE("REHASH_FNODE AT SPL %08X", KERNEL$SPL);
	if (__unlikely(f->u.h.hash_mask != SMALL_HASH_MASK))
		KERNEL$SUICIDE("REHASH_FNODE: %s: INVALID HASH MASK %X, FLAGS %X", VFS$FNODE_DESCRIPTION(f), f->u.h.hash_mask, f->flags);
#endif
	big_table = VFS_ALLOC_BIGHASH(f->fs);
	if (!big_table) return;

	/* drain every small-hash chain into the matching big-hash chain */
	for (slot = 0; slot < SMALL_HASH_SIZE; slot++) {
		while (!(XLIST_EMPTY(&f->u.h.hash[slot]))) {
			unsigned bucket;
			PAGE *pg = LIST_STRUCT(f->u.h.hash[slot].next, PAGE, hash_entry);
			RAISE_SPL(SPL_VSPACE);
			bucket = PAGE_HASH(pg->id, BIG_HASH_MASK);
			DEL_FROM_LIST(&pg->hash_entry);
			ADD_TO_XLIST(&big_table[bucket], &pg->hash_entry);
			LOWER_SPL(SPL_FS);
		}
	}

	/* switch the fnode over to the new table */
	RAISE_SPL(SPL_VSPACE);
	small_table = f->u.h.hash;
	f->u.h.hash = big_table;
	f->u.h.hash_mask = BIG_HASH_MASK;
	LOWER_SPL(SPL_FS);
	__slfree(small_table);
}

/*
 * EXTEND_HASH(new_size, proc): call REHASH_FNODE on the fnode `f` taken
 * from the enclosing scope when new_size has any bit set at or above
 * BIG_HASH_THRESHOLD (or above bit 31) and the second factor of the
 * condition is non-zero. That factor is derived from f->u.h.hash_mask;
 * NOTE(review): presumably it is all-ones for the small hash and zero for
 * the big hash - confirm against the SMALL_/BIG_HASH_MASK values.
 * The `proc` argument is not used by the expansion. With __DEBUG >= 1 the
 * kernel is halted if f does not have FNODE_HASH set.
 */
#define EXTEND_HASH(new_size, proc)					\
{									\
	if (__DEBUG >= 1 && __unlikely(!(f->flags & FNODE_HASH)))	\
		KERNEL$SUICIDE("EXTEND_HASH: %s: EXTENDING HASH FOR NON-HASHED FNODE, FLAGS %X", VFS$FNODE_DESCRIPTION(f), f->flags);			\
	if (__unlikely((((unsigned)(new_size) & -BIG_HASH_THRESHOLD) | (unsigned)(new_size >> 31 >> 1)) & (((f->u.h.hash_mask / SMALL_HASH_SIZE) & 1) - 1))) {\
		REHASH_FNODE(f);					\
	}								\
}

/*
 * Give file fnode f a page hash (sets FNODE_HASH), converting an existing
 * "spage" representation (FNODE_SPAGE) into a regular cache page.
 *
 * A small hash is used when f->size <= BIG_HASH_THRESHOLD or when the big
 * hash cannot be allocated. If the fnode currently keeps its data in an
 * spage, the spage is unmapped, its first f->size bytes are copied into a
 * newly allocated page, the spage is freed, and the new page is installed
 * as page 0 with valid range [0, f->size).
 *
 * Returns NULL on success, (void *)1 when a hash or page could not be
 * allocated, or the wait queue returned by KERNEL$VM_UNMAP_SPAGE.
 */
static WQ *SET_HASH_(FNODE *f, PROC *proc)
{
	XLIST_HEAD *new_hash;
	FS *fs = f->fs;
	PAGE *p;
#if __DEBUG >= 1
	if (__unlikely((f->flags & (FNODE_DIRECTORY | FNODE_FILE)) != FNODE_FILE)) {
		KERNEL$SUICIDE("SET_HASH_: %s: SETTING HASH FOR INVALID FNODE, FLAGS %X", VFS$FNODE_DESCRIPTION(f), f->flags);
	}
#endif
	if (__likely(f->size <= BIG_HASH_THRESHOLD)) {
		try_small:
		if (__unlikely(!(new_hash = __slalloc(&fs->small_hash))))
			return (void *)1;
		init_small_hash(new_hash);
	} else {
		new_hash = VFS_ALLOC_BIGHASH(fs);
		/* no big hash available: fall back to a small one */
		if (__unlikely(!new_hash)) goto try_small;
	}
	p = NULL;
	if (__unlikely(f->flags & FNODE_SPAGE)) {
		void *v1, *v2;
		WQ *wq;
		if (__unlikely((wq = KERNEL$VM_UNMAP_SPAGE(&f->u.s.s)) != NULL)) {
			free_hash(fs, new_hash);
			return wq;
		}
		f->flags &= ~FNODE_SPAGE;
		LOWER_SPL(SPL_FS);
		p = PAGEZONE_ALLOC(&f->fs->z, KERNEL$ALLOC_IO_PAGE, 0);
		if (__unlikely(!p)) {
			/* undo: the fnode keeps its spage */
			RAISE_SPL(SPL_VSPACE);
			f->flags |= FNODE_SPAGE;
			LOWER_SPL(SPL_FS);
			free_hash(fs, new_hash);
			return (void *)1;
		}
		INIT_PAGE(p, 0xaa);
		v1 = KERNEL$MAP_PHYSICAL_PAGE(p);
		v2 = (char *)KERNEL$MAP_PHYSICAL_PAGE(f->u.s.s.page) + f->u.s.s.offset;
#if __DEBUG >= 1
		if (__unlikely(f->size > __PAGE_CLUSTER_SIZE - PG_SIZE))
			KERNEL$SUICIDE("SET_HASH_: %s: SPAGE FNODE HAS SIZE %"__64_format"X, FLAGS %X", VFS$FNODE_DESCRIPTION(f), (__u64)f->size, f->flags);
#endif
		/* v1: new page, v2: spage data */
		memcpy(v1, v2, f->size);
		KERNEL$UNMAP_PHYSICAL_BANK(v1);
		KERNEL$UNMAP_PHYSICAL_BANK(v2);
		VFS_FREE_SPAGE(f);
	}
	/* f->u.s is no longer read below; initialise the hashed view f->u.h */
	f->u.h.hash = new_hash;
	f->u.h.hash_mask = IS_SMALL_HASH(new_hash) ? SMALL_HASH_MASK : BIG_HASH_MASK;
	INIT_LIST(&f->u.h.clean);
	INIT_LIST(&f->u.h.dirty);
	if (__unlikely(p != NULL)) {
		SETUP_FNODE_PAGE(f, p, 0, proc);
		p->valid_from = 0;
		p->valid_to = f->size;
	}
	RAISE_SPL(SPL_VSPACE);
	f->flags |= FNODE_HASH;
	LOWER_SPL(SPL_FS);
	return NULL;
}

/*
 * SET_HASH(proc, fail_no_space, fail_wq): make sure the fnode `f` taken
 * from the enclosing scope has FNODE_HASH set, calling SET_HASH_ if not.
 * When SET_HASH_ returns (void *)1 the statement(s) fail_no_space are
 * executed; for any other non-NULL result fail_wq is executed with the
 * returned wait queue available as `xwq`.
 */
#define SET_HASH(proc, fail_no_space, fail_wq)				\
{									\
	if (__unlikely(!(f->flags & FNODE_HASH))) {			\
		WQ *xwq = SET_HASH_(f, (proc));				\
		if (__unlikely(xwq != NULL)) {				\
			if (xwq == (void *)1) {fail_no_space;}		\
			else {fail_wq;}					\
		}							\
	}								\
}

/*
 * Decide whether request rq at file position pos qualifies for direct IO.
 *
 * Returns non-zero only when all of the following hold:
 *  - pos lies below the file size rounded down to a whole sector,
 *  - both pos and the request's buffer pointer are sector-aligned,
 *  - the request is at least one sector long.
 * All three tests are always evaluated (bitwise AND, no short-circuit).
 */
static int DIRECT_IO_CAPABLE(_u_off_t pos, SIORQ *rq, FNODE *f)
{
	unsigned misalignment = ((unsigned)pos | (unsigned)rq->v.ptr) & (BIO_SECTOR_SIZE - 1);
	int below_last_full_sector = __likely(pos < (f->size & ~(_u_off_t)(BIO_SECTOR_SIZE - 1)));
	int sector_aligned = __likely(!misalignment);
	int at_least_one_sector = __likely(rq->v.len >= BIO_SECTOR_SIZE);

	return below_last_full_sector & sector_aligned & at_least_one_sector;
}

#define sio
#include "VFSFILEI.I"

#undef sio
#include "VFSFILEI.I"

/*
 * Best-effort read-ahead of file f starting at offset pos on behalf of
 * proc. Every failure or obstacle (offset past end of file, spage fnode,
 * no hash, no memory, page busy or wanting free, no pagein request) makes
 * the function return silently without reading.
 *
 * On the normal path a page is looked up or created, a prefetch pagein
 * request is obtained and VFS_READ_FNODE_PAGE is started with the
 * filesystem's page_readahead as length hint.
 */
void VFS$PREFETCH_FILE(_u_off_t pos, FNODE *f, PROC *proc)
{
	PAGEINRQ *pgin;
	PAGE *p;
	WQ *wq;
	unsigned ppos;
	if (__unlikely(pos >= f->size)) return;
	if (__unlikely(f->flags & FNODE_SPAGE)) return;
	SET_HASH(proc, return, LOWER_SPL(SPL_FS); return);
	p = FIND_PAGE(pos, f, NULL);
	if (__likely(!p)) {
		/* no page yet: allocate one unless the fnode is being freed or
		   the process may not allocate */
		if (__unlikely(f->flags & FNODE_WANTFREE)) return;
		if (__unlikely(KERNEL$MAY_ALLOC(proc, __PAGE_CLUSTER_SIZE) != NULL)) return;
		p = PAGEZONE_ALLOC(&f->fs->z, KERNEL$ALLOC_IO_PAGE, 0);
		if (__unlikely(!p)) return;
		INIT_PAGE(p, 0xa9);
		SETUP_FNODE_PAGE(f, p, pos, proc);
	} else {
		if (__unlikely(p->flags & (PAGE_WANTFREE | (PAGE_BUSY & ~PAGE_BUSY_1)))) return;
		if (p->flags & PAGE_BUSY_1) {
			/* a read is already in flight: only continue behind it,
			   and only if the page has no valid data yet and the
			   read does not already cover the whole page */
			if (__unlikely(p->valid_to)) return;
			if (__unlikely(p->io_to == __PAGE_CLUSTER_SIZE)) return;
			pos = (pos & ~(_u_off_t)__PAGE_CLUSTER_SIZE_MINUS_1) | p->io_to;
			if (__unlikely(pos >= f->size)) return;
		}
	}
	/* offset within the page cluster */
	ppos = (unsigned long)pos & __PAGE_CLUSTER_SIZE_MINUS_1;
	GET_PAGEINRQ(pgin, f->fs, proc, 1, 0, label, LOWER_SPL(SPL_FS); return;);
	pgin->page = p;
	pgin->fnode = f;
	pgin->vdesc.len = pgin->fs->page_readahead;
	VFS_READ_FNODE_PAGE(pgin, ppos);
}

/*
 * Try to satisfy the read described by pgin->brq from the buffer cache.
 *
 * As long as VFS$GET_BUFFER returns data for the current start sector, the
 * sectors available in that buffer (limited to the rest of the buffer's
 * page cluster, the request length and, for partial buffers, the first
 * invalid sector) are copied to the destination and the request's
 * sec / nsec / rdesc.v.ptr / rdesc.v.len are advanced accordingly.
 *
 * prefetch - when non-zero, the two following page clusters are prefetched
 *            into the buffer cache each time a buffer is hit
 *
 * Returns NULL when the whole request was served from the cache,
 * &buf->wait when the lookup left a buffer that is busy or being written,
 * and (WQ *)1 otherwise (the remaining part has to be read from disk).
 */
static WQ *FULFIL_FROM_BUFFER_CACHE(PAGEINRQ *pgin, int prefetch)
{
	void *data, *ptr;
	BUFFER *buf;
	while (buf = NULL, data = VFS$GET_BUFFER(pgin->fs, pgin->brq.rrq.sec, (pgin->fs->pageio_mask >> BIO_SECTOR_SIZE_BITS) + 1 + GET_BUFFER_FILEDATA, &buf, pgin->brq.rrq.proc)) {
		unsigned n;
		if (__likely(prefetch)) {
			VFS$PREFETCH_BUFFER(pgin->fs, pgin->brq.rrq.sec + __SECTORS_PER_PAGE_CLUSTER, READ_BUFFER_FILEDATA, pgin->tag.proc);
			VFS$PREFETCH_BUFFER(pgin->fs, pgin->brq.rrq.sec + 2 * __SECTORS_PER_PAGE_CLUSTER, READ_BUFFER_FILEDATA, pgin->tag.proc);
		}
		/* n: sectors from the start sector to the end of its page
		   cluster, limited by the request length */
		n = __SECTORS_PER_PAGE_CLUSTER - ((unsigned)pgin->brq.rrq.sec & __SECTORS_PER_PAGE_CLUSTER_MINUS_1);
		if (n > pgin->brq.rrq.nsec) n = pgin->brq.rrq.nsec;
		if (__likely(buf->flags & B_PARTIAL)) {
			/* partial buffer: stop at the first sector not marked
			   valid, rounded down to the page-IO granularity; the
			   scan starts at the second sector */
			unsigned i, off = (unsigned)pgin->brq.rrq.sec & __SECTORS_PER_PAGE_CLUSTER_MINUS_1;
			for (i = 1; i < n; i++) if (__unlikely(!BUFFER_VALIDMAP_TEST(buf, i + off))) {
				n = i & ~(pgin->fs->pageio_mask >> BIO_SECTOR_SIZE_BITS);
#if __DEBUG >= 1
				if (__unlikely(!n))
					KERNEL$SUICIDE("FULFIL_FROM_BUFFER_CACHE: VFS$GET_BUFFER MISBEHAVED AND RETURNED INVALID BUFFER");
#endif
				break;
			}
		}
		ptr = KERNEL$MAP_PHYSICAL_BANK(pgin->brq.rdesc.v.ptr);
		memcpy(ptr, data, n << BIO_SECTOR_SIZE_BITS);
		KERNEL$UNMAP_PHYSICAL_BANK(ptr);
		VFS$PUT_BUFFER(data);
		/*__debug_printf("%s: got %d sectors\n", pgin->fnode->name, n);*/
		if (__likely(!(pgin->brq.rrq.nsec -= n))) return NULL;
		pgin->brq.rrq.sec += n;
		pgin->brq.rdesc.v.ptr += n << BIO_SECTOR_SIZE_BITS;
		pgin->brq.rdesc.v.len -= n << BIO_SECTOR_SIZE_BITS;
	}
	if (__unlikely(buf != NULL) && buf->flags & (B_BUSY | B_WRITING)) return &buf->wait;
	return (WQ *)1;
}

extern AST_STUB SPAGE_READ;

/*
 * Read the whole content of an spage fnode (pgin->fnode) into its spage.
 *
 * The read covers f->size rounded up to the page-IO granularity, starting
 * at disk sector f->disk_blk, with SPAGE_READ as completion handler. The
 * data is taken from the buffer cache when possible; otherwise a block IO
 * request is submitted.
 *
 * prefetch - when non-zero and a disk read is needed, the request is
 *            extended to the end of the page cluster following the file
 *            data, with the extra sectors going into a newly created
 *            partial buffer (pgin->brq.buf).
 *
 * If the buffer cache lookup hits a busy buffer, the spage is dropped
 * (FNODE_SPAGE cleared, spage freed) and the caller is parked on that
 * buffer's wait queue.
 */
static void VFS_READ_FNODE_SPAGE(PAGEINRQ *pgin, int prefetch)
{
	BUFFER *buf;
	__d_off sec;
	unsigned boff;
	WQ *a;
	FNODE *f = pgin->fnode;
	f->readers++;
	pgin->brq.rdesc.v.ptr = KERNEL$PAGE_2_PHYS(f->u.s.s.page) | f->u.s.s.offset;
	pgin->brq.rdesc.v.len = ((unsigned long)f->size + pgin->fs->pageio_mask) & ~pgin->fs->pageio_mask;
	pgin->brq.buf = NULL;
	pgin->brq.rdesc.next = NULL;
	pgin->brq.rrq.fn = SPAGE_READ;
	pgin->brq.rrq.sec = f->disk_blk;
	pgin->brq.rrq.nsec = pgin->brq.rdesc.v.len >> BIO_SECTOR_SIZE_BITS;
	pgin->brq.rrq.flags = BIO_READ;
	pgin->brq.rrq.desc = &pgin->brq.rdesc;
	pgin->brq.rrq.proc = pgin->tag.proc;
	pgin->brq.rrq.fault_sec = -1;
	/* a recorded write error fails the read through the normal
	   completion handler */
	if (__unlikely(pgin->fs->write_error) && !pgin->fs->ignore_write_errors) {
		pgin->brq.rrq.status = -EIO;
		CALL_AST(&pgin->brq.rrq);
		return;
	}
	a = FULFIL_FROM_BUFFER_CACHE(pgin, 1);
	if (__likely(!a)) {
		/* fully served from the buffer cache */
		pgin->brq.rrq.status = 0;
		CALL_AST(&pgin->brq.rrq);
		return;
	}
	if (__unlikely(a != (WQ *)1)) {
		IORQ *caller;
		RAISE_SPL(SPL_VSPACE);
		f->flags &= ~FNODE_SPAGE;
		LOWER_SPL(SPL_FS);
		VFS_FREE_SPAGE(f);
		caller = VFS$FREE_PAGEIN(pgin);
		WQ_WAIT_F(a, caller);
		return;
	}
	if (__unlikely(!prefetch)) goto no_prefetch;
	/* sec: first sector after the file data; boff: its offset within
	   its page cluster */
	sec = pgin->brq.rrq.sec + pgin->brq.rrq.nsec;
	boff = (unsigned)sec & __SECTORS_PER_PAGE_CLUSTER_MINUS_1;
	if (__unlikely(sec + (__SECTORS_PER_PAGE_CLUSTER - boff) > pgin->fs->size)) goto no_prefetch;
	buf = CREATE_EMPTY_BUFFER(pgin->fs, sec, pgin->tag.proc);
	if (__unlikely(!buf) || __unlikely((unsigned long)buf & 1)) goto no_prefetch;
	/* append the rest of the page cluster to the request, landing in buf */
	pgin->brq.buf = buf;
	pgin->brq.rdesc.next = &buf->brq.rdesc;
	buf->brq.rdesc.v.ptr = buf->data | (boff << BIO_SECTOR_SIZE_BITS);
	pgin->brq.rrq.nsec += __SECTORS_PER_PAGE_CLUSTER - boff;
	buf->brq.rdesc.v.len = (__SECTORS_PER_PAGE_CLUSTER - boff) << BIO_SECTOR_SIZE_BITS;
	buf->brq.rdesc.next = NULL;
	buf->read_from = boff;
	buf->read_to = __SECTORS_PER_PAGE_CLUSTER;
	buf->flags |= B_BUSY | B_PARTIAL | B_FILEDATA_CREATED;
	BUFFER_VALIDMAP_CLEAR(buf);
	no_prefetch:
	CALL_IORQ_CANCELABLE(&pgin->brq.rrq, KERNEL$BIO, pgin->caller);
}

/*
 * Completion handler for the spage read started in VFS_READ_FNODE_SPAGE.
 *
 * If a prefetch buffer was attached it is completed first via
 * BUFFER_READ_PARTIAL. On error with a prefetch buffer attached the read
 * is retried once without prefetch; on error without one the spage is
 * dropped and the error is passed to the caller. On success the part of
 * the spage behind the file data is zeroed, the spage's busy flag is
 * cleared, waiters are woken and the caller is re-dispatched through
 * caller->tmp1.
 */
DECL_AST(SPAGE_READ, SPL_FS, BIORQ)
{
	FNODE *f;
	BUFFER *buf;
	PAGEINRQ *pgin = GET_STRUCT(RQ, PAGEINRQ, brq.rrq);
	IO_DISABLE_CHAIN_CANCEL(SPL_FS, pgin->caller);
	if (__likely((buf = pgin->brq.buf) != NULL)) BUFFER_READ_PARTIAL(buf, pgin->brq.rrq.status, pgin->tag.proc);
	f = pgin->fnode;
	if (__unlikely(pgin->brq.rrq.status < 0)) {
		IORQ *caller;
		if (buf) {
			/* VFS_READ_FNODE_SPAGE increments readers again */
			f->readers--;
			VFS_READ_FNODE_SPAGE(pgin, 0);
			RETURN;
		}
		RAISE_SPL(SPL_VSPACE);
		f->flags &= ~FNODE_SPAGE;
		LOWER_SPL(SPL_FS);
		VFS_FREE_SPAGE(f);
		pgin->caller->status = pgin->brq.rrq.status;
		caller = VFS$FREE_PAGEIN(pgin);
		RETURN_AST(caller);
	} else {
		IORQ *caller;
		/* zero from the end of the file data to the end of the spage
		   (n_pages << PG_SIZE_BITS bytes long) */
		char *v = (char *)KERNEL$MAP_PHYSICAL_PAGE(f->u.s.s.page) + f->u.s.s.offset + (unsigned long)f->size;
		memset(v, 0, (f->u.s.s.n_pages << PG_SIZE_BITS) - (unsigned long)f->size);
		KERNEL$UNMAP_PHYSICAL_BANK(v);
		RAISE_SPL(SPL_VSPACE);
		f->u.s.s.flags &= ~PAGE_BUSY;
		WQ_WAKE_ALL(&f->u.s.s.wait);
		LOWER_SPL(SPL_FS);
		caller = VFS$FREE_PAGEIN(pgin);
		RETURN_IORQ_LSTAT(caller, (IO_STUB *)caller->tmp1);
	}
}

extern AST_STUB FILE_BIO_DONE;

/*
 * Block IO request issued on a file handle: when the file can be accessed
 * directly, the request is translated to a block IO request on the
 * underlying device using the fnode's cached extent; in every other case
 * it is handed to KERNEL$BIO_EMU (label `emu`).
 *
 * The direct path requires, as checked below: a plain file fnode (no
 * spage, not dirty, not uncommitted, not write-locked) on a writable
 * filesystem, no dirty cached pages (and, if clean pages are cached, a
 * read request with no pending writes), size equal to disk_size, and -
 * unless BIO_FLUSH is set - a sector range inside the file that is
 * completely covered by one extent.
 *
 * The translated request is completed by FILE_BIO_DONE; brq->buf carries
 * the fnode and brq->next the original request.
 */
DECL_IOCALL(VFS_FILE_BIO, SPL_FS, BIORQ)
{
	HANDLE *h = RQ->handle;
	FNODE *f;
	BRQ *brq;
	/* handle no longer belongs to this driver: re-dispatch */
	if (__unlikely(h->op != &VFS_FILE_OPERATIONS)) RETURN_IORQ_LSTAT(RQ, KERNEL$WAKE_BIO);
	SWITCH_PROC_ACCOUNT_KERNEL_OPTIMIZE(RQ->proc, SPL_X(SPL_FS));
	if (__unlikely(!HAS_CAPABILITY(h, CAP_READ | CAP_WRITE | CAP_APPEND)))
		emu: RETURN_IORQ_LSTAT(RQ, KERNEL$BIO_EMU);
	f = h->fnode;
	if (__unlikely((f->flags & (FNODE_FILE | FNODE_SPAGE | FNODE_DIRTY | FNODE_UNCOMMITTED | FNODE_WRITELOCK)) != FNODE_FILE)) goto emu;
	if (__unlikely(f->fs->flags & FS_RO)) goto emu;
	/* similar code in DIO */
	RAISE_SPL(SPL_VSPACE);
	if (__unlikely(f->flags & FNODE_HASH)) {
		if (__unlikely(!LIST_EMPTY(&f->u.h.dirty))) goto emu;
		if (__likely(!LIST_EMPTY(&f->u.h.clean))) {
			if (__unlikely(RQ->flags & BIO_WRITE)) goto emu;
			if (__unlikely(f->pending_writes)) goto emu;
		} else {
			/* no cached pages at all: drop the empty hash */
			f->flags &= ~FNODE_HASH;
			free_hash(f->fs, f->u.h.hash);
			WQ_WAKE_ALL(&f->fs->freemem);
		}
	}
	if (__unlikely(f->size != f->disk_size)) goto emu;
	LOWER_SPL(SPL_FS);
	if (__likely(!(RQ->flags & BIO_FLUSH))) {
		if (__unlikely(RQ->flags & BIO_WRITE) && __unlikely(!(f->flags & FNODE_DIRTY))) {
			WQ *wq = VFS$MAY_DIRTY(RQ->handle->name_addrspace, f->fs);
			if (__unlikely(wq != NULL)) goto emu;
		}
		/* last requested sector must lie inside the file */
		if (__unlikely((__usec_t)RQ->sec + ((unsigned long)RQ->nsec - 1) >= f->disk_size >> BIO_SECTOR_SIZE_BITS)) goto emu;
		if (__unlikely(!BMAP_TEST(f, RQ->sec))) {
			f->fs->fsops->sync_bmap(f, RQ->sec, 1);
			if (__unlikely(!BMAP_TEST(f, RQ->sec))) goto emu;
		}
		/* the whole range must fit into the cached extent */
		if (__unlikely((__usec_t)RQ->sec - f->file_blk + (unsigned long)RQ->nsec > f->run_length)) goto emu;
	} else {
		if (__unlikely(!LIST_EMPTY(&f->fs->dirty_buffers))) goto emu;
	}
	brq = __slalloc(&f->fs->brq);
	if (__unlikely(!brq)) goto emu;
	f->readers++;
	/* NOTE(review): adds the raw flag bit; FILE_BIO_DONE decrements by
	   one, so this presumably relies on BIO_WRITE == 1 - confirm. */
	f->dio_writers += RQ->flags & BIO_WRITE;
	brq->rrq.fn = FILE_BIO_DONE;
	/* translate the file-relative sector to a disk sector (not for flush) */
	brq->write_sec = brq->rrq.sec = RQ->sec + (__likely(!(RQ->flags & BIO_FLUSH)) ? f->disk_blk - f->file_blk : 0);
	brq->rrq.nsec = RQ->nsec;
	brq->rrq.flags = RQ->flags;
	brq->rrq.desc = RQ->desc;
	brq->rrq.proc = RQ->proc;
	brq->rrq.fault_sec = -1;
	brq->buf = (void *)f;
	brq->next = (void *)RQ;
	RETURN_IORQ_CANCELABLE(&brq->rrq, KERNEL$BIO, RQ);
}

DECL_AST(FILE_BIO_DONE, SPL_FS, BIORQ)
{
	BRQ *brq = GET_STRUCT(RQ, BRQ, rrq);
	FNODE *f;
	BIORQ *caller = (void *)brq->next;
	IO_DISABLE_CHAIN_CANCEL(SPL_FS, caller);
	if (__unlikely((caller->status = brq->rrq.status) < 0)) {
		if (!(brq->rrq.flags & BIO_FLUSH) && brq->rrq.fault_sec != -1) caller->fault_sec = brq->rrq.fault_sec - brq->write_sec + caller->sec;
		else caller->fault_sec = -1;
	}
	f = (void *)brq->buf;
	if (__unlikely(brq->rrq.flags & BIO_WRITE)) {
		if (__likely(!--f->dio_writers)) WQ_WAKE_ALL(&f->wait);
		VFS$INVALIDATE_FILEBUFFERS(f->fs, brq->write_sec, caller->nsec);
		time(&f->mtime);
		VFS$SET_DIRTY(f);
	}
	if (__likely(!--f->readers)) WQ_WAKE_ALL(&f->wait);
	__slfree(brq);
	RETURN_AST(caller);
}

/*
 * Extend file f to size off by appending zero bytes, one page cluster at
 * a time. Each step zeroes the range in the cache page covering the
 * current end of file, marks it valid and dirty, moves the page to the
 * fnode's dirty list if needed and advances f->size.
 *
 * Returns:
 *  NULL                  - file extended to off
 *  __ERR_PTR(-EFBIG)     - off exceeds the filesystem's max_filesize
 *  __ERR_PTR(-EINTR)     - lockup level reached between pages (non-kernel proc)
 *  (void *)2             - no page/hash memory, but VFS$FREE_SOME_DATA
 *                          reported non-zero
 *  (void *)3             - the page at end of file must be read first
 *                          (see VFS_DO_EXTEND_READ)
 *  any other pointer     - wait queue the caller should wait on
 *
 * The file may have been partially extended when a non-NULL value is
 * returned after the first page.
 */
WQ *VFS_EXTEND_FILE(FNODE *f, _u_off_t off, PROC *proc)
{
	unsigned from, to;
	PAGE *p;
	char *v;
	if (__unlikely(off > f->fs->max_filesize)) return __ERR_PTR(-EFBIG);
	if (__unlikely(f->flags & (FNODE_WANTFREE | FNODE_WRITEPAGES))) return &f->wait;
	SET_HASH(proc, goto wait, return xwq);
	EXTEND_HASH(off, proc);
	/* from here on `off` is the number of bytes still to append */
	off -= f->size;
	next_page:
	/* [from, to): byte range to zero within the current page cluster */
	from = (unsigned long)f->size & __PAGE_CLUSTER_SIZE_MINUS_1;
	to = off > __PAGE_CLUSTER_SIZE - from ? __PAGE_CLUSTER_SIZE : (unsigned long)off + from;
	p = FIND_PAGE(f->size, f, proc);
	if (__unlikely(!p)) {
		WQ *wq;
		if (__unlikely((wq = KERNEL$MAY_ALLOC(proc, __PAGE_CLUSTER_SIZE)) != NULL))
			return wq;
		p = PAGEZONE_ALLOC(&f->fs->z, KERNEL$ALLOC_IO_PAGE, 0);
		if (__unlikely(!p)) wait: {
			FS *fs = f->fs;
			if (__likely(VFS$FREE_SOME_DATA(fs))) return (void *)2;
			return &fs->freemem;
		}
		INIT_PAGE(p, 0xa4);
		SETUP_FNODE_PAGE(f, p, f->size, proc);
	} else if (__unlikely(p->flags & (PAGE_BUSY | PAGE_WANTFREE))) return &p->wait;
	/* the page must be read first if it has no valid data and the end of
	   file is not page-IO aligned, or if its valid data ends before the
	   end of file */
	if (__likely(!p->valid_to)) {
		if (__likely(from & f->fs->pageio_mask))
			read_page: return (void *)3;
	} else {
		if (__unlikely(from > p->valid_to)) goto read_page;
	}
	if (__likely(!(p->flags & PAGE_WRITEABLE))) {
		WQ *wq;
		wq = VFS$MAY_DIRTY(proc, f->fs);
		if (__unlikely(wq != NULL)) return wq;
		wq = f->fs->fsops->account(f, ACCOUNT_PAGE, p);
		if (__unlikely(wq != NULL)) return wq;
	}
	v = KERNEL$MAP_PHYSICAL_PAGE(p);
	memset(v + from, 0, to - from);
	KERNEL$UNMAP_PHYSICAL_BANK(v);
	RAISE_SPL(SPL_VSPACE);
	if (__likely(!(p->flags & PAGE_WRITEABLE))) {
		p->flags |= PAGE_WRITEABLE;
		DEL_FROM_LIST(&p->node_entry);
		ADD_TO_LIST_END(&f->u.h.dirty, &p->node_entry);
	}
	/* grow the valid and dirty ranges to include [from, to) */
	if (__likely(from < p->valid_from)) p->valid_from = from;
	if (__likely(to > p->valid_to)) p->valid_to = to;
	if (__likely(from < p->dirty_from)) p->dirty_from = from;
	if (__likely(to > p->dirty_to)) p->dirty_to = to;
	CHECK_PAGE(p, 0);
	LOWER_SPL(SPL_FS);
	time(&f->mtime);
	VFS$SET_DIRTY(f);
	SWITCH_PROC_ACCOUNT(proc, SPL_X(SPL_FS));
	f->size += to - from;
	if (off -= to - from) {
		/* more to append: bail out between pages when the lockup
		   level demands it */
		if (__unlikely(KERNEL$LOCKUP_LEVEL >= LOCKUP_LEVEL_ONE_PASS)) {
			if (__unlikely(proc == &KERNEL$PROC_KERNEL)) return &KERNEL$LOCKUP_EVENTS;
			else return __ERR_PTR(-EINTR);
		}
		goto next_page;
	}
	return NULL;
}

/*
 * Start reading the page cluster that contains the current end of file `f`,
 * on behalf of ioctl request `rq`.  The SUICIDE message below indicates this
 * is called after VFS_EXTEND_FILE returned 3.
 *
 * The read begins at the end-of-file offset within the cluster, rounded down
 * to the filesystem's page-I/O granularity.  If a needed resource is not
 * available, `rq` is queued on a wait queue and the function just returns.
 */
void VFS_DO_EXTEND_READ(FNODE *f, IOCTLRQ *rq)
{
	PAGEINRQ *pgin;
	PAGE *p;
	unsigned start;
	WQ *wq;	/* used by the GET_PAGEINRQ failure block; presumably set by the macro */

	SET_HASH(rq->handle->name_addrspace, goto out_of_memory, WQ_WAIT_F(xwq, rq); LOWER_SPL(SPL_FS); return);

	p = FIND_PAGE(f->size, f, rq->handle->name_addrspace);
	if (__unlikely(p == NULL))
		KERNEL$SUICIDE("DO_EXTEND_READ: %s: PAGE NOT FOUND BUT VFS_EXTEND_FILE RETURNED 3", VFS$FNODE_DESCRIPTION(f));

	/* Offset of end-of-file inside the cluster, aligned down for page I/O. */
	start = (unsigned long)f->size & __PAGE_CLUSTER_SIZE_MINUS_1;
	start &= ~f->fs->pageio_mask;

	GET_PAGEINRQ(pgin, f->fs, rq->handle->name_addrspace, 0, 0, label, {
		WQ_WAIT_F(wq, rq);
		LOWER_SPL(SPL_FS);
		return;
	});

	pgin->page = p;
	pgin->fnode = f;
	pgin->caller = (IORQ *)rq;
	pgin->vdesc.len = -1;
	VFS_READ_FNODE_PAGE(pgin, start);
	return;

	out_of_memory:
	/* Queue the request on the filesystem's free-memory queue, then try to reclaim. */
	WQ_WAIT_F(&f->fs->freemem, rq);
	VFS$FREE_SOME_DATA(f->fs);
}

/*
 * Non-blocking lookup of the cached page cluster that covers offset `idx` of
 * the file behind handle `h`.
 *
 * wr: PF_* access bits; anything outside PF_RW yields -EINVAL.
 *
 * Returns:
 *   - the PAGE when it is fully usable right now (for the single-page
 *     "spage" representation, a pointer to f->u.s.s cast to PAGE *);
 *   - NULL when the page cannot be handed out immediately (not cached, busy,
 *     partially valid, or dirtying is not currently allowed);
 *   - __ERR_PTR(-EACCES / -EROFS / -EINVAL) on permission or argument errors;
 *   - __ERR_PTR(-ERANGE) when the page is not available and its cluster
 *     starts at or beyond the file size.
 */
PAGE *VFS_GET_PAGE(HANDLE *h, __v_off idx, int wr)
{
	int spl;
	PAGE *p;
	FNODE *f;
#if __DEBUG >= 1
	if (__unlikely(SPLX_BELOW(KERNEL$SPL, SPL_X(SPL_FS))))
		KERNEL$SUICIDE("VFS$GET_PAGE AT SPL %08X", KERNEL$SPL);
#endif
	f = h->fnode;
	CHECK_FNODE_HANDLE(f, h);
	/*__debug_printf("get page(%s,%Lx,%d).", f->name, idx, wr);*/
	if (__unlikely(wr & ~PF_RW))
		return __ERR_PTR(-EINVAL);

	/* Access rights: writers need both capabilities and a writable filesystem. */
	if (__unlikely(wr & PF_WRITE)) {
		if (__unlikely(!HAS_CAPABILITY(h, CAP_READ | CAP_WRITE)))
			return __ERR_PTR(-EACCES);
		if (__unlikely(f->fs->flags & FS_RO))
			return __ERR_PTR(-EROFS);
	} else {
		if (__unlikely(!HAS_CAPABILITY(h, CAP_READ)))
			return __ERR_PTR(-EACCES);
	}

	if (__unlikely((f->flags & (FNODE_HASH | FNODE_WANTFREE | FNODE_WRITEPAGES)) != FNODE_HASH)) {
		/* No usable page hash; only a read of a stable spage can be served. */
		if (__unlikely((f->flags & (FNODE_SPAGE | FNODE_WANTFREE | FNODE_WRITEPAGES)) != FNODE_SPAGE))
			goto unavailable;
		if (__unlikely(idx >= __PAGE_CLUSTER_SIZE))
			goto unavailable;
		if (__unlikely(wr & PF_WRITE))
			goto unavailable;
		if (__unlikely(f->u.s.s.flags & PAGE_BUSY))
			goto unavailable;
		RAISE_SPL(SPL_CACHE);
		KERNEL$CACHE_TOUCH_VM_ENTITY(&f->e, h->name_addrspace);
		LOWER_SPL(SPL_FS);
		return (PAGE *)&f->u.s.s;
	}

	p = FIND_PAGE(idx, f, h->name_addrspace);
	if (__unlikely(p == NULL))
		goto unavailable;

	/* Consume the one-shot prefetch hint left on this page. */
	if (__unlikely(p->prefetch_hint)) {
		unsigned ph = p->prefetch_hint;
		p->prefetch_hint = 0;
		VFS$PREFETCH_FILE(p->id + ph, f, h->name_addrspace);
	}

	if (__unlikely(p->valid_from > 0))
		return NULL;
	if (__unlikely(p->flags & (PAGE_BUSY | PAGE_WANTFREE)))
		return NULL;

	if (__unlikely(p->valid_to < __PAGE_CLUSTER_SIZE)) {
		/* The missing tail still lies inside the file: it has to be read. */
		if (__unlikely((p->id | p->valid_to) < f->size))
			return NULL;
		/* The tail is past end of file: zero it and mark the cluster fully valid. */
		zero_end(p, p->valid_to, __PAGE_CLUSTER_SIZE);
		RAISE_SPL(SPL_VSPACE);
		p->valid_to = __PAGE_CLUSTER_SIZE;
		LOWER_SPL(SPL_FS);
	}

	if (__unlikely(wr & PF_WRITE) && __unlikely(!(p->flags & PAGE_WRITEABLE))) {
		/* First write access: check dirty limits and accounting, then move to the dirty list. */
		if (__unlikely(VFS$MAY_DIRTY(h->name_addrspace, f->fs) != NULL))
			return NULL;
		if (__unlikely(f->fs->fsops->account(f, ACCOUNT_PAGE, p) != NULL))
			return NULL;
		spl = KERNEL$SPL;
		RAISE_SPL(SPL_VSPACE);
		p->flags |= PAGE_WRITEABLE;
		DEL_FROM_LIST(&p->node_entry);
		ADD_TO_LIST_END(&f->u.h.dirty, &p->node_entry);
		LOWER_SPLX(spl);
		VFS$SET_DIRTY(f);
	}
	return p;

	unavailable:
	if (__unlikely((idx & ~(__v_off)__PAGE_CLUSTER_SIZE_MINUS_1) >= f->size))
		return __ERR_PTR(-ERANGE);
	return NULL;
}

extern IO_STUB CONT_PAGEIN_RQ;

/*
 * Allocate and fill in a page-in request for the range described by `desc`.
 *
 * desc: virtual descriptor; desc->vspace is the file handle.
 * rq:   the requester, stored as the page-in's caller.
 * wr:   PF_* access bits, stored in the request.
 *
 * Returns the new request cast to IORQ *, with tmp1 set to CONT_PAGEIN_RQ.
 * Returns NULL when the fnode is being freed or written out, or when
 * GET_PAGEINRQ runs its failure block; in both cases `rq` has been queued on
 * a wait queue.  Must be called at SPL_VSPACE (checked in debug builds).
 */
IORQ *VFS_GET_PAGEIN_RQ(VDESC *desc, IORQ *rq, int wr)
{
	HANDLE *h = desc->vspace;
	FNODE *f = h->fnode;
	PAGEINRQ *pgin;
	WQ *wq;	/* used by the GET_PAGEINRQ failure block; presumably set by the macro */
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_VSPACE)))
		KERNEL$SUICIDE("VFS$GET_PAGEIN_RQ AT SPL %08X", KERNEL$SPL);
#endif
	/* The fnode is on its way out or its pages are being written: retry later. */
	if (__unlikely(f->flags & (FNODE_WANTFREE | FNODE_WRITEPAGES))) {
		WQ_WAIT_F(&f->wait, rq);
		return NULL;
	}
	CHECK_FNODE_HANDLE(f, h);

	GET_PAGEINRQ(pgin, f->fs, h->name_addrspace, 0, 1, label, {
		WQ_WAIT_F(wq, rq);
		return NULL;
	});

	memcpy(&pgin->vdesc, desc, sizeof(VDESC));
	pgin->wr = wr;
	pgin->caller = rq;
	pgin->tmp1 = (unsigned long)CONT_PAGEIN_RQ;
	return (IORQ *)pgin;
}

/*
 * I/O continuation for a page-in request (VFS_GET_PAGEIN_RQ stores this stub
 * in pgin->tmp1).  Declared for SPL_FS.
 *
 * It validates the request against the handle (range, capabilities,
 * read-only filesystem) and then makes the requested range resident:
 *   - small read-only files without a page hash are loaded in one piece via
 *     VFS_ALLOC_SPAGE / VFS_READ_FNODE_SPAGE;
 *   - otherwise pages are looked up with FIND_PAGE; a missing or not-yet-valid
 *     page is read with VFS_READ_FNODE_PAGE, an already valid one is (for
 *     PF_WRITE) made writeable and the request advances to the next page.
 * On completion, the page-in is freed and control returns to the caller's
 * own tmp1 stub.  Errors are reported in pgin->caller->status.  When a
 * resource is busy, the caller (or this request) is queued on a wait queue.
 */
DECL_IOCALL(CONT_PAGEIN_RQ, SPL_FS, PAGEINRQ)
{
#define pgin RQ
	HANDLE *h;
	FNODE *f;
	PAGE *p;
	IORQ *caller;
	retry:
	h = pgin->vdesc.vspace;
	/* The handle no longer uses the file operations: just hand the request back. */
	if (__unlikely(h->op != &VFS_FILE_OPERATIONS)) goto xx;
	if (__unlikely(KERNEL$LOCKUP_LEVEL >= LOCKUP_LEVEL_ALL_IORQS)) {
		/* WQ_WAIT_F(&KERNEL$LOCKUP_EVENTS, pgin->caller);
		goto free_pagein; --- this would livelock */
		WQ_WAIT_F(&KERNEL$LOCKUP_EVENTS, RQ);
		RETURN;
	}
	SWITCH_PROC_ACCOUNT(h->name_addrspace, SPL_X(SPL_FS));
	f = h->fnode;
	CHECK_FNODE_HANDLE(f, h);
	pgin->fnode = f;
	/* The range must start inside the file and end no later than the end of
	   the cluster that contains the last byte of the file. */
	if (__unlikely(pgin->vdesc.ptr >= f->size) || __unlikely(pgin->vdesc.ptr + pgin->vdesc.len > ((f->size + __PAGE_CLUSTER_SIZE_MINUS_1) & ~(_u_off_t)__PAGE_CLUSTER_SIZE_MINUS_1))) {
		pgin->caller->status = -ERANGE;
		/* Common error exit: free the page-in and complete the caller with the status set above. */
		err:
		caller = VFS$FREE_EMPTY_PAGEIN(pgin);
		RETURN_AST(caller);
	}
	/* Same access checks as VFS_GET_PAGE: readers need CAP_READ, writers need
	   CAP_READ | CAP_WRITE and a filesystem that is not read-only. */
	if (__likely(!(pgin->wr & PF_WRITE))) {
		if (__unlikely(!HAS_CAPABILITY(h, CAP_READ))) {
			pgin->caller->status = -EACCES;
			goto err;
		}
	} else {
		if (__unlikely(!HAS_CAPABILITY(h, CAP_READ | CAP_WRITE))) {
			pgin->caller->status = -EACCES;
			goto err;
		}
		if (__unlikely(f->fs->flags & FS_RO)) {
			pgin->caller->status = -EROFS;
			goto err;
		}
	}
	/* Single-page ("spage") path: the fnode has neither representation yet and
	   is not write-locked, the file is small, there are no direct-I/O writers,
	   and this is a read. */
	if (!(f->flags & (FNODE_SPAGE | FNODE_HASH | FNODE_WRITELOCK)) && f->size <= __PAGE_CLUSTER_SIZE - PG_SIZE && __likely(!f->dio_writers) && __likely(!(pgin->wr & PF_WRITE))) {
		/* A non-PF_PAGEIO request that reaches past end of file is left to the hash path. */
		if (__unlikely(!(pgin->wr & PF_PAGEIO)) && __unlikely(pgin->vdesc.len > (unsigned long)f->size - (unsigned long)pgin->vdesc.ptr)) goto no_spage;
		/* The block mapping for the start of the file is needed; try to sync it once. */
		if (__unlikely(!BMAP_TEST_0(f))) {
			f->fs->fsops->sync_bmap(f, 0, 1);
			if (__unlikely(!BMAP_TEST_0(f))) goto no_spage;
		}
		/* The mapped run must cover the whole file. */
		if (__unlikely((unsigned long)f->run_length << BIO_SECTOR_SIZE_BITS < (unsigned long)f->size)) goto no_spage;
		if (__unlikely((unsigned long)f->run_length & (f->fs->pageio_mask >> BIO_SECTOR_SIZE_BITS)))
			KERNEL$SUICIDE("CONT_PAGEIN_RQ: %s: UNALIGNED RUN LENGTH %"__64_format"X", VFS$FNODE_DESCRIPTION(f), (__u64)f->run_length);
		/* Allocate enough PG_SIZE pages to hold the file; nonzero means allocation failed. */
		if (__unlikely(VFS_ALLOC_SPAGE(f, ((unsigned long)f->size + PG_SIZE - 1) >> PG_SIZE_BITS))) {
			goto wait;
		}
#if __DEBUG >= 1
		pgin->page = NULL;
#endif
		RAISE_SPL(SPL_VSPACE);
		f->flags |= FNODE_SPAGE;
		LOWER_SPL(SPL_FS);
		/* NOTE(review): the second parameter of VFS_READ_FNODE_SPAGE is declared
		   as `prefetch`, yet this is a demand page-in with a caller attached.
		   Confirm that passing 1 here is intended. */
		VFS_READ_FNODE_SPAGE(pgin, 1);
		RETURN;
	}
	no_spage:
	/* Ensure the fnode has its page hash; on failure either go reclaim memory
	   (wait) or queue the caller on the macro-provided wait queue. */
	SET_HASH(pgin->tag.proc, goto wait, WQ_WAIT_F(xwq, pgin->caller); LOWER_SPL(SPL_FS); goto free_pagein);
	next_page:
	p = FIND_PAGE(pgin->vdesc.ptr, f, pgin->tag.proc);
	if (__likely(!p)) {
		/* Page not cached: allocate a cluster and read it. */
		WQ *wq;
		if (__unlikely((wq = KERNEL$MAY_ALLOC(pgin->tag.proc, __PAGE_CLUSTER_SIZE)) != NULL)) {
			WQ_WAIT_F(wq, pgin->caller);
			/* Drop the page-in without completing the caller; the caller has
			   already been queued on a wait queue by whoever jumps here. */
			free_pagein:
			VFS$FREE_EMPTY_PAGEIN(pgin);
			RETURN;
		}
		p = PAGEZONE_ALLOC(&pgin->fs->z, KERNEL$ALLOC_IO_PAGE, 0);
		if (__unlikely(!p)) {
			/* Out of memory: if reclaim made progress, restart from the top;
			   otherwise queue the caller on the filesystem's free-memory queue. */
			wait:
			if (__likely(VFS$FREE_SOME_DATA(pgin->fs))) goto retry;
			WQ_WAIT_F(&pgin->fs->freemem, pgin->caller);
			goto free_pagein;
		}
		INIT_PAGE(p, 0xa5);
		SETUP_FNODE_PAGE(f, p, pgin->vdesc.ptr, pgin->tag.proc);
		/* Also entered from below when the page exists but the requested offset
		   is outside its valid range. */
		read_page:
		pgin->page = p;
		/* Convert the byte length to sectors, including the unaligned part
		   before ptr within its page-I/O unit, rounded up. */
		pgin->vdesc.len = (((unsigned long)pgin->vdesc.ptr & pgin->fs->pageio_mask) + pgin->vdesc.len + BIO_SECTOR_SIZE - 1) >> BIO_SECTOR_SIZE_BITS;
		VFS_READ_FNODE_PAGE(pgin, (unsigned long)pgin->vdesc.ptr & __PAGE_CLUSTER_SIZE_MINUS_1);
		RETURN;
	} else {
		unsigned x;
		unsigned op;
		/* Consume the one-shot prefetch hint left on this page. */
		if (__unlikely(p->prefetch_hint)) {
			unsigned ph = p->prefetch_hint;
			p->prefetch_hint = 0;
			VFS$PREFETCH_FILE(p->id + ph, f, h->name_addrspace);
		}
		/* Offset of the request inside the page cluster. */
		op = (unsigned long)pgin->vdesc.ptr & __PAGE_CLUSTER_SIZE_MINUS_1;
		if (__unlikely((p->flags & (PAGE_BUSY | PAGE_WANTFREE)))) {
			/* Page is in use: queue the caller on the page and drop the page-in. */
			wop:
			WQ_WAIT_F(&p->wait, pgin->caller);
			goto free_pagein;
		}
		/* The DMA lock counter is saturated; wait for an unlock. */
		if (__unlikely((p->flags & PAGE_DMALOCKCOUNT) == PAGE_DMALOCKCOUNT)) goto wop;
		if (__unlikely(op < p->valid_from) || __unlikely(op >= p->valid_to)) goto read_page;

		if (pgin->wr & PF_WRITE && __unlikely(!(p->flags & PAGE_WRITEABLE))) {
			/* First write access to this page: check dirty limits and accounting. */
			WQ *wq;
			wq = VFS$MAY_DIRTY(h->name_addrspace, f->fs);
			if (__unlikely(wq != NULL)) goto do_wait;
			wq = f->fs->fsops->account(f, ACCOUNT_PAGE, p);
			if (__unlikely(wq != NULL)) {
				/* account() may return an error pointer instead of a wait queue. */
				if (__unlikely(__IS_ERR(wq))) {
					pgin->caller->status = __PTR_ERR(wq);
					goto err;
				}
				/* Keep the page-in and rerun this stub when the queue is woken. */
				do_wait:
				WQ_WAIT(wq, pgin, CONT_PAGEIN_RQ);
				RETURN;
			}
			RAISE_SPL(SPL_VSPACE);
			p->flags |= PAGE_WRITEABLE;
			DEL_FROM_LIST(&p->node_entry);
			ADD_TO_LIST_END(&f->u.h.dirty, &p->node_entry);
			LOWER_SPL(SPL_FS);
			VFS$SET_DIRTY(f);
		}

		/* Bytes of the request satisfied by this page; continue with the next
		   page unless the lockup level asks for a single pass. */
		x = p->valid_to - op;
		if (x < pgin->vdesc.len && __likely(KERNEL$LOCKUP_LEVEL < LOCKUP_LEVEL_ONE_PASS)) {
			pgin->vdesc.ptr += x;
			pgin->vdesc.len -= x;
			SWITCH_PROC_ACCOUNT(h->name_addrspace, SPL_X(SPL_FS));
			goto next_page;
		}

		/* Done: free the page-in and resume the caller at its own tmp1 stub. */
		xx:
		caller = VFS$FREE_EMPTY_PAGEIN(pgin);
		RETURN_IORQ_LSTAT(caller, (IO_STUB *)caller->tmp1);
	}
#undef pgin
}

/*
 * Copy data from the file cache into a kernel buffer without blocking.
 *
 * desc: source range in the file (desc->vspace is the handle, desc->ptr the
 *       offset, desc->len the wanted length); ptr/len are advanced by the
 *       number of bytes copied.
 * buf:  destination (buf->ptr, buf->len) and the SPL to leave at (buf->spl).
 *
 * Returns the number of bytes copied.  At most one cached page (or the
 * fnode's single "spage") is used per call, so the result can be smaller than
 * requested.  Returns 0 when nothing can be copied right now: missing
 * CAP_READ, data not cached or not valid at that offset, page busy, or a
 * pending prefetch hint on the page.
 */
unsigned long VFS_VSPACE_GET(VDESC *desc, __const__ VBUF *buf)
{
	FNODE *f;
	void *src;
	unsigned long n;
	int work_spl;

	/* Work at SPL_FS, or at the caller's SPL when that is not below SPL_FS. */
	if (SPLX_BELOW(buf->spl, SPL_X(SPL_FS)))
		work_spl = SPL_X(SPL_FS);
	else
		work_spl = buf->spl;
	LOWER_SPLX(work_spl);

	if (__unlikely(!HAS_CAPABILITY(desc->vspace, CAP_READ)))
		goto nothing;
	f = desc->vspace->fnode;

	if (__likely((f->flags & (FNODE_HASH | FNODE_WANTFREE | FNODE_WRITEPAGES)) == FNODE_HASH)) {
		/* Hashed pages: copy from the valid part of the page covering desc->ptr. */
		PAGE *p;
		unsigned off;

		p = FIND_PAGE(desc->ptr, f, NULL);
		if (__unlikely(!p))
			goto nothing;
		if (__unlikely(p->prefetch_hint))
			goto nothing;
		off = (unsigned long)desc->ptr & __PAGE_CLUSTER_SIZE_MINUS_1;
		if (__unlikely(off < p->valid_from))
			goto nothing;
		if (__unlikely(off >= p->valid_to))
			goto nothing;
		n = desc->len;
		if (__unlikely(n > buf->len))
			n = buf->len;
		if (n > p->valid_to - off)
			n = p->valid_to - off;
		src = (char *)KERNEL$MAP_PHYSICAL_PAGE(p) + off;
	} else if (__likely((f->flags & (FNODE_SPAGE | FNODE_WANTFREE | FNODE_WRITEPAGES)) == FNODE_SPAGE)) {
		/* Single-page representation: the file's data sits at f->u.s.s.offset inside one page. */
		if (__unlikely(desc->ptr >= (unsigned long)f->size))
			goto nothing;
		n = desc->len;
		if (__unlikely(n > buf->len))
			n = buf->len;
		if (__unlikely(n > (f->u.s.s.n_pages << PG_SIZE_BITS) - (unsigned long)desc->ptr))
			n = (f->u.s.s.n_pages << PG_SIZE_BITS) - (unsigned long)desc->ptr;
		if (__unlikely((f->u.s.s.flags & (PAGE_BUSY | PAGE_WANTFREE))))
			goto nothing;
		src = (char *)KERNEL$MAP_PHYSICAL_PAGE(f->u.s.s.page) + f->u.s.s.offset + (unsigned long)desc->ptr;
	} else {
		goto nothing;
	}

	memcpy(buf->ptr, src, n);
	KERNEL$UNMAP_PHYSICAL_BANK(src);
	LOWER_SPLX(buf->spl);
	desc->ptr += n;
	desc->len -= n;
	return n;

	nothing:
	LOWER_SPLX(buf->spl);
	return 0;
}

/*
 * Copy data from a kernel buffer into an already writeable cached page,
 * without blocking.
 *
 * desc: destination range in the file (desc->vspace is the handle, desc->ptr
 *       the offset, desc->len the length); ptr/len are advanced by the number
 *       of bytes copied.
 * buf:  source (buf->ptr, buf->len) and the SPL to leave at (buf->spl).
 *
 * Returns the number of bytes copied.  At most one page is written per call,
 * and never past the page's valid range or the file size.  Returns 0 when the
 * fast path cannot be used: missing CAP_WRITE, read-only filesystem, fnode
 * without a usable page hash, range outside the file, or page not cached /
 * busy / not yet PAGE_WRITEABLE / carrying a prefetch hint.
 */
unsigned long VFS_VSPACE_PUT(VDESC *desc, __const__ VBUF *buf)
{
	/* Work at SPL_FS, or at the caller's SPL when that is not below SPL_FS. */
	int spl = SPLX_BELOW(buf->spl, SPL_X(SPL_FS)) ? SPL_X(SPL_FS) : buf->spl;
	PAGE *p;
	unsigned off;
	unsigned long l;
	FNODE *f;
	void *v;
	LOWER_SPLX(spl);
	if (__unlikely(!HAS_CAPABILITY(desc->vspace, CAP_WRITE))) goto ret0;
	f = desc->vspace->fnode;
	if (__unlikely(f->fs->flags & FS_RO)) goto ret0;
	if (__unlikely((f->flags & (FNODE_HASH | FNODE_WANTFREE | FNODE_WRITEPAGES)) != FNODE_HASH)) {
	/* no need to handle SPAGE here, because SPAGE is read-only */
		ret0:
		LOWER_SPLX(buf->spl);
		return 0;
	}
	/* Reject a range that ends past the file size, or whose end wraps around.
	   The wrap test must be "<": the previous ">" was true for every ordinary
	   non-empty request, so this fast path always returned 0. */
	if (__unlikely(desc->ptr + desc->len > f->size) || __unlikely(desc->ptr + desc->len < desc->ptr)) goto ret0;
	p = FIND_PAGE(desc->ptr, desc->vspace->fnode, NULL);
	/* Only a page that is already on the writeable path and idle may be written here. */
	if (__unlikely(!p) || __unlikely(p->prefetch_hint) || __unlikely((p->flags & (PAGE_BUSY | PAGE_WANTFREE | PAGE_WRITEABLE)) != PAGE_WRITEABLE)) goto ret0;
	off = (unsigned long)desc->ptr & __PAGE_CLUSTER_SIZE_MINUS_1;
	/* Clamp the length to the buffer, the page cluster, the file size and the page's valid range. */
	l = desc->len;
	if (__unlikely(l > buf->len)) l = buf->len;
	if (l > __PAGE_CLUSTER_SIZE - off) l = __PAGE_CLUSTER_SIZE - off;
	if (__unlikely(desc->ptr + l > f->size)) l = f->size - desc->ptr;
	if (__unlikely(off < p->valid_from)) goto ret0;
	if (__unlikely(off + l > p->valid_to)) l = p->valid_to - off;

	v = (char *)KERNEL$MAP_PHYSICAL_PAGE(p) + off;
	memcpy(v, buf->ptr, l);
	KERNEL$UNMAP_PHYSICAL_BANK(v);
	/* Extend the page's dirty range at SPL_VSPACE. */
	RAISE_SPL(SPL_VSPACE);
	if (__likely(p->dirty_from > off)) p->dirty_from = off;
	if (__likely(p->dirty_to < off + l)) p->dirty_to = off + l;
	CHECK_PAGE(p, 0);
	LOWER_SPLX(buf->spl);
	desc->ptr += l;
	desc->len -= l;
	return l;
}

/*
 * Instantiate the template in VFSFILEM.I as VFS_VSPACE_MAP.  The macros
 * below are that file's parameters: function prototype (FN), its name as a
 * string (FN_NAME), the names of the unmap callbacks (UNL, UNL_SPAGE) and
 * their argument type (UNL_TYPE), extra local declarations (DECL), and the
 * result/unmap actions.  This variant returns a kernel virtual address
 * (mapped page plus offset), NULL for "nothing", or an error pointer.
 * The same macros are redefined further down without #undef, so presumably
 * VFSFILEM.I undefines them itself -- confirm in that file.
 */
#define FN		void *VFS_VSPACE_MAP(VDESC *desc, int rw, vspace_unmap_t **unlock)
#define FN_NAME		"VFS_VSPACE_MAP"
#define UNL		VFS_VSPACE_UNMAP
#define UNL_SPAGE	VFS_VSPACE_UNMAP_SPAGE
#define UNL_TYPE	void *
#define DECL		
#define MAY_RETURN_LESS	0
#define RET_ZERO	{ return NULL; }
#define RET_ERROR(e)	{ return __ERR_PTR(e); }
#define RET_MAP(p,o,l)	{ return (char *)KERNEL$MAP_PHYSICAL_PAGE(p) + (o); }
#define DO_UNMAP(x)	KERNEL$UNMAP_PHYSICAL_BANK_PAGE(x)

#include "VFSFILEM.I"

/*
 * Instantiate the template in VFSFILEM.I as VFS_VSPACE_DMALOCK.  This
 * variant returns a VDMA by value: ptr is the page's DMA mapping plus the
 * offset, len the mapped length.  MAY_RETURN_LESS is 1 here.  Errors are
 * not reported separately: RET_ERROR jumps to a `ret0` label, which is
 * presumably defined in VFSFILEM.I and leads to RET_ZERO.
 */
#define FN		VDMA VFS_VSPACE_DMALOCK(VDESC *desc, int rw, vspace_dmaunlock_t **unlock)
#define FN_NAME		"VFS_VSPACE_DMALOCK"
#define UNL		VFS_VSPACE_DMAUNLOCK
#define UNL_SPAGE	VFS_VSPACE_DMAUNLOCK_SPAGE
#define UNL_TYPE	__u32
#define DECL		VDMA vdma;
#define MAY_RETURN_LESS	1
#define RET_ZERO	{ vdma.ptr = vdma.len = 0; return vdma; }
#define RET_ERROR(e)	{ goto ret0; }
#define RET_MAP(p,o,l)	{ vdma.ptr = KERNEL$MAP_PAGE_DMA(p) + (o), vdma.len = (l); return vdma; }
#define DO_UNMAP(x)	KERNEL$UNMAP_PAGE_DMA(x)

#include "VFSFILEM.I"

/*
 * Instantiate the template in VFSFILEM.I as VFS_VSPACE_DMA64LOCK.  This
 * variant stores its result through the caller-supplied `dma` pointer
 * instead of returning it: ptr is the page's 64-bit DMA mapping OR-ed with
 * the offset, len the mapped length.  A zero ptr/len pair means nothing was
 * mapped.  RET_ERROR jumps to a `ret0` label, presumably defined in
 * VFSFILEM.I.
 */
#define FN		void VFS_VSPACE_DMA64LOCK(VDESC *desc, int rw, VDMA64 *dma, vspace_dma64unlock_t **unlock)
#define FN_NAME		"VFS_VSPACE_DMA64LOCK"
#define UNL		VFS_VSPACE_DMA64UNLOCK
#define UNL_SPAGE	VFS_VSPACE_DMA64UNLOCK_SPAGE
#define UNL_TYPE	__u64
#define DECL
#define MAY_RETURN_LESS	1
#define RET_ZERO	{ dma->ptr = dma->len = 0; return; }
#define RET_ERROR(e)	{ goto ret0; }
#define RET_MAP(p,o,l)	{ dma->ptr = KERNEL$MAP_PAGE_DMA64(p) | (o), dma->len = (l); return; }
#define DO_UNMAP(x)	KERNEL$UNMAP_PAGE_DMA64(x)

#include "VFSFILEM.I"

/*
 * Instantiate the template in VFSFILEM.I as VFS_VSPACE_PHYSLOCK.  This
 * variant stores a physical address (KERNEL$PAGE_2_PHYS of the page OR-ed
 * with the offset) and a length through `dma`.  DO_UNMAP only converts the
 * physical address back to a page with KERNEL$PHYS_2_PAGE.
 * NOTE(review): the unlock parameter is typed vspace_dma64unlock_t although
 * UNL_TYPE here is __p_addr, not __u64 -- confirm whether a dedicated
 * physical-unlock callback type was intended.
 * NOTE(review): RET_MAP uses `l` without parentheses, unlike the DMA variants.
 */
#define FN		void VFS_VSPACE_PHYSLOCK(VDESC *desc, int rw, VPHYS *dma, vspace_dma64unlock_t **unlock)
#define FN_NAME		"VFS_VSPACE_PHYSLOCK"
#define UNL		VFS_VSPACE_PHYSUNLOCK
#define UNL_SPAGE	VFS_VSPACE_PHYSUNLOCK_SPAGE
#define UNL_TYPE	__p_addr
#define DECL
#define MAY_RETURN_LESS	1
#define RET_ZERO	{ dma->ptr = dma->len = 0; return; }
#define RET_ERROR(e)	{ goto ret0; }
#define RET_MAP(p,o,l)	{ dma->ptr = KERNEL$PAGE_2_PHYS(p) | (o), dma->len = l; return; }
#define DO_UNMAP(x)	KERNEL$PHYS_2_PAGE(x)

#include "VFSFILEM.I"

/*
 * Take or drop one DMA lock reference on a filesystem page.  The reference
 * count lives in the PAGE_DMALOCKCOUNT bits of p->flags.
 *
 * lock == 0: acquire.  Returns &p->wait when the page is busy, is wanted
 *            free, or the counter is already saturated; otherwise the count
 *            is incremented and NULL is returned.
 * lock != 0: release.  Waiters are woken when the counter leaves its
 *            saturated value and again when it reaches zero.  Always
 *            returns NULL.
 *
 * Note the sense of `lock`: zero acquires, nonzero releases.
 * Must be called at SPL_VSPACE (checked in debug builds).
 */
static WQ *FNODE_PAGE_LOCKDOWN(PAGE *p, int lock)
{
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_VSPACE)))
		KERNEL$SUICIDE("FNODE_PAGE_LOCKDOWN AT SPL %08X", KERNEL$SPL);
#endif
	if (lock) {
		/* Release path. */
#if __DEBUG >= 1
		if (__unlikely(!(p->flags & PAGE_DMALOCKCOUNT)))
			KERNEL$SUICIDE("FNODE_PAGE_LOCKDOWN: %s: UNLOCKING UNLOCKED PAGE", VFS$FNODE_DESCRIPTION(p->fnode));
#endif
		/* The counter was saturated: someone may be waiting for a free slot. */
		if (__unlikely((p->flags & PAGE_DMALOCKCOUNT) == PAGE_DMALOCKCOUNT)) {
			WQ_WAKE_ALL(&p->wait);
		}
		p->flags -= PAGE_DMALOCKCOUNT_1;
		/* The last reference is gone. */
		if (__likely(!(p->flags & PAGE_DMALOCKCOUNT))) {
			WQ_WAKE_ALL(&p->wait);
		}
		return NULL;
	}

	/* Acquire path. */
	if (__unlikely(p->flags & (PAGE_BUSY | PAGE_WANTFREE)) || __unlikely((p->flags & PAGE_DMALOCKCOUNT) == PAGE_DMALOCKCOUNT))
		return &p->wait;
	p->flags += PAGE_DMALOCKCOUNT_1;
	return NULL;
}

/* For performance reasons it is desirable to allow more dirty pages to be
   created while the file is being written. However, this can lead to
   starvation in this function, so it is disabled after blocking this number
   of times.
   FNODE_WRITEPAGES also disables vspace IO and writing to pages while BMAP is
   being looked up, so it must be set for most of this function. */
#define MAX_FNODE_LOOP		64

/*
 * Write the dirty pages of fnode `f` to disk.
 *
 * The caller must hold FNODE_WRITELOCK on `f` (checked in debug builds).
 * The function first waits until f->pending_writes drains, then walks
 * f->u.h.dirty with FNODE_WRITEPAGES set.  For each page it revokes
 * writeability, computes the dirty range rounded to the page-I/O
 * granularity, makes sure the block mapping covers it (calling
 * fsops->sync_bmap if not) and submits the run with VFS_WRITEPAGE.  A page
 * whose dirty range has been completely submitted is moved to f->u.h.clean.
 * Whenever the function has to block, the scan restarts at `nextpage`; for
 * the first MAX_FNODE_LOOP such occasions FNODE_WRITEPAGES is dropped while
 * blocking.
 *
 * Returns 0 on success (also when `f` has no FNODE_HASH), or the nonzero
 * result of fsops->sync_bmap, in which case fs->write_error is set and
 * VFS_UNDIRTY_PAGES(f) has been called.
 */
int VFS$WRITEPAGES(FNODE *f)
{
	PAGE *p;
	int r;
	__d_off off, o;
	unsigned sec, nsec;
	int loop = MAX_FNODE_LOOP;
	int did_write_last_page = 0;
#if __DEBUG >= 1
	if (__unlikely(!(f->flags & FNODE_WRITELOCK)))
		KERNEL$SUICIDE("VFS$WRITEPAGES: %s: FNODE NOT LOCKED, FLAGS %X", VFS$FNODE_DESCRIPTION(f), f->flags);
#endif
	/* Let writes that are already queued for this fnode finish first. */
	while (__unlikely(f->pending_writes)) {
		VFS_DO_WRITE(f->fs);
		WQ_WAIT_SYNC(&f->pending_write_wait);
	}
	nextpage:
	if (__unlikely(!(f->flags & FNODE_HASH))) {
		return 0;
	}
	RAISE_SPL(SPL_VSPACE);
	f->flags |= FNODE_WRITEPAGES;
	LOWER_SPL(SPL_FS);
	LIST_FOR_EACH(p, &f->u.h.dirty, PAGE, node_entry) {
		_u_off_t uo;
		unsigned df, dt;
		unsigned pageio_mask;
		WQ *wq;
#if __DEBUG >= 1
		if (__unlikely(!(p->flags & PAGE_WRITEABLE)))
			KERNEL$SUICIDE("VFS$WRITEPAGES: %s: CLEAN(%X, %d,%d) PAGE(%016"__64_format"X) ON DIRTY LIST", VFS$FNODE_DESCRIPTION(f), p->flags, p->dirty_from, p->dirty_to, (__u64)p->id);
#endif
		next_part:
		/* Revoke write access to the page.  The code that follows runs as if
		   SPL_VSPACE were held on return (flags are changed without raising
		   SPL, and SPL is lowered afterwards). */
		if (__unlikely((wq = KERNEL$VM_UNSET_WRITEABLE(p)) != NULL)) {
			if (__likely(loop)) {
				loop--;
				f->flags &= ~FNODE_WRITEPAGES;
			}
			WQ_WAIT_SYNC(wq);
			LOWER_SPL(SPL_FS);
			goto nextpage;
		}
		/* Dirty range widened to page-I/O boundaries. */
		pageio_mask = f->fs->pageio_mask;
		df = p->dirty_from & ~pageio_mask;
		dt = (p->dirty_to + pageio_mask) & ~pageio_mask;
		/* Nothing dirty: just move the page to the clean list. */
		if (__unlikely(df >= dt)) goto unsw;
		LOWER_SPL(SPL_FS);
		/* The page reaching past disk_size was already written once in this call; see the comment below. */
		if (__unlikely(did_write_last_page) && __unlikely((p->id | dt) > f->disk_size)) {
			continue;
		}
		uo = p->id | df;
		if (__unlikely(uo >= f->size)) {
#if __DEBUG >= 1
			if (__unlikely(p->id >= f->size))
				KERNEL$SUICIDE("VFS$WRITEPAGES: %s: PAGE OUT OF FNODE (SIZE %"__64_format"X, PAGE %"__64_format"X, DIRTY FROM %X TO %X)", VFS$FNODE_DESCRIPTION(f), (__u64)f->size, (__u64)p->id, df, dt);
#endif
			/* The dirty range starts at or past end of file: nothing to write.
			   unsw changes the page flags and the dirty/clean lists and ends
			   with LOWER_SPL(SPL_FS); its other entry points are reached at
			   SPL_VSPACE, so raise SPL here as well instead of touching that
			   state at SPL_FS. */
			RAISE_SPL(SPL_VSPACE);
			goto unsw;
		}
		/* Data past the on-disk size is not written by this pass; leave the page dirty. */
		if (__unlikely(uo >= f->disk_size)) {
			continue;
		}
		off = uo >> BIO_SECTOR_SIZE_BITS;
		if (__unlikely(!BMAP_TEST(f, off))) {
			/* The current block mapping does not cover this sector: sync it and rescan. */
			if (__likely(loop)) {
				loop--;
				RAISE_SPL(SPL_VSPACE);
				f->flags &= ~FNODE_WRITEPAGES;
				LOWER_SPL(SPL_FS);
			}
			r = f->fs->fsops->sync_bmap(f, off, 0);
			if (__unlikely(r)) {
				RAISE_SPL(SPL_VSPACE);
				f->flags &= ~FNODE_WRITEPAGES;
				LOWER_SPL(SPL_FS);
				f->fs->write_error = 1;
				VFS_UNDIRTY_PAGES(f);
				return r;
			}
			goto nextpage;
		}
		/*__debug_printf("bmap (%s,%Lx)->(d:%Lx,f:%Lx,l:%Ld)", f->name, off, f->disk_blk, f->file_blk, f->run_length);*/
		sec = df >> BIO_SECTOR_SIZE_BITS;
		/* The rounded-up range extends past end of file: let the filesystem
		   initialize the bytes between EOF and the end of the write, unless
		   the whole cluster is already valid. */
		if (__unlikely(p->id + dt > f->size)) {
			if (__likely(p->valid_to != __PAGE_CLUSTER_SIZE)) {
				char *ptr = KERNEL$MAP_PHYSICAL_PAGE(p);
				unsigned from = (unsigned long)f->size & __PAGE_CLUSTER_SIZE_MINUS_1;
				f->fs->fsops->init_last_page(f, ptr + from, dt - from);
				KERNEL$UNMAP_PHYSICAL_BANK(ptr);
			}
		}
		/* Limit the write to the sectors left in the current mapped run. */
		nsec = (dt >> BIO_SECTOR_SIZE_BITS) - sec;
		o = off - f->file_blk;
		if (f->run_length - o < nsec)
			nsec = f->run_length - o;
		if (VFS_WRITEPAGE(f->fs, p, sec, nsec, o + f->disk_blk)) {
			/* The write could not be queued: wait on wpage_wait and rescan. */
			if (__likely(loop)) {
				loop--;
				RAISE_SPL(SPL_VSPACE);
				f->flags &= ~FNODE_WRITEPAGES;
				LOWER_SPL(SPL_FS);
			}
			WQ_WAIT_SYNC(&f->fs->wpage_wait);
			goto nextpage;
		}
		df += (nsec << BIO_SECTOR_SIZE_BITS);
		if (__unlikely(f->size > f->disk_size) && __unlikely(p->id + df > f->disk_size)) {
/* Trick:
 Requirement: data after fnode->disk_size must always be dirty.
 When the file has grown while we were preparing the write
 (f->size > f->disk_size), we must still write the data (so that fsync flushes
 everything that was dirty before it was called), but we must leave the page
 dirty so that the above requirement keeps holding.
 This could livelock (because we do not reset the page's dirty state), so make
 sure we write the last page only once.
*/
			did_write_last_page = 1;
			continue;
		}
		RAISE_SPL(SPL_VSPACE);
		p->dirty_from = df;
		if (__likely(p->dirty_from >= p->dirty_to)) {
			PAGE *pp;
			/* Page is clean now: reset its dirty range and move it to the clean
			   list.  Entered at SPL_VSPACE; the predecessor is remembered so
			   the list walk continues from the right place after the removal. */
			unsw:
			p->dirty_from = __PAGE_CLUSTER_SIZE;
			p->dirty_to = 0;
			p->flags &= ~PAGE_WRITEABLE;
			pp = LIST_STRUCT(p->node_entry.prev, PAGE, node_entry);
			DEL_FROM_LIST(&p->node_entry);
			ADD_TO_LIST_END(&f->u.h.clean, &p->node_entry);
			LOWER_SPL(SPL_FS);
			f->fs->fsops->unaccount(f, ACCOUNT_PAGE, p);
			VFS$UNSET_DIRTYLIST(f);
			p = pp;
		} else {
			/* The mapped run ended inside the dirty range: write the remainder. */
			LOWER_SPL(SPL_FS);
			goto next_part;
		}
	}
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_FS)))
		KERNEL$SUICIDE("VFS$WRITEPAGES: LEAVING WITH SPL %08X", KERNEL$SPL);
	RAISE_SPL(SPL_VSPACE);
	f->flags &= ~FNODE_WRITEPAGES;
	LOWER_SPL(SPL_FS);
	return 0;
}

/*
 * Zero-fill `len` bytes starting at `ptr`.  The fnode argument is not used.
 * The signature matches the fsops->init_last_page call made in
 * VFS$WRITEPAGES, so this is presumably a generic implementation that
 * filesystems can plug in -- confirm against the fsops tables.
 */
void VFS$INIT_LAST_PAGE(FNODE *f, char *ptr, unsigned len)
{
	(void)f;
	memset(ptr, 0, len);
}

/*
 * Fatal-error helper: stop the kernel with a dump of the fnode and page state
 * when page I/O is attempted on an uncommitted fnode.  `from` is the
 * requested start offset, printed as "REQUEST FROM".
 */
static void io_uncommitted_suicide(PAGE *p, unsigned from)
{
	FNODE *owner = p->fnode;

	KERNEL$SUICIDE("VFS_READ_FNODE_PAGE: %s: IO ON UNCOMMITTED FNODE, FLAGS %X, SIZE %"__64_format"X, PAGE %"__64_format"X, FLAGS %X, VALID FROM %u, VALID TO %u, DIRTY FROM %u, DIRTY TO %u, REQUEST FROM %u",
		VFS$FNODE_DESCRIPTION(owner),
		owner->flags,
		(__u64)owner->size,
		(__u64)p->id,
		p->flags,
		p->valid_from,
		p->valid_to,
		p->dirty_from,
		p->dirty_to,
		from);
}

/*
 * Fatal-error helper: stop the kernel with a dump of the fnode and page state
 * when page I/O reaches beyond the fnode's on-disk size.  `from` is the
 * requested start offset, printed as "REQUEST FROM".
 */
static void io_out_of_disk_suicide(PAGE *p, unsigned from)
{
	FNODE *owner = p->fnode;

	KERNEL$SUICIDE("VFS_READ_FNODE_PAGE: %s: ACCESS OUT OF DISK SIZE, FNODE FLAGS %X, SIZE %"__64_format"X, DISK SIZE %"__64_format"X, PAGE %"__64_format"X, FLAGS %X, VALID FROM %u, VALID TO %u, DIRTY FROM %u, DIRTY TO %u, REQUEST FROM %u",
		VFS$FNODE_DESCRIPTION(owner),
		owner->flags,
		(__u64)owner->size,
		(__u64)owner->disk_size,
		(__u64)p->id,
		p->flags,
		p->valid_from,
		p->valid_to,
		p->dirty_from,
		p->dirty_to,
		from);
}

/*
 * Fatal-error helper: stop the kernel, reporting "ZERO LEFT SECTORS" together
 * with the fnode's size and block-mapping state and the request's file offset.
 */
static void zero_left_sectors_suicide(PAGEINRQ *pgin)
{
	FNODE *owner = pgin->fnode;

	KERNEL$SUICIDE("VFS$BMAP_DONE: %s: ZERO LEFT SECTORS, FNODE FLAGS %X, SIZE %"__64_format"X, DISK SIZE %"__64_format"X, FILE OFFSET %"__64_format"X, FILE BLOCK %"__64_format"X, RUN LENGTH %"__64_format"X, READAHEAD %X",
		VFS$FNODE_DESCRIPTION(owner),
		owner->flags,
		(__u64)owner->size,
		(__u64)owner->disk_size,
		(__u64)pgin->off,
		(__u64)owner->file_blk,
		(__u64)owner->run_length,
		owner->fs->page_readahead);
}

/*
 * Fatal-error helper: stop the kernel, reporting an unaligned count of left
 * sectors (`left`) together with the fnode's size and block-mapping state and
 * the request's file offset.
 */
static void unaligned_left_sectors_suicide(PAGEINRQ *pgin, unsigned long left)
{
	FNODE *owner = pgin->fnode;

	KERNEL$SUICIDE("VFS$BMAP_DONE: %s: UNALIGNED LEFT SECTORS %lX, FNODE FLAGS %X, SIZE %"__64_format"X, DISK SIZE %"__64_format"X, FILE OFFSET %"__64_format"X, FILE BLOCK %"__64_format"X, RUN LENGTH %"__64_format"X, READAHEAD %X",
		VFS$FNODE_DESCRIPTION(owner),
		left,
		owner->flags,
		(__u64)owner->size,
		(__u64)owner->disk_size,
		(__u64)pgin->off,
		(__u64)owner->file_blk,
		(__u64)owner->run_length,
		owner->fs->page_readahead);
}

/*
 * Register the "PAGE" VM entity type (page_calls) with the kernel cache and
 * store its identifier in page_vm_entity.  Returns whatever
 * KERNEL$CACHE_REGISTER_VM_TYPE returns.
 */
int PAGE_INIT_GLOBAL(void)
{
	int status;

	status = KERNEL$CACHE_REGISTER_VM_TYPE(&page_vm_entity, &page_calls);
	return status;
}

/*
 * Counterpart of PAGE_INIT_GLOBAL: unregister the VM entity type whose
 * identifier is held in page_vm_entity.
 */
void PAGE_TERM_GLOBAL(void)
{
	int type_id = page_vm_entity;

	KERNEL$CACHE_UNREGISTER_VM_TYPE(type_id);
}
