#include <SPAD/HASH.H>
#include <SPAD/VFS.H>
#include <SPAD/IOCTL.H>
#include <DIRENT.H>
#include <SPAD/DIR_KRNL.H>
#include <SPAD/DEV_KRNL.H>
#include <SYS/STAT.H>
#include <TIME.H>
#include <VALUES.H>
#include <SPAD/SLAB.H>

#include "VFS.H"

/* NOTE(review): not referenced in the part of this file visible here; presumably used by the readdir code. */
#define MAX_READDIR_DIRTY	64

/* VM-entity callbacks for fnodes; defined below and collected in fnode_calls. */
static int FNODE_SWAPOUT(VMENTITY *e);
static int FNODE_CHECKMAP(VMENTITY *e);

/* Handle-operation callbacks referenced by VFS_DIR_OPERATIONS and VFS_FILE_OPERATIONS. */
static void *VFS_DIR_CLONE(HANDLE *from, HANDLE *to, int open_flags);
static void *VFS_DIR_LOOKUP(HANDLE *handle, char *str, int open_flags);
static void *VFS_FNODE_INSTANTIATE(HANDLE *h, IORQ *rq, int open_flags);
static void VFS_FNODE_DETACH(HANDLE *h);
static void *VFS_FILE_LOOKUP(HANDLE *handle, char *str, int open_flags);
extern IO_STUB VFS_ISDIR;
static void VFS_DIR_LOOKUP_IO(HANDLE *h, char *str, IORQ *rq, int open_flags);
static void *VFS_DIR_CREATE(HANDLE *handle, char *filename, int open_flags);
static void *VFS_DIR_DELETE(HANDLE *h, IORQ *rq, int open_flags, HANDLE *hp);
static void *VFS_FILE_DELETE(HANDLE *h, IORQ *rq, int open_flags, HANDLE *hp);
static void *VFS_DIR_RENAME(HANDLE *handle, char *filename, HANDLE *fh, IORQ *rq);
extern IO_STUB VFS_FILE_IOCTL;
extern IO_STUB VFS_DIR_IOCTL;

/* Value stored into fnode->e.type by VFS$FNODE_CTOR; it is not assigned in the visible part of this file. */
static int fnode_vm_entity;

/* Callback table for fnode VM entities: checkmap, write, swapout handlers and the name "FNODE". */
static __const__ VMENTITY_T fnode_calls = { FNODE_CHECKMAP, FNODE_WRITE, FNODE_SWAPOUT, "FNODE" };

/*
 * Constructor for an FNODE object.
 *
 * fs  - owning filesystem, stored into the fnode.
 * fn_ - raw memory of the FNODE being constructed.
 *
 * Initializes the handle list, both wait queues, the embedded VM entity and
 * the counters; the name pointer starts out referring to the inline name
 * buffer.
 */
void VFS$FNODE_CTOR(void *fs, void *fn_)
{
	FNODE *node = fn_;

	/* lists and wait queues */
	INIT_XLIST(&node->handles);
	WQ_INIT(&node->wait, "VFS$FNODE_WAIT");

	/* identity */
	node->fs = fs;
	node->name = node->inline_name;

	/* lock/usage counters */
	node->readers = 0;
	node->dio_writers = 0;

	/* embedded VM entity; the type must be set after construction */
	CACHE_CONSTRUCT_VM_ENTITY(&node->e);
	node->e.type = fnode_vm_entity;

	/* write-back bookkeeping */
	node->pending_writes = 0;
	WQ_INIT(&node->pending_write_wait, "VFS$FNODE_PENDING_WRITE_WAIT");
	node->syncproc = NULL;
}

/*
 * Constructor for a PAGEINRQ object.
 *
 * fs_ - owning filesystem (FS *).
 * p_  - raw memory of the PAGEINRQ being constructed.
 *
 * Records the filesystem and pre-fills the embedded block request with the
 * filesystem's disk handle number and the KERNEL$PHYSICAL vspace.
 */
void VFS$PAGEINRQ_CTOR(void *fs_, void *p_)
{
	FS *owner = fs_;
	PAGEINRQ *req = p_;

	req->fs = owner;
	req->brq.rrq.h = owner->disk_handle_num;
	req->brq.rdesc.v.vspace = &KERNEL$PHYSICAL;
}

/*
 * "checkmap" callback of the fnode VM entity (first slot of fnode_calls).
 * Performs LOWER_SPL(SPL_FS) and always returns 0; the entity argument is
 * not inspected.
 */
static int FNODE_CHECKMAP(VMENTITY *e)
{
	LOWER_SPL(SPL_FS);

	return 0;
}

/*
 * "write" callback of the fnode VM entity.
 *
 * e        - VM entity embedded in the fnode to write.
 * p        - process on whose behalf the write is issued.
 * trashing - not used by this function.
 *
 * Issues WRITE_FNODE on the fnode.  If the fnode is flagged FNODE_SYNCLIST
 * and has no syncproc yet, a writeback tag is acquired for p and p is
 * recorded as the fnode's syncproc.
 */
void FNODE_WRITE(VMENTITY *e, PROC *p, int trashing)
{
	FNODE *node;

	LOWER_SPL(SPL_FS);
	node = LIST_STRUCT(e, FNODE, e);
	WRITE_FNODE(node);

	if (!(node->flags & FNODE_SYNCLIST)) return;
	if (node->syncproc) return;

	/* !!! TODO: acquire tag on dirty pages too */
	KERNEL$ACQUIRE_WRITEBACK_TAG(p);
	node->syncproc = p;
}

/*
 * "swapout" callback of the fnode VM entity.
 *
 * Returns:
 *   2 - the fnode has no parent, or no_need_to_free_fnodes() says
 *       there is nothing to do;
 *   3 - the fnode has a hashed page that is not freeable; the cache queue
 *       state was transferred from the fnode to that page;
 *   otherwise 1 if VFS$FREE_FNODE returned non-NULL and 0 if it returned
 *   NULL.
 * NOTE(review): the meaning the cache layer assigns to these codes is not
 * visible in this file.
 *
 * Directories get no special treatment here (an earlier variant that
 * transferred queue state to child fnodes was disabled).
 */
static int FNODE_SWAPOUT(VMENTITY *e)
{
	FNODE *node;
	FS *fs;
	PAGE *pg;
	int failed;

	LOWER_SPL(SPL_FS);
	node = LIST_STRUCT(e, FNODE, e);
	fs = node->fs;

	if (__unlikely(!node->parent) || no_need_to_free_fnodes(fs)) {
		return 2;
	}

	if (__likely(node->flags & FNODE_HASH)) {
		/* dirty pages first, then clean ones; stop at the first busy page */
		LIST_FOR_EACH(pg, &node->u.h.dirty, PAGE, node_entry) {
			if (CACHE_VMENTITY_IS_FREEABLE(&pg->e)) continue;
			goto busy_page;
		}
		LIST_FOR_EACH(pg, &node->u.h.clean, PAGE, node_entry) {
			if (CACHE_VMENTITY_IS_FREEABLE(&pg->e)) continue;
			goto busy_page;
		}
	}

	failed = VFS$FREE_FNODE(node) != NULL;
	LOWER_SPL(SPL_FS);
	return failed;

	busy_page:
	RAISE_SPL(SPL_CACHE);
	KERNEL$CACHE_TRANSFER_QUEUE_STATE(&node->e, &pg->e);
	LOWER_SPL(SPL_FS);
	return 3;
}

/*
 * Allocate and initialize the root fnode of a filesystem.
 *
 * Must be called at SPL_FS (the kernel is halted otherwise).  The fnode is
 * taken from fs->fnodes; an allocation failure is fatal.  The root is named
 * "/", has no parent, depth 0, zeroed flags/sizes/times, is inserted into
 * the cache on behalf of KERNEL$PROC_KERNEL, stored in fs->root and
 * returned.
 */
FNODE *VFS$GET_ROOT_FNODE(FS *fs)
{
	FNODE *root;

	if (__unlikely(KERNEL$SPL != SPL_X(SPL_FS))) {
		KERNEL$SUICIDE("VFS$GET_ROOT_FNODE: %s: CALLED AT SPL %08X", fs->filesystem_name, KERNEL$SPL);
	}

	root = __slalloc(&fs->fnodes);
	if (!root) {
		KERNEL$SUICIDE("VFS$GET_ROOT_FNODE: %s: CAN'T ALLOCATE FROM RESERVED FNODE SLAB", fs->filesystem_name);
	}

#ifdef VFS_INO
	root->ino = 0;
#endif

	/* name: "/" */
	root->inline_name[0] = '/';
	root->inline_name[1] = 0;
	root->namelen = 1;

	/* position in the tree: unlinked, top level */
	root->parent = NULL;
	root->depth = 0;
	VOID_LIST_ENTRY(&root->hash_entry);
	VOID_LIST_ENTRY(&root->dirent_entry);

	/* attributes */
	root->flags = 0;
	root->size = 0;
	root->disk_size = 0;
	root->ctime = 0;
	root->mtime = 0;

	RAISE_SPL(SPL_CACHE);
	KERNEL$CACHE_INSERT_VM_ENTITY(&root->e, &KERNEL$PROC_KERNEL, 0);
	LOWER_SPL(SPL_FS);

	fs->root = root;
	return root;
}

/*
 * Move one fnode into the bucket of a new hash table.
 *
 * f1   - fnode to move; its name is hashed with quickcasehash.
 * hash - destination bucket array.
 * mask - mask applied to the hash value to select the bucket.
 */
static __finline__ void REHASH(FNODE *f1, XLIST_HEAD *hash, int mask)
{
	int bucket = 0;
	char *cursor = f1->name;

	quickcasehash(cursor, *cursor, bucket);
	bucket &= mask;

	DEL_FROM_LIST(&f1->hash_entry);
	ADD_TO_XLIST(&hash[bucket], &f1->hash_entry);
}

/*
 * Allocate a big directory hash table from a kernel page.
 *
 * On success the table is initialized with init_big_hash and fs->n_bighash
 * is incremented.  Returns the table, or NULL when the page allocation
 * failed (in which case fs is left untouched).
 */
XLIST_HEAD *VFS_ALLOC_BIGHASH(FS *fs)
{
	XLIST_HEAD *table = KERNEL$ALLOC_KERNEL_PAGE(VM_TYPE_CACHED_MAPPED);

	if (__likely(table != NULL)) {
		fs->n_bighash++;
		init_big_hash(table);
	}
	return table;
}

/*
 * Replace a directory's hash table with a big one.
 *
 * Does nothing when the directory has no hash table, when it is the root
 * (no parent), or when a big table cannot be allocated.  Otherwise every
 * child on the clean and dirty lists is rehashed into the new table, the
 * old table is freed, waiters on fs->freemem are woken and the directory is
 * switched to the new table and BIG_HASH_MASK.
 */
void VFS_REHASH_DIRECTORY(FNODE *f)
{
	XLIST_HEAD *table;
	FNODE *child;

	/* the hash may never shrink (readdir would then skip uncommitted
		files), so we can't use big hash for root --- because root can
		not be freed.
		!!! TODO: preallocate bighash for root at mount?
	*/
	if (__unlikely(!f->u.d.hash) || __unlikely(!f->parent)) return;

	table = VFS_ALLOC_BIGHASH(f->fs);
	if (__unlikely(!table)) return;

	LIST_FOR_EACH(child, &f->u.d.clean, FNODE, dirent_entry) {
		REHASH(child, table, BIG_HASH_MASK);
	}
	LIST_FOR_EACH(child, &f->u.d.dirty, FNODE, dirent_entry) {
		REHASH(child, table, BIG_HASH_MASK);
	}

	free_hash(f->fs, f->u.d.hash);
	WQ_WAKE_ALL(&f->fs->freemem);

	f->u.d.hash = table;
	f->u.d.hash_mask = BIG_HASH_MASK;
}

/*
 * Take an fnode out of the FNODE_WANTFREE state.
 *
 * No-op unless FNODE_WANTFREE is set.  Otherwise the fnode is unlinked from
 * the free list, the flag is cleared, waiters on the fnode and on
 * fs->freemem are woken, and the VM entity is removed from and re-inserted
 * into the cache for the process it was wired to (KERNEL$PROC_KERNEL when
 * it was not wired).
 */
void VFS$FNODE_OUT_OF_WANTFREE(FNODE *f)
{
	PROC *owner;

	if (__unlikely(!(f->flags & FNODE_WANTFREE))) return;

	DEL_FROM_LIST(&f->free_entry);
#if __DEBUG >= 1
	/* not needed, but catches bugs */
	f->free_entry.next = NULL;
	f->free_entry.prev = NULL;
#endif
	f->flags &= ~FNODE_WANTFREE;

	WQ_WAKE_ALL(&f->wait);
	WQ_WAKE_ALL(&f->fs->freemem);

	owner = f->e.wired ? f->e.wired : &KERNEL$PROC_KERNEL;

	RAISE_SPL(SPL_CACHE);
	/* remove + insert keeps the swapper happy about its statistics */
	KERNEL$CACHE_REMOVE_VM_ENTITY(&f->e);
	TEST_SPL(SPL_FS, SPL_CACHE);
	KERNEL$CACHE_INSERT_VM_ENTITY(&f->e, owner, 0);
	LOWER_SPL(SPL_FS);
}

static WQ *VFS_MAY_DIRTY_OVER(PROC *p, FS *fs);

/*
 * Ask whether process p may dirty more data on filesystem fs.
 *
 * Below VFS_MAX_DIRTY_NODES dirty fnodes the question goes straight to
 * KERNEL$MAY_DIRTY(p, 0); at or above that limit VFS_MAY_DIRTY_OVER handles
 * it.  The returned WQ pointer is passed through from those functions;
 * callers in this file treat non-NULL as "wait on this queue".
 */
WQ *VFS$MAY_DIRTY(PROC *p, FS *fs)
{
	if (__likely(fs->total_dirty < VFS_MAX_DIRTY_NODES)) {
		return KERNEL$MAY_DIRTY(p, 0);
	}
	return VFS_MAY_DIRTY_OVER(p, fs);
}

/*
 * Mark an fnode dirty.  If it was not dirty already, the filesystem's
 * dirty counter is bumped, FNODE_DIRTY is set and the fnode is put on the
 * dirty list of its ancestors via VFS$SET_DIRTYLIST.
 */
void VFS$SET_DIRTY(FNODE *f)
{
	if (!(f->flags & FNODE_DIRTY)) {
		f->flags |= FNODE_DIRTY;
		f->fs->total_dirty++;
		VFS$SET_DIRTYLIST(f);
	}
}

/*
 * Put an fnode on its parent's dirty list and propagate upwards.
 *
 * Starting at f, each fnode that does not yet carry FNODE_DIRTYLIST gets
 * the flag and is moved from its parent's clean list to the end of the
 * parent's dirty list (adjusting n_clean/n_dirty).  The walk continues with
 * the parent and stops at the first fnode that already has the flag or has
 * no parent.
 */
void VFS$SET_DIRTYLIST(FNODE *f)
{
	while (!(f->flags & FNODE_DIRTYLIST)) {
		FNODE *dir;

		f->flags |= FNODE_DIRTYLIST;
		dir = f->parent;
		if (__unlikely(dir == NULL)) break;

		DEL_FROM_LIST(&f->dirent_entry);
		ADD_TO_LIST_END(&dir->u.d.dirty, &f->dirent_entry);
		dir->u.d.n_clean--;
		dir->u.d.n_dirty++;
		CHECK_DIR_LISTS_3(dir);

		f = dir;
	}
}

/*
 * Clear the dirty state of an fnode.
 *
 * No-op when FNODE_DIRTY is not set.  Otherwise FNODE_DIRTY and
 * FNODE_TIMED_SYNC are cleared, the filesystem's dirty counter is
 * decremented (a counter that was already zero is fatal) and the dirty-list
 * membership is re-evaluated with VFS$UNSET_DIRTYLIST.
 */
void VFS$UNSET_DIRTY(FNODE *f)
{
	FS *fs = f->fs;

	if (__unlikely(!(f->flags & FNODE_DIRTY))) {
		return;
	}

	f->flags &= ~(FNODE_DIRTY | FNODE_TIMED_SYNC);

	if (__unlikely(!fs->total_dirty--)) {
		KERNEL$SUICIDE("VFS$UNSET_DIRTY: %s: DIRTY COUNT UNDERFLOW", VFS$FNODE_DESCRIPTION(f));
	}

	VFS$UNSET_DIRTYLIST(f);
}

/*
 * Move an fnode back to its parent's clean list when nothing below it is
 * dirty any more, and propagate upwards.
 *
 * An fnode still has dirty content when
 *   - it is not a directory, has FNODE_HASH and a non-empty u.h.dirty list, or
 *   - it is a directory with n_dirty != 0.
 * In that case nothing is done.  Otherwise, as long as the current fnode
 * has FNODE_DIRTYLIST set without FNODE_DIRTY, the flag is cleared and the
 * fnode is moved to the end of the parent's clean list (adjusting the
 * counters); the walk then continues with the parent, which is examined
 * using the directory rule above.
 */
void VFS$UNSET_DIRTYLIST(FNODE *f)
{
	FNODE *dir;

	if (!(f->flags & FNODE_DIRECTORY)) {
		if ((f->flags & FNODE_HASH) && !LIST_EMPTY(&f->u.h.dirty)) return;
	} else if (f->u.d.n_dirty) {
		return;
	}

	while ((f->flags & (FNODE_DIRTY | FNODE_DIRTYLIST)) == FNODE_DIRTYLIST) {
		f->flags &= ~FNODE_DIRTYLIST;

		dir = f->parent;
		if (__unlikely(!dir)) return;

		DEL_FROM_LIST(&f->dirent_entry);
		ADD_TO_LIST_END(&dir->u.d.clean, &f->dirent_entry);
		dir->u.d.n_clean++;
		dir->u.d.n_dirty--;
		CHECK_DIR_LISTS(dir);

		/* continue with the parent; it stays listed while it has dirty children */
		f = dir;
		if (f->u.d.n_dirty) return;
	}
}

/*
 * Slow path of VFS$MAY_DIRTY, taken once the soft dirty-fnode limit is
 * reached: runs SYNC_HALF on the filesystem, then asks KERNEL$MAY_DIRTY,
 * passing 1 when the hard limit VFS_MAX_DIRTY_NODES_HARD is reached as well
 * and 0 otherwise.
 */
static WQ *VFS_MAY_DIRTY_OVER(PROC *p, FS *fs)
{
	int over_hard_limit;

	SYNC_HALF(fs);
	over_hard_limit = fs->total_dirty >= VFS_MAX_DIRTY_NODES_HARD;
	return KERNEL$MAY_DIRTY(p, over_hard_limit);
}

/*
 * Operation table installed on handles that refer to a directory fnode
 * (see VFS_DIR_CLONE).  The initializer is positional; the structure
 * layout is declared elsewhere.  Slot names given below without a
 * "presumably" come from labels present in this table or in
 * VFS_FILE_OPERATIONS; the others are inferred from the handler names.
 */
__const__ HANDLE_OPERATIONS VFS_DIR_OPERATIONS = {
	SPL_X(SPL_FS),		/* NOTE(review): presumably the SPL the handlers run at; their debug checks expect SPL_FS */
	KERNEL$NO_VSPACE_GET,
	KERNEL$NO_VSPACE_PUT,
	KERNEL$NO_VSPACE_MAP,
	KERNEL$NO_VSPACE_DMALOCK,
	KERNEL$NO_VSPACE_DMA64LOCK,
	KERNEL$NO_VSPACE_PHYSLOCK,
	KERNEL$NO_VSPACE_GET_PAGEIN_RQ,
	KERNEL$NO_VSPACE_GET_PAGE,
	KERNEL$NO_VSPACE_SWAP_OP,
	VFS_DIR_CLONE,		/* clone */
	VFS_DIR_LOOKUP,		/* presumably lookup */
	VFS_DIR_CREATE,		/* presumably create */
	VFS_DIR_DELETE,		/* presumably delete */
	VFS_DIR_RENAME,		/* rename */
	VFS_DIR_LOOKUP_IO,	/* lookup_io */
	VFS_FNODE_INSTANTIATE,	/* presumably instantiate */
	NULL,			/* leave */
	VFS_FNODE_DETACH,	/* presumably detach */
	NULL,			/* open */
	NULL,			/* close */
	VFS_ISDIR,		/* read */
	VFS_ISDIR,		/* write */
	VFS_ISDIR,		/* aread */
	VFS_ISDIR,		/* awrite */
	VFS_DIR_IOCTL,		/* ioctl */
	KERNEL$NO_OPERATION,	/* bio */
	KERNEL$NO_OPERATION,
};

/*
 * Operation table installed on handles that refer to a regular-file fnode
 * (VFS_DIR_LOOKUP and VFS_DIR_CREATE switch handle->op to it when the
 * fnode has FNODE_FILE).  The initializer is positional; the structure
 * layout is declared elsewhere.  Slot names marked "presumably" are
 * inferred from the handler occupying the same position in
 * VFS_DIR_OPERATIONS.
 */
__const__ HANDLE_OPERATIONS VFS_FILE_OPERATIONS = {
	SPL_X(SPL_FS),		/* NOTE(review): presumably the SPL the handlers run at */
	VFS_VSPACE_GET,
	VFS_VSPACE_PUT,
	VFS_VSPACE_MAP,
	VFS_VSPACE_DMALOCK,
	VFS_VSPACE_DMA64LOCK,
	VFS_VSPACE_PHYSLOCK,
	VFS_GET_PAGEIN_RQ,
	VFS_GET_PAGE,
	KERNEL$NO_VSPACE_SWAP_OP,
	NULL,			/* clone */
	VFS_FILE_LOOKUP,	/* presumably lookup; only accepts '^' modifiers */
	NULL,			/* presumably create */
	VFS_FILE_DELETE,	/* presumably delete */
	NULL,			/* rename */
	NULL,			/* presumably lookup_io */
	VFS_FNODE_INSTANTIATE,	/* presumably instantiate */
	NULL,			/* leave */
	VFS_FNODE_DETACH,	/* presumably detach */
	NULL,			/* open */
	NULL,			/* close */
	VFS_FILE_READ,		/* read */
	VFS_FILE_WRITE,		/* write */
	VFS_FILE_AREAD,		/* aread */
	VFS_FILE_AWRITE,	/* awrite */
	VFS_FILE_IOCTL,		/* ioctl */
	VFS_FILE_BIO,		/* bio */
	KERNEL$NO_OPERATION,
};

/*
 * Access modifiers accepted after '^' in a path component (matched
 * case-insensitively by VFS_FILE_LOOKUP).  For the matching row the bits in
 * flagclr are cleared from and the bits in flagset are set in
 * handle->flags2.
 */
static struct {
	char name[8];		/* modifier text, without the leading '^' */
	long flagclr;		/* capability bits to clear */
	long flagset;		/* capability bits to set */
} __const__ accmod[] = {
	{ "RO",		CAP_ALLWRITE,				0 },
	{ "ROO",	CAP_ALLWRITE | CAP_ACLGAIN,		0 },
	{ "XR",		CAP_READ,				0 },
	{ "XA",		CAP_APPEND | CAP_WRITE | CAP_UTIMES,	0 },
	{ "XW",		CAP_WRITE | CAP_UTIMES,			0 },
	{ "XD",		CAP_DELETE,				0 },
	{ "XS",		CAP_SCANDIR,				0 },
	{ "XL",		CAP_LOOKUP | CAP_CREATE,		0 },
	{ "XC",		CAP_CREATE,				0 },
	{ "XG",		CAP_ACLGAIN,				0 },
	{ "XU",		CAP_UTIMES,				0 },
	{ "DIRECT",	0,					CAP_DIRECT },
};

/*
 * Clone a directory handle.
 *
 * from       - handle to copy.
 * to         - handle being set up; its name_addrspace is read, not written.
 * open_flags - not used.
 *
 * Copies the flags and the fnode reference, installs the directory
 * operation table, then touches the VM entity of the fnode and of every
 * ancestor on behalf of to->name_addrspace.  Always returns NULL.
 */
static void *VFS_DIR_CLONE(HANDLE *from, HANDLE *to, int open_flags)
{
	FNODE *node = from->fnode;
	PROC *owner = to->name_addrspace;

	to->op = &VFS_DIR_OPERATIONS;
	to->flags = from->flags;
	to->flags2 = from->flags2;
	to->fnode = node;
#if __DEBUG >= 1
	/* not needed, but catches bugs */
	to->fnode_entry.next = NULL;
	to->fnode_entry.prev = NULL;
#endif

	RAISE_SPL(SPL_CACHE);
	do {
		KERNEL$CACHE_TOUCH_VM_ENTITY(&node->e, owner);
		node = node->parent;
	} while (node != NULL);
	LOWER_SPL(SPL_FS);

	return NULL;
}

/*
 * Resolve one path component inside a directory using only cached fnodes.
 *
 * handle     - handle positioned on the directory; on success its fnode
 *              (and, for files, its operation table) is advanced to the child.
 * str        - component to look up.
 * open_flags - only forwarded to VFS_FILE_LOOKUP for '^' modifiers.
 *
 * Returns:
 *   NULL                        - child found, handle updated;
 *   __ERR_PTR(-ENOENT)          - empty name, cached negative/move-source entry,
 *                                 or no entry in a directory flagged
 *                                 FNODE_COMPLETE or FNODE_UNCOMMITTED;
 *   __ERR_PTR(-EACCES)          - handle lacks CAP_LOOKUP;
 *   __ERR_PTR(-EINVAL)          - name is "." or "..";
 *   __ERR_PTR(-ENAMETOOLONG)    - directory is already at depth MAX_DEPTH - 1;
 *   &f2->wait                   - entry is busy or being written; the returned
 *                                 pointer is the entry's wait queue;
 *   (void *)1                   - not cached and the directory is not known to
 *                                 be complete.  NOTE(review): presumably tells
 *                                 the caller to go through lookup_io - confirm.
 */
static void *VFS_DIR_LOOKUP(HANDLE *handle, char *str, int open_flags)
{
	char c;
	FNODE *f2;
	FNODE *f = handle->fnode;
	char *e;
	int h;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_FS))) KERNEL$SUICIDE("VFS_DIR_LOOKUP AT SPL %08X", KERNEL$SPL);
#endif
	CHECK_DIR_LISTS_3(f);
	c = *str;
	if (__unlikely(c == '^')) goto control_seq;
	if (__unlikely(!c)) goto enoent;
	if (__unlikely(c == '.')) goto dot;
	if (__unlikely(f->depth == MAX_DEPTH - 1)) return __ERR_PTR(-ENAMETOOLONG);
	lookup_name:
	/* no hash table means nothing is cached for this directory */
	if (__unlikely(!f->u.d.hash)) goto ret1;
	e = str;
	h = 0;
	quickcasehash(e, *e, h);
	h &= f->u.d.hash_mask;
	/* walk the bucket; the body runs for the first entry whose name matches */
	XLIST_FOR_EACH(f2, &f->u.d.hash[h], FNODE, hash_entry) if (__likely(!__strcasexcmp(f2->name, str, e))) {
		if (__likely(!(f2->flags & (FNODE_NEGATIVE | FNODE_MOVE_SRC | FNODE_MOVE_DST | FNODE_BUSY | FNODE_INVALID)))) {
				/* !!! FIXME: check if new fnode allows in ACL lookup */
			if (__unlikely(!HAS_CAPABILITY(handle, CAP_LOOKUP))) return __ERR_PTR(-EACCES);
			handle->fnode = f2;
			if (f2->flags & FNODE_FILE) handle->op = &VFS_FILE_OPERATIONS;
			else CHECK_DIR_LISTS_3(f2);
			RAISE_SPL(SPL_CACHE);
			KERNEL$CACHE_TOUCH_VM_ENTITY(&f2->e, handle->name_addrspace);
			LOWER_SPL(SPL_FS);
			return NULL;
		}
		if (__unlikely(!HAS_CAPABILITY(handle, CAP_LOOKUP))) return __ERR_PTR(-EACCES);
		if (__unlikely(f2->flags & FNODE_MOVE_DST)) {
			/* start writing the entry and have the caller wait on it */
			write_fnode:
			WRITE_FNODE(f2);
			return &f2->wait;
		}
		if (__likely(f2->flags & (FNODE_NEGATIVE | FNODE_MOVE_SRC))) {
			if (__unlikely(f2->flags & FNODE_DIRTY) && __unlikely(f2->fs->flags & FS_MORE_SAME_NAMES)) goto write_fnode;
			enoent:
			return __ERR_PTR(-ENOENT);
		}
		if (__likely(f2->flags & FNODE_BUSY))
			return &f2->wait;
		/* only FNODE_INVALID is left: drop the stale entry and treat the name as not cached */
		VFS_FREE_EMPTY_FNODE(f2);
		goto ret1;
	}
	ret1:
	if (__unlikely(!HAS_CAPABILITY(handle, CAP_LOOKUP))) return __ERR_PTR(-EACCES);
	if (f->flags & (FNODE_COMPLETE | FNODE_UNCOMMITTED)) goto enoent;
	return (void *)1;

	dot:
	/* "." and ".." are rejected; other names starting with '.' are looked up normally */
	if (__unlikely(str[1] == '.') && __unlikely(!str[2])) {
		return __ERR_PTR(-EINVAL);
	}
	if (__unlikely(!str[1])) return __ERR_PTR(-EINVAL);
	goto lookup_name;

	control_seq:
	return VFS_FILE_LOOKUP(handle, str, open_flags);
}

/*
 * Final access checks when a handle is instantiated on an fnode.
 *
 * h          - handle positioned on the fnode.
 * rq         - not used.
 * open_flags - requested access (_O_KRNL_READ, _O_KRNL_WRITE, O_APPEND, O_TRUNC).
 *
 * Returns NULL on success, an __ERR_PTR() code (-EISDIR, -EACCES, -EROFS,
 * -EPERM) when the request is not permitted, or the wait queue returned by
 * VFS_FREE_FILE_PAGES when truncation cannot complete yet.  On success the
 * handle is linked into the fnode's handle list when it has a
 * file_addrspace; otherwise its list entry is voided.
 */
static void *VFS_FNODE_INSTANTIATE(HANDLE *h, IORQ *rq, int open_flags)
{
	FNODE *f = h->fnode;
	WQ *wq;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_FS))) KERNEL$SUICIDE("VFS_FNODE_INSTANTIATE AT SPL %08X", KERNEL$SPL);
#endif

	/* write access: files only, needs append or write capability, writable fs */
	if (__unlikely(open_flags & _O_KRNL_WRITE)) {
		if (__unlikely(f->flags & FNODE_DIRECTORY)) return __ERR_PTR(-EISDIR);
		if (__unlikely(!HAS_CAPABILITY(h, ((open_flags & O_APPEND) ? CAP_APPEND : CAP_WRITE)))) return __ERR_PTR(-EACCES);
		if (__unlikely(f->fs->flags & FS_RO)) return __ERR_PTR(-EROFS);
	}

	/* read access: scan capability for directories, read capability for files */
	if (__likely(open_flags & _O_KRNL_READ)) {
		if (__unlikely(!HAS_CAPABILITY(h, __unlikely(f->flags & FNODE_DIRECTORY) ? CAP_SCANDIR : CAP_READ))) return __ERR_PTR(-EACCES);
	}

	/* truncation: same preconditions as writing, plus write mode must be requested */
	if (__unlikely(open_flags & O_TRUNC)) {
		if (__unlikely(f->flags & FNODE_DIRECTORY)) return __ERR_PTR(-EISDIR);
		if (__unlikely(!HAS_CAPABILITY(h, CAP_WRITE))) return __ERR_PTR(-EACCES);
		if (__unlikely(f->fs->flags & FS_RO)) return __ERR_PTR(-EROFS);
		if (__unlikely(!(open_flags & _O_KRNL_WRITE))) return __ERR_PTR(-EPERM);
		if (__likely(f->size != 0)) {
			wq = VFS_FREE_FILE_PAGES(f, 0);
			/* leaving SPL up is not a bug --- it must be so, so
			   that WQ_WAIT happens at SPL_VSPACE */
			if (__unlikely(wq != NULL)) return wq;
		}
	}

	if (__likely(h->file_addrspace != NULL)) {
		ADD_TO_XLIST(&f->handles, &h->fnode_entry);
	} else {
		VOID_LIST_ENTRY(&h->fnode_entry);
	}
	CHECK_FNODE_HANDLE(f, h);
	return NULL;
}

/*
 * Detach a handle from its fnode by unlinking the handle's fnode_entry.
 * Debug builds additionally verify that the call happens at SPL_VSPACE and
 * clear h->fnode.
 */
static void VFS_FNODE_DETACH(HANDLE *h)
{
	CHECK_FNODE_HANDLE(h->fnode, h);
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_VSPACE))) {
		KERNEL$SUICIDE("VFS$FNODE_DETACH AT SPL %08X", KERNEL$SPL);
	}
	/* not needed but catches bugs */
	h->fnode = NULL;
#endif
	DEL_FROM_LIST(&h->fnode_entry);
}

/*
 * Handle a path component applied to a file handle.  Only '^' access
 * modifiers are accepted (this is also the '^' path of VFS_DIR_LOOKUP).
 *
 * Returns NULL after applying the matching accmod row to handle->flags2,
 * __ERR_PTR(-ENOTDIR) when the component does not start with '^', and
 * __ERR_PTR(-EBADMOD) when the modifier is unknown.  open_flags is not used.
 */
static void *VFS_FILE_LOOKUP(HANDLE *handle, char *str, int open_flags)
{
	int idx;

	if (__unlikely(*str != '^')) return __ERR_PTR(-ENOTDIR);
	str++;

	for (idx = 0; idx < sizeof(accmod) / sizeof(*accmod); idx++) {
		if (_strcasecmp(accmod[idx].name, str)) continue;
		handle->flags2 = (handle->flags2 & ~accmod[idx].flagclr) | accmod[idx].flagset;
		return NULL;
	}
	return __ERR_PTR(-EBADMOD);
}

/*
 * I/O entry used for read/write/aread/awrite in VFS_DIR_OPERATIONS:
 * completes the request with status -EISDIR.
 */
DECL_IOCALL(VFS_ISDIR, SPL_FS, IORQ)
{
	RQ->status = -EISDIR;
	RETURN_AST(RQ);
}

/*
 * Start an on-disk lookup for a path that VFS_DIR_LOOKUP could not resolve
 * from cached fnodes.
 *
 * h          - handle positioned on the directory to start from.
 * str        - remaining path; '/' separators and '^' modifiers are skipped.
 * rq         - caller's request; it is either completed with an error status,
 *              queued on a wait queue, or recorded as the caller of the
 *              page-in request handed to fsops->lookup.
 * open_flags - stored in the page-in request (pgin->wr).
 *
 * For every path component a new FNODE_BUSY fnode is allocated and linked
 * under the previous one.  The first component also allocates the PAGEINRQ;
 * for later components the previous new fnode is turned into a directory
 * with a small hash so that the chain can be linked.  When an allocation
 * fails after the page-in request exists, the chain built so far is looked
 * up as it is.
 *
 * Fix relative to the previous revision: the "toolong" exit (component of
 * MAX_NAMELEN or more characters) with a page-in request already in flight
 * freed the new fnode but left its parent marked FNODE_DIRECTORY with a hash
 * and no children.  The "no_longname" exit undoes that conversion, and
 * VFS$FREE_LOOKUP_PAGEIN descends through u.d.clean.next while
 * FNODE_DIRECTORY is set, so an empty directory at the end of the chain
 * would make it treat the list head as an FNODE.  "toolong" now undoes the
 * conversion the same way "no_longname" does.
 */
static void VFS_DIR_LOOKUP_IO(HANDLE *h, char *str, IORQ *rq, int open_flags)
{
	FNODE *fnode = h->fnode;
	char *longname;
	char c;
	int i;
	int hash;
	FNODE *new_fnode;
	PAGEINRQ *pgin = NULL;
	FS *fs;
	XLIST_HEAD *new_hash;
	WQ *wq;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_FS))) KERNEL$SUICIDE("VFS_DIR_LOOKUP_IO AT SPL %08X", KERNEL$SPL);
#endif
	/*__debug_printf("VFS_DIR_LOOKUP_IO(%s->%s)\n", fnode->name, str);*/
	if (__unlikely((fnode->flags & (FNODE_UNCOMMITTED | FNODE_SRC_UNCOMMITTED)) != 0))
		KERNEL$SUICIDE("VFS_DIR_LOOKUP_IO: %s: UNCOMMITTED FNODE", VFS$FNODE_DESCRIPTION(fnode));
	if (__unlikely(fnode->flags & (FNODE_WRITELOCK | FNODE_WANTFREE | FNODE_KILLED))) {
		if (__unlikely(fnode->flags & FNODE_KILLED)) {
			rq->status = -EIO;
			CALL_AST(rq);
			return;
		}
		WQ_WAIT_F(&fnode->wait, rq);
		return;
	}
	/* the starting directory needs a hash table to link the first new fnode */
	if (__unlikely(!fnode->u.d.hash)) {
		if (__unlikely(!(new_hash = __slalloc(&fnode->fs->small_hash)))) goto freememwait;
		init_small_hash(new_hash);
		fnode->u.d.hash = new_hash;
		fnode->u.d.hash_mask = SMALL_HASH_MASK;
	}
	process_subdir:
	next1:
	c = *str;
	if (__unlikely(c == '/')) goto skipdir1;
	if (__unlikely(c == '^')) goto skipmod1;
	fs = fnode->fs;
	if (__unlikely(!c)) goto empty1;

	if (__unlikely((wq = KERNEL$MAY_ALLOC(h->name_addrspace, fs->fsops->sizeof_FNODE + SMALL_HASH_SIZE * sizeof(XLIST_HEAD))) != NULL)) goto no_fnode_limit;
	if (__unlikely(!(new_fnode = __slalloc(&fs->fnodes)))) goto no_fnode;
#ifdef VFS_INO
	new_fnode->ino = 0;
#endif
	if (pgin) {
		/* second and later components: the previous new fnode becomes a directory */
		if (__unlikely(!(new_hash = __slalloc(&fs->small_hash)))) goto no_smallhash;
		init_small_hash(new_hash);
		fnode->flags |= FNODE_DIRECTORY;
		VFS_INIT_DIR(fnode);
		fnode->u.d.hash = new_hash;
		fnode->u.d.hash_mask = SMALL_HASH_MASK;
	}

	/* copy the upper-cased component into the inline buffer while hashing it */
	hash = 0;
	i = 0;
	do {
		quickcasehashchar(c, hash);
		new_fnode->inline_name[i] = __upcasechr(c);
		i++;
		if (__unlikely(i == FNODE_INLINE_NAMELEN)) goto long1;
		c = str[i];
	} while (c && c != '/');
	new_fnode->inline_name[i] = 0;
	new_fnode->namelen = i;

	name_copied:
	str += i;
	hash &= fnode->u.d.hash_mask;
	RAISE_SPL(SPL_CACHE);
	KERNEL$CACHE_INSERT_VM_ENTITY(&new_fnode->e, h->name_addrspace, 0);
	LOWER_SPL(SPL_FS);
	new_fnode->parent = fnode;
	new_fnode->depth = fnode->depth + 1;
	ADD_TO_XLIST(&fnode->u.d.hash[hash], &new_fnode->hash_entry);
	ADD_TO_LIST_END(&fnode->u.d.clean, &new_fnode->dirent_entry);
	fnode->u.d.n_clean++;
	new_fnode->flags = FNODE_BUSY;
	CHECK_DIR_LISTS_3(fnode);
	POSSIBLY_REHASH_DIRECTORY(fnode);

	fs = new_fnode->fs;
	if (pgin) goto skip_pgin;

	GET_PAGEINRQ(pgin, fs, h->name_addrspace, 0, 0, label, {
		new_fnode->flags = FNODE_INVALID;
		WQ_WAIT_F(wq, rq);
		LOWER_SPL(SPL_FS);
		return;
	});

	pgin->status = 0;
	pgin->caller = rq;
	pgin->fnode = fnode;
	pgin->wr = open_flags;
	fnode->readers++;
	pgin->new_fnode = new_fnode;

	skip_pgin:
	if (!*str) {
		return_pgin:
		fs->fsops->lookup(pgin);
		return;
	}
	str++;
	fnode = new_fnode;
	goto process_subdir;

	skipdir1:
	str++;
	goto next1;

	skipmod1:
	while (*str && *str != '/') str++;
	goto next1;

	long1:
	/* the name does not fit inline: restart the copy into a buffer from the names slab */
	if (__unlikely(!(longname = __slalloc(&fnode->fs->names)))) goto no_longname;
	new_fnode->name = longname;
	hash = 0;
	c = *str;
	i = 0;
	do {
		quickcasehashchar(c, hash);
		longname[i] = __upcasechr(c);
		i++;
		if (__unlikely(i == MAX_NAMELEN)) goto toolong;
		c = str[i];
	} while (c && c != '/');
	longname[i] = 0;
	new_fnode->namelen = i;
	goto name_copied;

	empty1:
	if (__likely(pgin != NULL)) goto return_pgin;
	rq->status = -ENOENT;
	CALL_AST(rq);
	return;

	toolong:
	__slow_slfree(longname);
	if (pgin) {
		/* undo the directory conversion done for this component, as in no_longname */
		free_hash(fnode->fs, fnode->u.d.hash);
		fnode->u.d.hash = NULL;
		fnode->flags &= ~FNODE_DIRECTORY;
	}
	new_fnode->name = new_fnode->inline_name;
	__slow_slfree(new_fnode);
	if (pgin) goto return_pgin;
	rq->status = -ENAMETOOLONG;
	CALL_AST(rq);
	return;

	no_longname:
	if (pgin) {
		free_hash(fnode->fs, fnode->u.d.hash);
		fnode->u.d.hash = NULL;
		fnode->flags &= ~FNODE_DIRECTORY;
	}

	no_smallhash:
	/* restore the constructed state of the name pointer before giving the fnode back */
	new_fnode->name = new_fnode->inline_name;
	__slow_slfree(new_fnode);
	/* fall through */

	no_fnode:
	if (pgin) goto return_pgin;
	freememwait:
	WQ_WAIT_F(&fnode->fs->freemem, rq);
	VFS$FREE_SOME_DATA(fnode->fs);
	return;

	no_fnode_limit:
	if (pgin) goto return_pgin;
	WQ_WAIT_F(wq, rq);
	return;
}

/*
 * Tear down the chain of placeholder fnodes built by VFS_DIR_LOOKUP_IO and
 * release the page-in request.
 *
 * rq       - the lookup page-in request; rq->new_fnode is the top of the
 *            chain and rq->fnode the directory it hangs under.
 * negative - non-zero to keep the top placeholder as a negative entry.  This
 *            is ignored when the directory is FNODE_COMPLETE or
 *            FNODE_UNCOMMITTED.
 *
 * The chain is walked from its deepest fnode upwards.  Every fnode below
 * the top one is freed and its parent's hash table released.  The top one
 * is either freed or turned into an FNODE_NEGATIVE entry (waking its
 * waiters).  Returns the result of VFS$FREE_PAGEIN(rq).
 */
IORQ *VFS$FREE_LOOKUP_PAGEIN(PAGEINRQ *rq, int negative)
{
	FNODE *cur = rq->new_fnode;
	FNODE *up;
	int keep_negative;

	/* descend to the deepest placeholder */
	while (cur->flags & FNODE_DIRECTORY) {
		cur = LIST_STRUCT(cur->u.d.clean.next, FNODE, dirent_entry);
	}

	keep_negative = negative && !(rq->fnode->flags & (FNODE_COMPLETE | FNODE_UNCOMMITTED));

	/* free everything below the top placeholder */
	for (;;) {
		up = cur->parent;
		if (up == rq->fnode) break;
		FNODE_OUT_OF_WANTFREE(cur);
		cur->flags = 0;
		VFS_FREE_EMPTY_FNODE(cur);
		free_hash(up->fs, up->u.d.hash);
		WQ_WAKE_ALL(&up->fs->freemem);
		cur = up;
	}

	/* top placeholder: keep as negative entry or free */
	if (keep_negative) {
		FNODE_OUT_OF_WANTFREE(cur);
		cur->flags = FNODE_NEGATIVE;
		WQ_WAKE_ALL(&cur->wait);
	} else {
		FNODE_OUT_OF_WANTFREE(cur);
		cur->flags = 0;
		VFS_FREE_EMPTY_FNODE(cur);
	}
	return VFS$FREE_PAGEIN(rq);
}

/*
 * Drop the reader reference a page-in request holds on rq->fnode, wake the
 * fnode's waiters when the reader count reaches zero, and release the
 * request through VFS$FREE_EMPTY_PAGEIN.  Debug builds halt on a negative
 * reader count.
 */
IORQ *VFS$FREE_PAGEIN(PAGEINRQ *rq)
{
	FNODE *dir = rq->fnode;

	dir->readers--;
#if __DEBUG >= 1
	if (__unlikely(dir->readers < 0)) {
		KERNEL$SUICIDE("VFS$FREE_PAGEIN: %s: LOCK UNDERRUN: %d", VFS$FNODE_DESCRIPTION(dir), dir->readers);
	}
#endif
	if (__likely(dir->readers == 0)) {
		WQ_WAKE_ALL(&dir->wait);
	}
	return VFS$FREE_EMPTY_PAGEIN(rq);
}

/*
 * Release a page-in request that holds no reader reference; returns
 * whatever FREE_PAGEINRQ(rq, 0) yields.
 */
IORQ *VFS$FREE_EMPTY_PAGEIN(PAGEINRQ *rq)
{
	IORQ *result = FREE_PAGEINRQ(rq, 0);

	return result;
}

/*
 * Create a file or (with _O_MKDIR in open_flags) a directory inside the
 * directory referenced by handle.
 *
 * handle     - handle positioned on the parent directory; on success it is
 *              advanced to the new fnode.
 * filename   - name of the entry to create.
 * open_flags - _O_MKDIR / _O_MKDIR_QUOTA select directory creation.
 *
 * Returns:
 *   NULL                  - entry created (or a dirty negative entry revived);
 *                           parent and child are marked dirty;
 *   __ERR_PTR(-EINVAL)    - empty name, name starting with '^', "." or "..";
 *   __ERR_PTR(-ENAMETOOLONG) - parent already at depth MAX_DEPTH - 1;
 *   __ERR_PTR(-EACCES)    - handle lacks CAP_CREATE | CAP_LOOKUP;
 *   __ERR_PTR(-EROFS)     - filesystem is read-only;
 *   __ERR_PTR(-EEXIST)    - a file, directory or move destination of that name exists;
 *   __ERR_PTR(r)          - non-zero result r of fsops->validate_filename;
 *   a WQ pointer          - the caller has to wait (dirty limit, locked or busy
 *                           fnode, allocation limit, fs->freemem);
 *   the fsops->account result - when accounting returns non-NULL; the fnode is
 *                           then left in the directory as FNODE_INVALID;
 *   (void *)2             - out of fnode memory but VFS$FREE_SOME_DATA reported
 *                           progress.  NOTE(review): presumably "retry" - confirm.
 */
static void *VFS_DIR_CREATE(HANDLE *handle, char *filename, int open_flags)
{
	void *account;
	FNODE *f = handle->fnode;
	FS *fs;
	FNODE *f2;
	char *e, *n;
	int h, r;
	WQ *wq;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_FS))) KERNEL$SUICIDE("VFS$DIR_CREATE AT SPL %08X", KERNEL$SPL);
#endif
	if (__unlikely(filename[0] == '^')) return __ERR_PTR(-EINVAL);
	if (__unlikely(!filename[0])) return __ERR_PTR(-EINVAL);
	if (__unlikely(filename[0] == '.')) {
		if (__unlikely(!filename[1])) return __ERR_PTR(-EINVAL);
		if (__unlikely(filename[1] == '.') && __unlikely(!filename[2])) return __ERR_PTR(-EINVAL);
	}
	if (__unlikely(f->depth == MAX_DEPTH - 1)) return __ERR_PTR(-ENAMETOOLONG);
	if (__unlikely(!HAS_CAPABILITY(handle, CAP_CREATE | CAP_LOOKUP))) return __ERR_PTR(-EACCES);
	if (__unlikely(f->fs->flags & FS_RO)) return __ERR_PTR(-EROFS);

	r = f->fs->fsops->validate_filename(f->fs, filename);
	if (__unlikely(r)) return __ERR_PTR(r);

	if (__unlikely((wq = VFS$MAY_DIRTY(handle->name_addrspace, f->fs)) != NULL)) return wq;
	if (__unlikely(f->flags & (FNODE_WANTFREE | FNODE_WRITELOCK))) return &f->wait;

	if (__unlikely(!f->u.d.hash)) goto no_hash;
	hash_done:

	e = filename;
	h = 0;
	quickcasehash(e, *e, h);
	h &= f->u.d.hash_mask;
	/* an fnode with this name may already be cached; the body runs for the first match */
	XLIST_FOR_EACH(f2, &f->u.d.hash[h], FNODE, hash_entry) if (__likely(!__strcasexcmp(f2->name, filename, e))) {
		if (__unlikely(f2->flags & (FNODE_MOVE_DST | FNODE_MOVE_SRC | FNODE_WANTFREE | FNODE_BUSY))) {
			if (__unlikely(f2->flags & FNODE_MOVE_DST)) goto eexist;
			if (__unlikely(f2->flags & FNODE_MOVE_SRC)) goto q;
			return &f2->wait;
		}
		if (__likely(f2->flags & FNODE_NEGATIVE)) {
			if (__likely(!(f2->flags & (FNODE_FILE | FNODE_DIRECTORY | FNODE_WRITELOCK)))) {
				/* plain negative entry: take it off the clean list and reuse it */
				f2->size = 0;
				f2->disk_size = 0;
				f2->fs->fsops->init_fnode_quota(f2);
				DEL_FROM_LIST(&f2->dirent_entry);
				f->u.d.n_clean--;
				FNODE_OUT_OF_WANTFREE(f2);
				goto ok_to_create_acct;
			}
			if (__unlikely(!(f2->flags & FNODE_DIRTY)))
				KERNEL$SUICIDE("VFS$DIR_CREATE: %s: NEGATIVE NON-DIRTY FNODE", VFS$FNODE_DESCRIPTION(f2));
			if (__unlikely(f2->fs->flags & FS_MORE_SAME_NAMES)) goto q;
	/* !!! FIXME: if there are extended attributes on f2, goto q */
			/* a deleted-but-unwritten entry of the same kind is revived by clearing FNODE_NEGATIVE */
			if (__unlikely(open_flags & _O_MKDIR)) {
				if (__unlikely(open_flags & _O_MKDIR_QUOTA)) goto q;
				if (__likely((f2->flags & (FNODE_DIRECTORY | FNODE_OUT_OF_SYNC | FNODE_WRITELOCK | FNODE_QUOTA)) == FNODE_DIRECTORY)) {
					f2->flags &= ~FNODE_NEGATIVE;
					goto set_f2;
				}
			} else {
				if (__likely(!(f2->flags & (FNODE_DIRECTORY | FNODE_OUT_OF_SYNC | FNODE_WRITELOCK)))) {
					f2->flags &= ~FNODE_NEGATIVE;
					goto set_f2;
				}
			}
			/* otherwise write the old entry out and let the caller wait for it */
			q:
			WRITE_FNODE(f2);
			return &f2->wait;
		}
		if (__unlikely(f2->flags & (FNODE_FILE | FNODE_DIRECTORY))) {
			eexist:
			return __ERR_PTR(-EEXIST);
		}
		if (__unlikely(!(f2->flags & FNODE_INVALID)))
			KERNEL$SUICIDE("VFS$DIR_CREATE: %s: FNODE NOT INVALID: %d", VFS$FNODE_DESCRIPTION(f2), f2->flags);
		/* invalid placeholder: reuse it */
		DEL_FROM_LIST(&f2->dirent_entry);
		f->u.d.n_clean--;
		FNODE_OUT_OF_WANTFREE(f2);
		goto ok_to_create_acct;
	}
	/* nothing cached under this name: allocate a fresh fnode */
	fs = f->fs;
	/* !!! FIXME: quota: when creating quota directory, set quota flag here & let account_new account blocks on the directory itself */
	if (__unlikely((wq = KERNEL$MAY_ALLOC(handle->name_addrspace, fs->fsops->sizeof_FNODE + SMALL_HASH_SIZE * sizeof(XLIST_HEAD))) != NULL)) goto oom_fno_limit;
	f2 = __slalloc(&fs->fnodes);
	if (__unlikely(f2 == NULL)) goto oom_fno;
#ifdef VFS_INO
	f2->ino = 0;
#endif
	if (__unlikely((f2->namelen = strlen(filename)) >= FNODE_INLINE_NAMELEN)) goto ext_name;
	ext_name_allocated:
	strcpy(f2->name, filename);
	__upcase(f2->name);
	RAISE_SPL(SPL_CACHE);
	KERNEL$CACHE_INSERT_VM_ENTITY(&f2->e, handle->name_addrspace, 0);
	LOWER_SPL(SPL_FS);
	f2->parent = f;
	f2->depth = f->depth + 1;

	f2->size = 0;
	f2->disk_size = 0;
	fs->fsops->init_fnode_quota(f2);
	ADD_TO_XLIST(&f->u.d.hash[h], &f2->hash_entry);
	ok_to_create_acct:
	account = f2->fs->fsops->account(f2, __unlikely(open_flags & _O_MKDIR) ? ACCOUNT_DIR : ACCOUNT_FILE, NULL);
	if (__unlikely(account != NULL)) {
		/* accounting did not succeed: keep the fnode as an invalid placeholder */
		ADD_TO_LIST_END(&f->u.d.clean, &f2->dirent_entry);
		f->u.d.n_clean++;
		f2->flags = FNODE_INVALID;
		CHECK_DIR_LISTS_3(f);
		POSSIBLY_REHASH_DIRECTORY(f);
		return account;
	}

	ADD_TO_LIST_END(&f->u.d.clean, &f2->dirent_entry);
	f->u.d.n_clean++;

	if (!(open_flags & _O_MKDIR)) {
		f2->flags = FNODE_FILE | FNODE_UNCOMMITTED;
	} else {
		f2->flags = FNODE_DIRECTORY | FNODE_UNCOMMITTED;
		VFS_INIT_DIR(f2);
	}
	CHECK_DIR_LISTS_3(f);
	POSSIBLY_REHASH_DIRECTORY(f);

	set_f2:
	handle->fnode = f2;
	if (__likely(f2->flags & FNODE_FILE)) handle->op = &VFS_FILE_OPERATIONS;
	/* time() also stores the current time into the parent's mtime */
	f2->ctime = f2->mtime = time(&f->mtime);
	f2->size = 0;
	VFS$SET_DIRTY(f2);
	VFS$SET_DIRTY(f);
	return NULL;

	ext_name:
	/* name does not fit into the inline buffer */
	n = __slalloc(&fs->names);
	if (__likely(n != NULL)) {
		f2->name = n;
		goto ext_name_allocated;
	}
	__slow_slfree(f2);
	oom_fno:
	if (__likely(VFS$FREE_SOME_DATA(fs))) return (void *)2;
	return &fs->freemem;

	oom_fno_limit:
	return wq;

	no_hash:
	/* the directory has no hash table yet: give it a small one */
	{
		XLIST_HEAD *new_hash;
		fs = f->fs;
		if (__likely((new_hash = __slalloc(&fs->small_hash)) != NULL)) {
			init_small_hash(new_hash);
			f->u.d.hash = new_hash;
			f->u.d.hash_mask = SMALL_HASH_MASK;
			goto hash_done;
		}
		goto oom_fno;
	}
}

/*
 * Check whether a directory child list contains no live entries.
 *
 * Entries flagged FNODE_NEGATIVE, FNODE_INVALID, FNODE_MOVE_SRC or
 * FNODE_KILLED do not count.  Returns 1 when every entry on the list is of
 * that kind (or the list is empty), 0 otherwise.
 */
static int IS_LIST_EMPTY_DIR(LIST_HEAD *list)
{
	FNODE *child;

	LIST_FOR_EACH(child, list, FNODE, dirent_entry) {
		if (__unlikely(!(child->flags & (FNODE_NEGATIVE | FNODE_INVALID | FNODE_MOVE_SRC | FNODE_KILLED)))) {
			return 0;
		}
	}
	return 1;
}

/*
 * Common implementation of file and directory deletion.
 *
 * f     - fnode to delete.
 * rq    - caller's request; used when the operation has to wait or when a
 *         directory must first be read from disk.
 * leave - non-zero forces FNODE_DONT_ZAP on an uncommitted fnode.
 * proc  - process charged for dirtying and for the page-in request.
 *
 * Returns:
 *   NULL                     - deleted; the parent's mtime is updated and the
 *                              parent is marked dirty;
 *   __ERR_PTR(-EBUSY)        - f is a root (no parent);
 *   __ERR_PTR(-EROFS)        - filesystem is read-only;
 *   __ERR_PTR(-ENOTEMPTY)    - directory has live cached entries;
 *   a WQ pointer             - the caller has to wait on that queue;
 *   (void *)3                - rq was queued on an fnode or a readdir was
 *                              started with rq as its caller.
 *                              NOTE(review): presumably "request taken over" -
 *                              confirm with the handle layer.
 *
 * Fix relative to the previous revision: on the FNODE_UNCOMMITTED paths the
 * fnode is released with VFS$FREE_FNODE(f) and control then reaches
 * "settime", which used to read f->parent from the released fnode.  The
 * parent pointer is now read once, before anything is freed.
 */
static void *DO_DELETE(FNODE *f, IORQ *rq, int leave, PROC *proc)
{
	PAGEINRQ *rddir;
	FNODE *parent;
	FS *fs;
	WQ *wq;
	parent = f->parent;
	if (__unlikely(!parent)) return __ERR_PTR(-EBUSY);
	if (__unlikely(f->fs->flags & FS_RO)) return __ERR_PTR(-EROFS);
	if (__unlikely(f->flags & (FNODE_WRITELOCK | FNODE_WANTFREE))) return &f->wait;
	if (__unlikely((wq = VFS$MAY_DIRTY(proc, f->fs)) != NULL)) return wq;
	if (__likely(f->flags & FNODE_FILE)) goto file;
	if (__unlikely(!IS_LIST_EMPTY_DIR(&f->u.d.clean)) || __unlikely(!IS_LIST_EMPTY_DIR(&f->u.d.dirty))) return __ERR_PTR(-ENOTEMPTY);
	if (f->flags & (FNODE_COMPLETE | FNODE_UNCOMMITTED | FNODE_KILLED)) {
		/* all entries of the directory are known: drop the dead cached children */
		CHECK_DIR_LISTS(f);
		while (f->u.d.n_clean) {
			FNODE *f2 = LIST_STRUCT(f->u.d.clean.next, FNODE, dirent_entry);
			wq = VFS$FREE_FNODE(f2);
			if (__unlikely(wq != NULL)) return wq;
		}
		if (f->flags & FNODE_UNCOMMITTED && __unlikely(f->u.d.n_dirty != 0)) {
			/* this can happen only with FNODE_SRC_UNCOMMITTED or FNODE_KILLED fnodes */
			FNODE *f2;
			LIST_FOR_EACH(f2, &f->u.d.dirty, FNODE, dirent_entry) {
				WRITE_FNODE(f2);
			}
			f2 = LIST_STRUCT(f->u.d.dirty.prev, FNODE, dirent_entry);
			return &f2->wait;
		}
		if (__unlikely(VFS$IS_FNODE_LOCKED(f))) {
			return &f->wait;
		}
		time(&parent->mtime);
		VFS$SET_DIRTY(parent);
		if (f->flags & FNODE_UNCOMMITTED) {
			/* never written to disk: undo the accounting and release the fnode */
			WQ *wq;
			f->fs->fsops->unaccount(f, ACCOUNT_DIR, NULL);
			if (!(parent->flags & (FNODE_UNCOMMITTED | FNODE_COMPLETE)) || __unlikely(leave))
				f->flags |= FNODE_DONT_ZAP;
			VFS$UNSET_DIRTY(f);
			if (__unlikely((wq = VFS$FREE_FNODE(f)) != NULL))
				KERNEL$SUICIDE("DO_DELETE: %s: VFS$FREE_FNODE FAILED 1, FLAGS %X, READERS %d, WQ %s", VFS$FNODE_DESCRIPTION(f), f->flags, f->readers,
#ifdef __WQ_DEBUG
				wq->name
#else
				"?"
#endif
				);
		} else {
			/* exists on disk: keep a dirty negative entry so the deletion gets written */
			f->flags |= FNODE_NEGATIVE;
			VFS$SET_DIRTY(f);
			VFS$KILL_FNODE_HANDLES(f);
		}
		/* execute leave here ... but it's NULL */
		goto settime;
	}
	/* directory contents not fully known: it must be read before it can be deleted */
	fs = f->fs;
	if (__unlikely(fs->flags & FS_MORE_SAME_NAMES)) {
		FNODE *f2;
		LIST_FOR_EACH(f2, &f->u.d.dirty, FNODE, dirent_entry) {
			if (f2->flags & (FNODE_NEGATIVE | FNODE_MOVE_SRC)) {
				WRITE_FNODE(f2);
				if (rq) {
					WQ_WAIT_F(&f2->wait, rq);
					rq = NULL;
				}
			}
		}
		if (__unlikely(!rq)) return (void *)3;
	}
	GET_PAGEINRQ(rddir, fs, proc, 0, 0, label, {
		return wq;
	});
	rddir->status = 0;
	rddir->caller = rq;
	rddir->fnode = f;
	rddir->wr = -1;
	f->readers++;
	fs->fsops->readdir(rddir, NULL);
	return (void *)3;

	file:
	if (__unlikely(VFS$IS_FNODE_LOCKED(f))) {
		return &f->wait;
	}
	if (__unlikely((wq = VFS_FREE_FILE_PAGES(f, 0)) != NULL)) {
		return wq;
	}

	if (f->flags & FNODE_UNCOMMITTED) {
		/* never written to disk: undo the accounting and release the fnode */
		WQ *wq;
		f->fs->fsops->unaccount(f, ACCOUNT_FILE, NULL);
		if (!(parent->flags & (FNODE_UNCOMMITTED | FNODE_COMPLETE)) || __unlikely(leave))
			f->flags |= FNODE_DONT_ZAP;
		VFS$UNSET_DIRTY(f);
		if (__unlikely((wq = VFS$FREE_FNODE(f)) != NULL))
			KERNEL$SUICIDE("DO_DELETE: %s: VFS$FREE_FNODE FAILED 2, FLAGS %X, READERS %d, WQ %s", VFS$FNODE_DESCRIPTION(f), f->flags, f->readers,
#ifdef __WQ_DEBUG
			wq->name
#else
			"?"
#endif
			);
	} else {
		/* exists on disk: keep a dirty negative entry so the deletion gets written */
		f->flags |= FNODE_NEGATIVE;
		VFS$SET_DIRTY(f);
		VFS$KILL_FNODE_HANDLES(f);
	}
	settime:
	/* f may have been released above; only the cached parent pointer is used here */
	time(&parent->mtime);
	VFS$SET_DIRTY(parent);
	return NULL;
}

/*
 * Delete handler for directory handles.
 *
 * Only accepts requests whose open_flags have _O_MKDIR set and
 * _O_DELETE_PROC clear (otherwise -EISDIR), requires CAP_DELETE on h
 * (otherwise -EACCES) and refuses to delete the fnode hp itself refers to
 * (-EBUSY).  Everything else is delegated to DO_DELETE.
 */
static void *VFS_DIR_DELETE(HANDLE *h, IORQ *rq, int open_flags, HANDLE *hp)
{
	int mode = open_flags & (_O_MKDIR | _O_DELETE_PROC);
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_FS))) KERNEL$SUICIDE("VFS$DIR_DELETE AT SPL %08X", KERNEL$SPL);
#endif
	if (__unlikely(mode != _O_MKDIR)) {
		return __ERR_PTR(-EISDIR);
	}
	if (__unlikely(!HAS_CAPABILITY(h, CAP_DELETE))) {
		return __ERR_PTR(-EACCES);
	}
	if (__unlikely(h->fnode == hp->fnode)) {
		return __ERR_PTR(-EBUSY);
	}
	return DO_DELETE(h->fnode, rq, 0, h->name_addrspace);
}

/*
 * Delete handler for file handles.
 *
 * Rejects requests carrying _O_MKDIR or _O_DELETE_PROC (-ENOTDIR), requires
 * CAP_DELETE on h (-EACCES) and refuses to delete the fnode hp itself
 * refers to (-EBUSY).  Everything else is delegated to DO_DELETE.
 */
static void *VFS_FILE_DELETE(HANDLE *h, IORQ *rq, int open_flags, HANDLE *hp)
{
	int mode = open_flags & (_O_MKDIR | _O_DELETE_PROC);
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_FS))) KERNEL$SUICIDE("VFS$FILE_DELETE AT SPL %08X", KERNEL$SPL);
#endif
	if (__unlikely(mode != 0)) {
		return __ERR_PTR(-ENOTDIR);
	}
	if (__unlikely(!HAS_CAPABILITY(h, CAP_DELETE))) {
		return __ERR_PTR(-EACCES);
	}
	if (__unlikely(h->fnode == hp->fnode)) {
		return __ERR_PTR(-EBUSY);
	}
	return DO_DELETE(h->fnode, rq, 0, h->name_addrspace);
}

/*
 * Per-fnode callback that recomputes an fnode's depth as its parent's depth
 * plus one.  param is not used; always returns NULL.
 */
void *FIX_DEPTH(FNODE *f, void *param)
{
	FNODE *dir = f->parent;

	f->depth = dir->depth + 1;
	return NULL;
}

#ifdef VFS_INO
/*
 * Per-fnode callback (passed to VFS$DO_FOR_SUBTREE by VFS_DIR_RENAME) that
 * resets the fnode's ino field to 0.  param is not used; always returns
 * NULL.
 */
static void *CLEAR_INO(FNODE *f, void *param)
{
	f->ino = 0;

	return NULL;
}
#endif

/*
 * Rename (move) the fnode behind handle "fh" to the name "filename" inside
 * the directory behind "handle". "rq" is the request on whose behalf the
 * rename runs; it is passed on to the helpers that may need to wait.
 *
 * Return value, as produced by the code below:
 *   NULL               - nothing more to do in this call: the fnode was
 *                        relinked in memory, the move was recorded with
 *                        FNODE_MOVE_SRC / FNODE_MOVE_DST, or source and
 *                        target turned out to be the same fnode
 *   __ERR_PTR(-errno)  - the request was rejected
 *   (void *)1, 2, 3    - small integer codes; how they are handled is up to
 *                        the caller, which is not visible in this part of
 *                        the file
 *   anything else      - a pointer passed back unchanged from a wait queue
 *                        field or from VFS$MAY_DIRTY, KERNEL$MAY_ALLOC,
 *                        VFS$FREE_FNODE, fsops->account or DO_DELETE
 */
static void *VFS_DIR_RENAME(HANDLE *handle, char *filename, HANDLE *fh, IORQ *rq)
{
	void *account;
	FNODE *f = handle->fnode;	/* target directory */
	FS *fs;
	FNODE *f2, *f1;			/* f1: fnode being moved, f2: fnode carrying the target name */
	char *e, *n;
	int h, r;			/* h: hash bucket of "filename" within f */
	WQ *wq;
	int acct;			/* ACCOUNT_DIR or ACCOUNT_FILE, depending on f1 */
#if __DEBUG >= 1
	/* debug builds: die if the SPL check against SPL_FS fails */
	if (__unlikely(SPLX_BELOW(KERNEL$SPL, SPL_X(SPL_FS)))) KERNEL$SUICIDE("VFS$DIR_RENAME AT SPL %08X", KERNEL$SPL);
#endif
	/* reject names starting with '^', the empty name, "." and ".." */
	if (__unlikely(filename[0] == '^')) return __ERR_PTR(-EINVAL);
	if (__unlikely(!filename[0])) return __ERR_PTR(-EINVAL);
	if (__unlikely(filename[0] == '.')) {
		if (__unlikely(filename[1] == '.') && __unlikely(!filename[2])) return __ERR_PTR(-EINVAL);
		if (__unlikely(!filename[1])) return __ERR_PTR(-EINVAL);
	}
	/* the source handle must be a VFS file or VFS directory handle */
	if (__unlikely(fh->op != &VFS_FILE_OPERATIONS) && __unlikely(fh->op != &VFS_DIR_OPERATIONS)) return __ERR_PTR(-EXDEV);
	if (__unlikely(f->depth == MAX_DEPTH - 1)) return __ERR_PTR(-ENAMETOOLONG);
	/* need create+lookup on the target directory and delete on the source */
	if (__unlikely(!HAS_CAPABILITY(handle, CAP_CREATE | CAP_LOOKUP)) || __unlikely(!HAS_CAPABILITY(fh, CAP_DELETE)) || __unlikely(SOME_HIGHER_CAPABILITY(handle, fh))) return __ERR_PTR(-EACCES);
	if (__unlikely(f->fs->flags & FS_RO)) return __ERR_PTR(-EROFS);

	/* let the filesystem driver veto the name */
	r = f->fs->fsops->validate_filename(f->fs, filename);
	if (__unlikely(r)) return __ERR_PTR(r);

	f1 = fh->fnode;
	/* source and target must live on the same filesystem instance */
	if (__unlikely(f1->fs != f->fs)) return __ERR_PTR(-EXDEV);
	/* !!! FIXME: quota: check that f1 and f have same quota */

	if (__unlikely((wq = VFS$MAY_DIRTY(handle->name_addrspace, f->fs)) != NULL)) return wq;
	if (__unlikely(f->flags & (FNODE_WANTFREE | FNODE_WRITELOCK))) return &f->wait;

#ifdef VFS_INO
	/* zero the cached inode numbers of the whole source subtree */
	VFS$DO_FOR_SUBTREE(f1, NULL, DFS_PREORDER, CLEAR_INO);
#endif

	/* walk from the target directory up to the root: f1 must not be the
	   target directory itself or any of its ancestors */
	for (f2 = f; f2; f2 = f2->parent) if (__unlikely(f2 == f1)) return __ERR_PTR(-EINVAL);
	if (__unlikely(f1->flags & (FNODE_WANTFREE | FNODE_WRITELOCK))) return &f1->wait;

	/* f1 is a directory without FNODE_UNCOMMITTED: free its cached
	   children one at a time (dirty list first, then clean list),
	   restarting after each successful free */
	if (__unlikely((f1->flags & (FNODE_DIRECTORY | FNODE_UNCOMMITTED)) == FNODE_DIRECTORY)) {
		a:
		CHECK_DIR_LISTS(f1);
		if (__unlikely(f1->u.d.n_dirty != 0)) {
			FNODE *nf = LIST_STRUCT(f1->u.d.dirty.next, FNODE, dirent_entry);
			if (__unlikely(!(wq = VFS$FREE_FNODE(nf)))) goto a;
			return wq;
		}
		if (__unlikely(f1->u.d.n_clean != 0)) {
			FNODE *nf = LIST_STRUCT(f1->u.d.clean.next, FNODE, dirent_entry);
			if (__likely(!(wq = VFS$FREE_FNODE(nf)))) goto a;
			return wq;
		}
	}
	if (__unlikely(VFS$KILL_SUBTREE_HANDLES_AND_PAGES(f1, rq) != NULL)) return (void *)3;

	/* the target directory may not have a hash table yet */
	if (__unlikely(!f->u.d.hash)) goto no_hash;
	hash_done:

	acct = __unlikely(f1->flags & FNODE_DIRECTORY) ? ACCOUNT_DIR : ACCOUNT_FILE;

	/* hash the target name; NOTE(review): quickcasehash presumably
	   advances "e" to the end of the name, which __strcasexcmp then uses */
	e = filename;
	h = 0;
	quickcasehash(e, *e, h);
	h &= f->u.d.hash_mask;
	/* is there already an fnode with the target name in this directory? */
	XLIST_FOR_EACH(f2, &f->u.d.hash[h], FNODE, hash_entry) if (__likely(!__strcasexcmp(f2->name, filename, e))) {
		void *d;
		/* existing entry is busy: for a pending move, start writing it
		   out; in every case hand back its wait queue */
		if (__unlikely(f2->flags & (FNODE_MOVE_SRC | FNODE_MOVE_DST | FNODE_WANTFREE | FNODE_WRITELOCK))) {
			if (f2->flags & (FNODE_MOVE_SRC | FNODE_MOVE_DST)) WRITE_FNODE(f2);
			return &f2->wait;
		}
		if (__unlikely(f2->flags & FNODE_INVALID)) {
			/* directory contents not fully known: drop the invalid
			   entry and return code 1 */
			if (__likely(!(f->flags & (FNODE_COMPLETE | FNODE_UNCOMMITTED)))) {
				VFS_FREE_EMPTY_FNODE(f2);
				return (void *)1;
			}
			if (!(f1->flags & FNODE_UNCOMMITTED)) if (__unlikely((account = f->fs->fsops->account(f, acct, NULL)) != NULL)) return account;
			/* take f2 off the clean list; "inv" puts it back as a
			   negative entry */
			DEL_FROM_LIST(&f2->dirent_entry);
			f->u.d.n_clean--;
			FNODE_OUT_OF_WANTFREE(f2);
			goto inv;
		}
		/* account for the new entry unless f1 is FNODE_UNCOMMITTED */
		if (!(f1->flags & FNODE_UNCOMMITTED)) if (__unlikely((account = f->fs->fsops->account(f, acct, NULL)) != NULL)) return account;
		if (__likely(f2->flags & FNODE_NEGATIVE)) {
			goto deleted_ok;
		}
		/* renaming an fnode onto itself: undo the accounting, done */
		if (__unlikely(f2 == f1)) {
			if (!(f1->flags & FNODE_UNCOMMITTED)) f->fs->fsops->unaccount(f, acct, NULL);
			return NULL;
		}
		/* !!! FIXME: check f2 for delete ACL */
		/* a real fnode occupies the target name: delete it first */
		d = DO_DELETE(f2, rq, 1, handle->name_addrspace);
		if (__unlikely(d != NULL)) {
			if (!(f1->flags & FNODE_UNCOMMITTED)) f->fs->fsops->unaccount(f, acct, NULL);
			return d;
		}
		goto deleted_ok;
	}
	/* no cached entry with that name; if the directory is neither
	   FNODE_COMPLETE nor FNODE_UNCOMMITTED, return code 1 */
	if (__likely(!(f->flags & (FNODE_COMPLETE | FNODE_UNCOMMITTED)))) return (void *)1;
	fs = f->fs;
	/* allocate a fresh fnode f2 to carry the target name */
	if (__unlikely((wq = KERNEL$MAY_ALLOC(handle->name_addrspace, fs->fsops->sizeof_FNODE + SMALL_HASH_SIZE * sizeof(XLIST_HEAD))) != NULL)) goto oom_fno_limit;
	f2 = __slalloc(&fs->fnodes);
	if (__unlikely(f2 == NULL)) goto oom_fno;
#ifdef VFS_INO
	f2->ino = 0;
#endif
	/* names of FNODE_INLINE_NAMELEN or more characters need a separately
	   allocated name buffer (see ext_name below) */
	if (__unlikely((f2->namelen = strlen(filename)) >= FNODE_INLINE_NAMELEN)) goto ext_name;
	ext_name_allocated:
	strcpy(f2->name, filename);
	__upcase(f2->name);
	RAISE_SPL(SPL_CACHE);
	KERNEL$CACHE_INSERT_VM_ENTITY(&f2->e, handle->name_addrspace, 0);
	LOWER_SPL(SPL_FS);
	f2->parent = f;
	f2->depth = f->depth + 1;
	if (!(f1->flags & FNODE_UNCOMMITTED)) if (__unlikely((account = f->fs->fsops->account(f, acct, NULL)) != NULL)) {
		/* accounting failed: keep f2 in the directory as an
		   FNODE_INVALID clean entry and pass the result back */
		ADD_TO_XLIST(&f->u.d.hash[h], &f2->hash_entry);
		ADD_TO_LIST_END(&f->u.d.clean, &f2->dirent_entry);
		f->u.d.n_clean++;
		f2->flags = FNODE_INVALID;
		CHECK_DIR_LISTS_3(f);
		POSSIBLY_REHASH_DIRECTORY(f);
		return account;
	}
	ADD_TO_XLIST(&f->u.d.hash[h], &f2->hash_entry);
	inv:
	/* f2 becomes a negative placeholder on the directory's clean list */
	f2->flags = FNODE_NEGATIVE;
	ADD_TO_LIST_END(&f->u.d.clean, &f2->dirent_entry);
	f->u.d.n_clean++;
	CHECK_DIR_LISTS_3(f);

	deleted_ok:
	/* the target name is now free; f2 is the fnode standing in for it */
	if (f1->flags & FNODE_UNCOMMITTED) {
		FNODE *fp;
		/* f2 is dirty: turn f1's FNODE_UNCOMMITTED into
		   FNODE_SRC_UNCOMMITTED and record the move via defer_move */
		if (__unlikely(f2->flags & FNODE_DIRTY)) {
			f1->flags = (f1->flags & ~FNODE_UNCOMMITTED) | FNODE_SRC_UNCOMMITTED;
			goto defer_move;
		}
		/* otherwise move f1 purely in memory: unlink it from its old
		   parent and hook it into f's hash and dirty list */
		DEL_FROM_LIST(&f1->hash_entry);
		DEL_FROM_LIST(&f1->dirent_entry);
		f1->parent->u.d.n_dirty--;
		ADD_TO_XLIST(&f->u.d.hash[h], &f1->hash_entry);
		ADD_TO_LIST_END(&f->u.d.dirty, &f1->dirent_entry);
		f->u.d.n_dirty++;
		fp = f1->parent;
		f1->parent = f;
		f1->namelen = f2->namelen;
		/* f1 takes over f2's name: steal the external buffer if f2 has
		   one (releasing f1's own external buffer), else copy */
		if (__unlikely(f2->name != f2->inline_name)) {
			if (__likely(f1->name != f1->inline_name)) {
				__slow_slfree(f1->name);
				WQ_WAKE_ALL(&f1->fs->freemem);
			}
			f1->name = f2->name;
			(f2->name = f2->inline_name)[0] = 0;
			f2->namelen = 0;
		} else strcpy(f1->name , f2->name);
		/* recompute depth for f1 and everything below it */
		VFS$DO_FOR_SUBTREE(f1, NULL, DFS_PREORDER, FIX_DEPTH);
		VFS_FREE_EMPTY_FNODE(f2);
		VFS$SET_DIRTYLIST(f1);
		VFS$UNSET_DIRTYLIST(fp);
		CHECK_DIR_LISTS_3(fp);
		CHECK_DIR_LISTS_3(f);
		POSSIBLY_REHASH_DIRECTORY(f);
		return NULL;
	}
	defer_move:
	/* link f1 (source) and f2 (destination) to each other, mark both and
	   both parent directories dirty, and stamp the directories' mtime */
	f2->flags &= ~FNODE_NEGATIVE;
	f2->flags |= FNODE_MOVE_DST;
	f2->move_link = f1;
	f1->move_link = f2;
	f1->flags |= FNODE_MOVE_SRC;
	VFS$SET_DIRTY(f1);
	VFS$SET_DIRTY(f2);
	time(&f->mtime);
	VFS$SET_DIRTY(f);
	time(&f1->parent->mtime);
	VFS$SET_DIRTY(f1->parent);
	POSSIBLY_REHASH_DIRECTORY(f);
	return NULL;

	ext_name:
	/* allocate an external name buffer for f2; on failure release f2 and
	   fall into the out-of-memory handling */
	n = __slalloc(&fs->names);
	if (__likely(n != NULL)) {
		f2->name = n;
		goto ext_name_allocated;
	}
	__slow_slfree(f2);
	oom_fno:
	/* return code 2 if VFS$FREE_SOME_DATA reports success, otherwise
	   hand back the filesystem's freemem wait queue */
	if (__likely(VFS$FREE_SOME_DATA(fs))) return (void *)2;
	return &fs->freemem;

	oom_fno_limit:
	return wq;

	no_hash:
	/* give the target directory a small hash table and resume */
	{
		XLIST_HEAD *new_hash;
		fs = f->fs;
		if (__likely((new_hash = __slalloc(&fs->small_hash)) != NULL)) {
			init_small_hash(new_hash);
			f->u.d.hash = new_hash;
			f->u.d.hash_mask = SMALL_HASH_MASK;
			goto hash_done;
		}
		goto oom_fno;
	}
}


/* cookie is: hash bucket (unsigned) , order from the end of list (unsigned) , complete (unsigned) */
/* INTERNAL_READDIR_COOKIE_SIZE in VFS.H */

/*
 * Fill the readdir buffer "buf" (whose usable memory stops at "end") with
 * entries of directory "f" for ioctl request "rq".
 *
 * Depending on buf->cookie_size the call
 *   - starts a new listing (cookie_size == 0): writes "." and "..", then
 *     walks the directory's clean and dirty lists;
 *   - resumes an in-memory hash scan (internal cookie, layout above);
 *   - or passes the request to the filesystem driver's readdir.
 *
 * Returns
 *   0          - this call is finished (entries were stored and/or
 *                buf->end was set, or buf->end was already set)
 *   1          - "rq" was put on a wait queue or handed to
 *                fsops->readdir
 *   -EMSGSIZE  - the buffer cannot hold the next entry and nothing has
 *                been stored, or the write position is already past "end"
 *   -EIO       - the directory fnode has FNODE_KILLED set
 */
static int VFS_READDIR(FNODE *f, struct readdir_buffer *buf, void *end, IOCTLRQ *rq)
{
	int i;
	FNODE *f2;
	struct dirent *de;	/* next entry to be written */
	unsigned cookie_hash, cookie_order, cookie_complete;
	if (__likely(!buf->dirent_start)) {
		/* buffer header not set up yet: initialise it */
		buf->dirent_start = DIRENT_START_FROM_COOKIE_SIZE(f->fs->readdir_cookie_size);
		buf->total_size = buf->dirent_start;
		buf->end = 0;
		de = DIR_START(buf);
	} else {
		/* continuing an earlier fill of the same buffer */
		if (__unlikely(buf->end)) return 0;
		de = DIR_END(buf);
	}
	if (__unlikely((char *)de > (char *)end)) return -EMSGSIZE;

	if (buf->cookie_size == 0) {
		/* fresh listing: "." and ".." always come first */
		if (__unlikely((char *)de + DIRENT_LENGTH(1) + DIRENT_LENGTH(2) > (char *)end)) return -EMSGSIZE;
		de->d_reclen = DIRENT_LENGTH(1);
		de->d_type = DT_DIR;
		de->d_name[0] = '.';
		de->d_name[1] = 0;
		de->d_namlen = 1;
		de = DIR_NEXT(de);
		de->d_reclen = DIRENT_LENGTH(2);
		de->d_type = DT_DIR;
		de->d_name[0] = '.';
		de->d_name[1] = '.';
		de->d_name[2] = 0;
		de->d_namlen = 2;
		de = DIR_NEXT(de);
		buf->total_size = (char *)de - (char *)buf;
		scan0:
		/* pass 0 walks the clean list, pass 1 the dirty list */
		for (i = 0; i < 2; i++) LIST_FOR_EACH(f2, !i ? &f->u.d.clean : &f->u.d.dirty, FNODE, dirent_entry) {
			/* directory complete/uncommitted: list every entry that is
			   not busy, negative, invalid or a move source; otherwise
			   list only uncommitted entries and move destinations */
			if (f->flags & (FNODE_COMPLETE | FNODE_UNCOMMITTED) ? !(f2->flags & (FNODE_BUSY | FNODE_NEGATIVE | FNODE_INVALID | FNODE_MOVE_SRC)) : f2->flags & (FNODE_UNCOMMITTED | FNODE_MOVE_DST)) {
				unsigned lll = DIRENT_LENGTH(f2->namelen);
				if (__unlikely((char *)de + lll > (char *)end)) {
					/* does not fit in one buffer: rewind to just after
					   "." and ".." and redo the listing as a hash scan,
					   which can be resumed through the cookie */
					de = (struct dirent *)((char *)DIR_START(buf) + DIRENT_LENGTH(1) + DIRENT_LENGTH(2));
					cookie_hash = 0;
					cookie_order = MAXINT;
					cookie_complete = 1;
					goto full_scan;
				}
				de->d_reclen = lll;
				de->d_type = __likely(f2->flags & FNODE_FILE) ? DT_REG : __likely(f2->flags & FNODE_DIRECTORY) ? DT_DIR : DT_UNKNOWN;
				strcpy(de->d_name, f2->name);
				de->d_namlen = f2->namelen;
				de = DIR_NEXT(de);
				buf->total_size = (char *)de - (char *)buf;
			}
		}
		cookie_complete = !!(f->flags & (FNODE_COMPLETE | FNODE_UNCOMMITTED));
		goto dirscan_done;
	} else if (buf->cookie_size & COOKIE_SIZE_FLAG_INTERNAL) {
		/* internal cookie of unexpected size: fall back to the list scan */
		if (__unlikely(buf->cookie_size != (COOKIE_SIZE_FLAG_INTERNAL | INTERNAL_READDIR_COOKIE_SIZE))) goto scan0;
		cookie_hash = ((int *)buf->cookie)[0];
		cookie_order = ((int *)buf->cookie)[1];
		cookie_complete = ((int *)buf->cookie)[2];
	} else {
		/* any other cookie: the request goes to the filesystem driver */
		FS *fs;
		PAGEINRQ *rddir;
		WQ *wq;
		do_fs_readdir:
		if (__unlikely(f->flags & FNODE_UNCOMMITTED)) goto end;
		fs = f->fs;
		if (__unlikely(f->flags & (FNODE_WRITELOCK | FNODE_WANTFREE | FNODE_KILLED))) {
			/* if something was already stored, return it now instead
			   of waiting */
			if (de != DIR_START(buf)) return 0;
			if (__unlikely(f->flags & FNODE_KILLED)) return -EIO;
			WQ_WAIT_F(&f->wait, rq);
			return 1;
		}
		/* the braces hold the code GET_PAGEINRQ runs when no request
		   could be obtained */
		GET_PAGEINRQ(rddir, fs, rq->handle->name_addrspace, 0, 0, label, {
			if (de != DIR_START(buf)) {
				LOWER_SPL(SPL_FS);
				return 0;
			}
			WQ_WAIT_F(wq, rq);
			LOWER_SPL(SPL_FS);
			return 1;
		});
		rddir->status = 0;
		rddir->caller = (IORQ *)rq;
		rddir->fnode = f;
		/* wr is nonzero when the buffer already holds entries */
		rddir->wr = de != DIR_START(buf);
		f->readers++;
		fs->fsops->readdir(rddir, buf);
		return 1;
	}
	
	full_scan:
	/* a "complete" claim is dropped if the directory is neither
	   FNODE_COMPLETE nor FNODE_UNCOMMITTED */
	if (!(f->flags & (FNODE_COMPLETE | FNODE_UNCOMMITTED))) cookie_complete = 0;

	if (f->u.d.hash) while (cookie_hash <= f->u.d.hash_mask) {
		LIST_ENTRY *xlist_start;
		LIST_ENTRY *xlist;
		unsigned i = 0;
		xlist_start = f->u.d.hash[cookie_hash].next;
		if (__likely(xlist_start == &KERNEL$LIST_END)) {
			/* empty bucket */
			cookie_hash++;
			cookie_order = MAXINT;
			continue;
		}
		/* count the bucket's entries, clamp cookie_order to that count
		   and skip forward so that cookie_order entries remain */
		for (xlist = xlist_start; xlist != &KERNEL$LIST_END; xlist = xlist->next) i++;
		if (cookie_order > i) cookie_order = i;
		i -= cookie_order;
		while (i--) xlist_start = xlist_start->next;
		while (xlist_start != &KERNEL$LIST_END) {
			f2 = LIST_STRUCT(xlist_start, FNODE, hash_entry);
			/* same visibility filter as in the list scan above, keyed
			   on cookie_complete */
			if (cookie_complete ? !(f2->flags & (FNODE_BUSY | FNODE_NEGATIVE | FNODE_INVALID | FNODE_MOVE_SRC)) : f2->flags & (FNODE_UNCOMMITTED | FNODE_MOVE_DST)) {
				unsigned lll = DIRENT_LENGTH(f2->namelen);
				if (__unlikely((char *)de + lll > (char *)end)) {
					/* buffer full: store the resume position in the
					   cookie */
					((int *)buf->cookie)[0] = cookie_hash;
					((int *)buf->cookie)[1] = cookie_order;
					((int *)buf->cookie)[2] = cookie_complete;
					buf->cookie_size = INTERNAL_READDIR_COOKIE_SIZE | COOKIE_SIZE_FLAG_INTERNAL;
					if (__likely(de != DIR_START(buf))) return 0;
					return -EMSGSIZE;
				}
				de->d_reclen = lll;
				de->d_type = __likely(f2->flags & FNODE_FILE) ? DT_REG : __likely(f2->flags & FNODE_DIRECTORY) ? DT_DIR : DT_UNKNOWN;
				strcpy(de->d_name, f2->name);
				de->d_namlen = f2->namelen;
				de = DIR_NEXT(de);
				buf->total_size = (char *)de - (char *)buf;
			}
			xlist_start = xlist_start->next;
			cookie_order--;
		}
		cookie_hash++;
		cookie_order = MAXINT;
	}

	dirscan_done:
	if (cookie_complete) {
		end:
		/* nothing more to list */
		buf->end = 1;
		return 0;
	}
	/* the in-memory entries did not cover everything: let the driver
	   set up its own cookie and continue with the driver's readdir */
	f->fs->fsops->init_readdir_cookie(buf, f);
	goto do_fs_readdir;
}

/* Scratch page handed out by VFS$MAP_READDIR when rmd < 0. */
static __u64 DUMMY_PAGE[PG_SIZE / 8];

/*
 * Obtain the memory that a readdir operation writes into.
 *
 * rmd < 0:  no user memory is mapped; *unmap is set to
 *           KERNEL$NULL_VSPACE_UNMAP, *len to PG_SIZE and DUMMY_PAGE is
 *           returned.
 * rmd >= 0: *len receives rq->v.len and the request's buffer is mapped
 *           read/write through its vspace; *unmap receives the matching
 *           unmap routine.
 *
 * Returns
 *   the mapped pointer (or DUMMY_PAGE) on success
 *   NULL, or (void *)1 when rmd is nonzero, if the vspace returned no mapping
 *   the error pointer returned by the vspace, unchanged
 *   __ERR_PTR(-EINVAL)   if the length exceeds __PAGE_CLUSTER_SIZE or the
 *                        mapping is not __MALLOC_ALIGN aligned
 *   __ERR_PTR(-EMSGSIZE) if the buffer is smaller than the readdir header
 *                        for this filesystem's cookie size
 * In the last two mapping-related error cases the mapping is released
 * before returning.
 */
void *VFS$MAP_READDIR(IOCTLRQ *rq, int rmd, FS *fs, unsigned *len, vspace_unmap_t **unmap)
{
	void *mapped;
	unsigned wanted;
	int err;

	if (__unlikely(rmd < 0)) {
		*unmap = KERNEL$NULL_VSPACE_UNMAP;
		*len = PG_SIZE;
		return DUMMY_PAGE;
	}

	wanted = rq->v.len;
	*len = wanted;
	if (__unlikely(wanted > __PAGE_CLUSTER_SIZE)) return __ERR_PTR(-EINVAL);

	RAISE_SPL(SPL_VSPACE);
	mapped = rq->v.vspace->op->vspace_map(&rq->v, PF_RW, unmap);
	LOWER_SPL(SPL_FS);

	if (__unlikely(!mapped)) return rmd ? (void *)1 : NULL;
	if (__unlikely(__IS_ERR(mapped))) return mapped;

	if (__unlikely((int)(unsigned long)mapped & (__MALLOC_ALIGN - 1))) {
		/* do not crash on alignment error if the user passed unaligned pointer */
		err = -EINVAL;
	} else if (__unlikely(rq->v.len < DIRENT_START_FROM_COOKIE_SIZE(fs->readdir_cookie_size))) {
		err = -EMSGSIZE;
	} else {
		return mapped;
	}

	/* unusable mapping: give it back and report why */
	RAISE_SPL(SPL_VSPACE);
	(*unmap)(mapped);
	LOWER_SPL(SPL_FS);
	return __ERR_PTR(err);
}

/*
 * Release a readdir mapping: call "unmap" on "ptr" with the SPL raised to
 * SPL_VSPACE, then drop the SPL to SPL_FS.
 */
void VFS$UNMAP_READDIR(void *ptr, vspace_unmap_t unmap)
{
	RAISE_SPL(SPL_VSPACE);
	(*unmap)(ptr);
	LOWER_SPL(SPL_FS);
}

/*
 * VFS$DO_READDIR_NAME: offer one on-disk directory entry ("name", "namelen"
 * characters, not necessarily NUL-terminated) for the readdir request "rq"
 * whose buffer "map" is "length" bytes long.
 *
 * Returns
 *   0           - the entry was stored, or deliberately skipped (it is
 *                 shadowed by a negative / move-source fnode in memory, or
 *                 its name contains ':' or starts with '^')
 *   -EMSGSIZE   - out of space in the buffer
 *   -ENOTEMPTY  - rq->wr is negative and a visible entry was found
 *   -EFSERROR   - the name contains '/'
 */
int VFS$DO_READDIR_NAME(PAGEINRQ *rq, void *map, unsigned length, __const__ char *name, unsigned namelen, int dt_flags)
{
	struct readdir_buffer *rdbuf = (struct readdir_buffer *)map;
	char *limit = (char *)map + length;
	FNODE *dir = rq->fnode;
	FNODE *hit;
	struct dirent *slot;
	unsigned reclen;
	unsigned pos;

	/* the in-memory lookup is only done when the directory has a hash
	   table and at least one dirty entry */
	if (__likely(dir->u.d.hash != NULL) && __unlikely(dir->u.d.n_dirty != 0)) {
		__const__ char *stop = name + namelen;
		int bucket = 0;
		quickcasehash(name, name != stop, bucket);
		/* the hashing advanced "name" to "stop"; rewind it */
		name -= namelen;
		bucket &= dir->u.d.hash_mask;
		XLIST_FOR_EACH(hit, &dir->u.d.hash[bucket], FNODE, hash_entry) if (__likely(hit->namelen == namelen) && __likely(!_memcasecmp(hit->name, name, namelen))) {
			/* an ordinary cached fnode does not hide the disk entry */
			if (__likely(!(hit->flags & (FNODE_NEGATIVE | FNODE_MOVE_SRC)))) break;
			/* negative or move-source fnode: the disk entry is hidden */
			if (__unlikely(rq->wr < 0) && __unlikely(hit->flags & FNODE_DIRTY) && __unlikely(dir->fs->flags & FS_MORE_SAME_NAMES)) return -ENOTEMPTY;
			return 0;
		}
	}

	if (__unlikely(rq->wr < 0)) return -ENOTEMPTY;

	for (pos = 0; pos < namelen; pos++) {
		char ch = name[pos];
		if (__unlikely(ch == '/')) return -EFSERROR;
		if (__unlikely(ch == ':') || (__unlikely(ch == '^') && !pos)) return 0;
	}

	slot = DIR_END(rdbuf);
	reclen = DIRENT_LENGTH(namelen);
	if (__unlikely((char *)slot >= limit) || __unlikely((char *)slot + reclen > limit)) return -EMSGSIZE;

	/* remember that this request has produced output */
	rq->wr = 1;
	slot->d_reclen = reclen;
	slot->d_type = dt_flags;
	slot->d_namlen = namelen;
	memcpy(slot->d_name, name, namelen);
	slot->d_name[namelen] = 0;
	__upcase(slot->d_name);
	slot = DIR_NEXT(slot);
	rdbuf->total_size = (char *)slot - (char *)rdbuf;
	return 0;
}

/*
 * Finish a driver readdir pass for "rq".
 * When rq->wr is negative the directory fnode is flagged FNODE_COMPLETE
 * and 1 is returned; otherwise nothing is changed and 0 is returned.
 */
int VFS$DONE_READDIR(PAGEINRQ *rq)
{
	if (__unlikely(rq->wr < 0)) {
		rq->fnode->flags |= FNODE_COMPLETE;
		return 1;
	}
	return 0;
}

#ifdef VFS_INO
/*
 * Synthesize an inode number for "f" and store it in f->ino.
 * The number is a case-insensitive hash accumulated over the name of "f",
 * the names of all its ancestors and finally the filesystem name.
 * Results 0, 1 and 2 are shifted up by 3 so that f->ino never ends up in
 * that range.
 */
static void VFS_FILLSTAT_INO(FNODE *f)
{
	ino_t sum = 0;
	FNODE *node;
	char *p;

	for (node = f; node != NULL; node = node->parent) {
		p = node->name;
		quickcasehash(p, *p, sum);
	}
	p = f->fs->filesystem_name;
	quickcasehash(p, *p, sum);
	if (__unlikely(sum <= 2)) sum += 3;
	f->ino = sum;
}
#endif

static __finline__ void VFS_FILLSTAT(struct stat *stat, void *fnode_)
{
	FNODE *f = fnode_;
#ifdef VFS_INO
	if (__unlikely(!f->ino)) {
		VFS_FILLSTAT_INO(f);
	}
	stat->st_ino = f->ino;
	stat->st_dev = f->fs->st_dev;
#endif
	stat->st_uid = 1;
	stat->st_gid = 1;
	stat->st_nlink = 1;
	stat->st_size = f->size;
	stat->st_blksize = OPTIMAL_IO_SIZE;
	if (f->flags & FNODE_FILE) {
		stat->st_mode = 0644 | S_IFREG;
		stat->st_blocks = (((f->size + f->fs->pageio_mask) & ~(off_t)f->fs->pageio_mask) + 511) >> 9;
	} else {
		stat->st_mode = 0755 | S_IFDIR;
		stat->st_blocks = 0;
	}
	time(&stat->st_atime);
	stat->st_mtime = f->mtime;
	stat->st_ctime = f->ctime;
	f->fs->fsops->stat(f, stat);
}

/*
 * Subtree callback used by FSYNC: apply WRITE_FNODE to "f".
 * The second argument is ignored. Always returns NULL.
 */
static void *SYNC_WRITE_FNODE(FNODE *f, void *unused)
{
	(void)unused;
	WRITE_FNODE(f);
	return NULL;
}

/*
 * AST installed by FSYNC (pgin->fn) for the PAGEINRQ it sets up.
 * It cancels the sync registered on the placeholder fnode, frees that
 * fnode, wakes waiters on the filesystem's freemem queue, copies the
 * PAGEINRQ's status to the original caller and releases the PAGEINRQ.
 */
DECL_AST(FSYNC_DONE, SPL_FS, PAGEINRQ)
{
	FS *fs = RQ->fs;
	IORQ *rq;
	CANCEL_SYNC(fs, RQ->new_fnode);
	/* new_fnode is the placeholder fnode allocated in FSYNC */
	__slfree(RQ->new_fnode);
	WQ_WAKE_ALL(&fs->freemem);
	RQ->caller->status = RQ->status;
	/* release the PAGEINRQ; the request it returns is what the AST
	   continues with */
	rq = VFS$FREE_EMPTY_PAGEIN(RQ);
	RETURN_AST(rq);
}

/*
 * Implement IOCTL_FSYNC for fnode "f" on behalf of request "rq".
 *
 * rq->param flags:
 *   PARAM_FSYNC_ASYNC - only start write-out and wake the syncer; the
 *                       request completes at once with status 0
 *   PARAM_FSYNC_TREE  - write out "f" and its whole subtree instead of
 *                       just "f"
 *
 * Returns 0 when rq->status has been set and the caller should complete
 * the request itself; returns 1 when the request has been queued (on a
 * wait queue, or behind the sync whose completion runs FSYNC_DONE).
 */
static int FSYNC(FNODE *f, IOCTLRQ *rq)
{
	PAGEINRQ *pgin;
	FNODE *nf;
	WQ *wq;
	if (__unlikely(rq->param & PARAM_FSYNC_ASYNC)) {
		f->fs->need_sync |= FS_SYNC_IDLE;
		if (rq->param & PARAM_FSYNC_TREE) {
			VFS$DO_FOR_SUBTREE(f, NULL, DFS_PREORDER, SYNC_WRITE_FNODE);
		} else {
			WRITE_FNODE(f);
		}
		WQ_WAKE_ALL(&f->fs->syncer_wait);
		rq->status = 0;
		return 0;
	}
	/* the braces hold the code GET_PAGEINRQ runs when no request could
	   be obtained */
	GET_PAGEINRQ(pgin, f->fs, rq->handle->name_addrspace, 0, 0, label, {
		WQ_WAIT_F(wq, rq);
		LOWER_SPL(SPL_FS);
		return 1;
	});
	/* a spare fnode is allocated as the object handed to SYNC(); its
	   wait queue is what the PAGEINRQ sleeps on below */
	nf = __slalloc(&f->fs->fnodes);
	if (__unlikely(!nf)) {
		/* no memory: give the PAGEINRQ back, queue the caller on
		   freemem and try to free some data */
		VFS$FREE_EMPTY_PAGEIN(pgin);
		WQ_WAIT_F(&f->fs->freemem, rq);
		VFS$FREE_SOME_DATA(f->fs);
		return 1;
	}
	pgin->new_fnode = nf;
	pgin->caller = (IORQ *)rq;
	pgin->status = RQS_PROCESSING;
	pgin->fn = FSYNC_DONE;
	if (__unlikely(rq->param & PARAM_FSYNC_TREE)) {
		VFS$DO_FOR_SUBTREE(f, NULL, DFS_PREORDER, SYNC_WRITE_FNODE);
	} else {
		WRITE_FNODE(f);
	}
	SYNC(f->fs, nf, pgin->tag.proc);
	WQ_WAIT(&nf->wait, pgin, KERNEL$SUCCESS);
	return 1;
}

/* Helpers used by the ioctl handlers below; their definitions follow the
   handlers. */
static void DO_TRUNCATE(IOCTLRQ *RQ, FNODE *f, HANDLE *h);
static void DO_STATFS(IOCTLRQ *RQ, FS *fs);
static void DO_UTIMES(IOCTLRQ *RQ, FNODE *f, HANDLE *h);

/* NOTE(review): VFS_FN_SPL is presumably consumed by <SPAD/VFS_FN.H>,
   which appears to supply VFS_STAT, VFS_LSEEK and VFS_GETSETFL used below
   - confirm against the header. */
#define VFS_FN_SPL      SPL_FS

#include <SPAD/VFS_FN.H>

/*
 * ioctl entry point for handles that refer to regular files.
 *
 * If the handle's operations are not VFS_FILE_OPERATIONS the request is
 * passed to RETURN_IORQ_LSTAT at label "x". Otherwise the request is
 * dispatched on RQ->ioctl. Cases either set RQ->status and finish with
 * RETURN_AST(RQ), or leave via RETURN after a helper has taken over the
 * request.
 * NOTE(review): cases ending in DO_PAGEIN have no explicit return;
 * presumably the macro leaves the function - confirm.
 */
DECL_IOCALL(VFS_FILE_IOCTL, SPL_FS, IOCTLRQ)
{
	HANDLE *h = RQ->handle;
	FNODE *f;
	if (__unlikely(h->op != &VFS_FILE_OPERATIONS)) goto x;
	RQ->tmp1 = (unsigned long)KERNEL$WAKE_IOCTL;
	TEST_LOCKUP_ENTRY(RQ, RETURN);
	SWITCH_PROC_ACCOUNT(h->name_addrspace, SPL_X(SPL_FS));
	f = h->fnode;
	CHECK_FNODE_HANDLE(f, h);
	switch (RQ->ioctl) {
		/* a file cannot be listed */
		case IOCTL_READDIR: {
			RQ->status = -ENOTDIR;
			RETURN_AST(RQ);
		}

		case IOCTL_STAT: {
			int r;
			r = VFS_STAT(RQ, f);
			if (__likely(r <= 0)) {
				RQ->status = r;
				RETURN_AST(RQ);
			}
			/* positive result: page in the user buffer */
			DO_PAGEIN(RQ, &RQ->v, PF_WRITE);
		}

		case IOCTL_STATFS: {
			DO_STATFS(RQ, f->fs);
			RETURN;
		}

		case IOCTL_UTIMES: {
			DO_UTIMES(RQ, f, h);
			RETURN;
		}

		case IOCTL_LSEEK: {
			int r;
			r = VFS_LSEEK(RQ, ((FNODE *)RQ->handle->fnode)->size);
			if (__likely(r <= 0)) {
				RQ->status = r;
				RETURN_AST(RQ);
			}
			/* result 2: leave without completing the request here;
			   any other positive result: page in the user buffer */
			if (r == 2) RETURN;
			DO_PAGEIN(RQ, &RQ->v, PF_RW);
		}

		case IOCTL_TRUNCATE: {
			DO_TRUNCATE(RQ, f, h);
			RETURN;
		}

		case IOCTL_FCNTL_GETFL:
		case IOCTL_FCNTL_SETFL: {
			if (__likely(!VFS_GETSETFL(RQ))) RETURN_AST(RQ);
			RETURN;
		}

		/* fsync on a file requires CAP_APPEND */
		case IOCTL_FSYNC: {
			if (__unlikely(!HAS_CAPABILITY(h, CAP_APPEND))) {
				RQ->status = -EACCES;
				RETURN_AST(RQ);
			}
			/* nonzero from FSYNC: the request is queued */
			if (FSYNC(f, RQ)) RETURN;
			RETURN_AST(RQ);
		}

		/* RQ->param == 0 asks for CAP_READ only, otherwise CAP_READ
		   and CAP_WRITE are both required */
		case IOCTL_CAN_MMAP: {
			if (__likely(HAS_CAPABILITY(h, __likely(!RQ->param) ? CAP_READ : CAP_READ | CAP_WRITE))) RQ->status = 0;
			else RQ->status = -EACCES;
			RETURN_AST(RQ);
		}

		/* bio_request_size with the block_mask bits cleared */
		case IOCTL_BIO_GET_OPTIMAL_REQUEST_SIZE: {
			RQ->status = f->fs->bio_request_size & ~(long)(f->fs->block_mask);
			RETURN_AST(RQ);
		}

		case IOCTL_BIO_PHYSICAL_BLOCKSIZE: {
			RQ->status = f->fs->pageio_mask + 1;
			RETURN_AST(RQ);
		}

		case IOCTL_GET_CAPABILITIES: {
			RQ->status = h->flags2 & CAP_ALL;
			RETURN_AST(RQ);
		}

		default: {
			RQ->status = -ENOOP;
			RETURN_AST(RQ);
		}
	}
	x:
	RETURN_IORQ_LSTAT(RQ, KERNEL$WAKE_IOCTL);
}

/*
 * ioctl entry point for handles that refer to directories.
 *
 * If the handle's operations are not VFS_DIR_OPERATIONS the request is
 * passed to RETURN_IORQ_LSTAT at label "x". Otherwise the request is
 * dispatched on RQ->ioctl. Cases either set RQ->status and finish with
 * RETURN_AST(RQ), or leave via RETURN after a helper has taken over the
 * request.
 * NOTE(review): cases ending in DO_PAGEIN have no explicit return;
 * presumably the macro leaves the function - confirm.
 */
DECL_IOCALL(VFS_DIR_IOCTL, SPL_FS, IOCTLRQ)
{
	HANDLE *h = RQ->handle;
	FNODE *f;
	if (__unlikely(h->op != &VFS_DIR_OPERATIONS)) goto x;
	RQ->tmp1 = (unsigned long)KERNEL$WAKE_IOCTL;
	TEST_LOCKUP_ENTRY(RQ, RETURN);
	SWITCH_PROC_ACCOUNT(h->name_addrspace, SPL_X(SPL_FS));
	f = h->fnode;
	CHECK_FNODE_HANDLE(f, h);
	switch (RQ->ioctl) {
		/* listing requires CAP_SCANDIR */
		case IOCTL_READDIR: {
			int r;
			vspace_unmap_t *unmap;
			void *map;
			if (__unlikely(!HAS_CAPABILITY(h, CAP_SCANDIR))) {
				RQ->status = -EACCES;
				RETURN_AST(RQ);
			}
			/* NOTE(review): the length output is not needed here, so
			   &KERNEL$LIST_END is passed as a place for it to be
			   written - presumably a deliberate scratch location;
			   confirm */
			map = VFS$MAP_READDIR(RQ, 0, f->fs, (void *)&KERNEL$LIST_END, &unmap);
			if (__unlikely(!map)) goto readdir_pf;
			if (__unlikely(__IS_ERR(map))) goto readdir_inval;
			r = VFS_READDIR(f, map, (char *)map + RQ->v.len, RQ);
			VFS$UNMAP_READDIR(map, unmap);
			if (__likely(r <= 0)) {
				RQ->status = r;
				RETURN_AST(RQ);
			}
			/* positive result: VFS_READDIR queued the request */
			RETURN;

			/* no mapping: page in the user buffer */
			readdir_pf:
			DO_PAGEIN(RQ, &RQ->v, PF_RW);

			readdir_inval:
			RQ->status = __PTR_ERR(map);
			RETURN_AST(RQ);
		}

		case IOCTL_STAT: {
			int r;
			r = VFS_STAT(RQ, f);
			if (__likely(r <= 0)) {
				RQ->status = r;
				RETURN_AST(RQ);
			}
			/* positive result: page in the user buffer */
			DO_PAGEIN(RQ, &RQ->v, PF_WRITE);
		}

		case IOCTL_STATFS: {
			DO_STATFS(RQ, f->fs);
			RETURN;
		}

		case IOCTL_UTIMES: {
			DO_UTIMES(RQ, f, h);
			RETURN;
		}

		case IOCTL_FCNTL_GETFL:
		case IOCTL_FCNTL_SETFL: {
			if (__likely(!VFS_GETSETFL(RQ))) RETURN_AST(RQ);
			RETURN;
		}

		/* fsync on a directory requires any of CAP_ALLWRITE or
		   CAP_ACLGAIN */
		case IOCTL_FSYNC: {
			if (!HAS_ANY_CAPABILITY(h, CAP_ALLWRITE | CAP_ACLGAIN)) {
				RQ->status = -EACCES;
				RETURN_AST(RQ);
			}
			/* nonzero from FSYNC: the request is queued */
			if (FSYNC(f, RQ)) RETURN;
			RETURN_AST(RQ);
		}

		/* not meaningful for directories */
		case IOCTL_TRUNCATE:
		case IOCTL_LSEEK:
		{
			RQ->status = -EISDIR;
			RETURN_AST(RQ);
		}

		case IOCTL_GET_CAPABILITIES: {
			RQ->status = h->flags2 & CAP_ALL;
			RETURN_AST(RQ);
		}

		default: {
			RQ->status = -ENOOP;
			RETURN_AST(RQ);
		}
	}
	x:
	RETURN_IORQ_LSTAT(RQ, KERNEL$WAKE_IOCTL);
}

/*
 * Implement IOCTL_TRUNCATE: set the size of file "f" to the value the
 * request carries in its ioctl structure.
 *
 * Requires CAP_WRITE on "h" (-EACCES otherwise) and a writable filesystem
 * (-EROFS otherwise). The function always disposes of the request itself:
 * it either completes it through CALL_AST, queues it on a wait queue,
 * starts a page-in of the user structure, or hands it to one of the
 * continuation paths selected by the helper's result.
 */
static void DO_TRUNCATE(IOCTLRQ *RQ, FNODE *f, HANDLE *h)
{
	int r;
	WQ *wq;
	_u_off_t size;	/* requested new size */
	if (__unlikely(!HAS_CAPABILITY(h, CAP_WRITE))) {
		RQ->status = -EACCES;
		goto call_ast;
	}
	if (__unlikely(f->fs->flags & FS_RO)) {
		RQ->status = -EROFS;
		goto call_ast;
	}
	if (__unlikely((wq = VFS$MAY_DIRTY(h->name_addrspace, f->fs)) != NULL)) {
		WQ_WAIT_F(wq, RQ);
		return;
	}
	/* fetch the new size from the caller; result 1 means the structure
	   has to be paged in first, any other nonzero result becomes the
	   request status */
	if (__unlikely(r = KERNEL$GET_IOCTL_STRUCT(RQ, &size, sizeof size))) {
		if (r == 1) {
			DO_PAGEIN_NORET(RQ, &RQ->v, PF_READ);
			return;
		}
		RQ->status = r;
		goto call_ast;
	}
	/* shrink, grow, or nothing to do when the size is unchanged */
	if (__likely(size < f->size)) wq = VFS_FREE_FILE_PAGES(f, size);
	else if (__unlikely(size > f->size)) wq = VFS_EXTEND_FILE(f, size, h->name_addrspace);
	else wq = NULL;
	/* SPL may be up here when returning wq */
	/* NULL (status 0) or an error pointer: complete the request */
	if (__likely(!wq) || __unlikely(__IS_ERR(wq))) {
		RQ->status = __PTR_ERR(wq);
		goto call_ast;
	}
	/* code 2: pass the request to CALL_IORQ_LSTAT with
	   KERNEL$WAKE_IOCTL */
	if (__likely(wq == (void *)2)) {
		CALL_IORQ_LSTAT(RQ, KERNEL$WAKE_IOCTL);
		return;
	}
	/* code 3: continue through VFS_DO_EXTEND_READ */
	if (__unlikely(wq == (void *)3)) {
		VFS_DO_EXTEND_READ(f, RQ);
		return;
	}
	/* anything else is a wait queue to sleep on */
	WQ_WAIT_F(wq, RQ);
	return;

	call_ast:
	CALL_AST(RQ);
	return;
}

/*
 * Implement IOCTL_STATFS for filesystem "fs" on behalf of request "RQ".
 *
 * The reply is assembled in a function-static struct statfs that is
 * cleared and rebuilt on every call: generic fields are filled in here,
 * then the driver's statfs hook adds the rest. The function disposes of
 * the request itself: it queues it on the wait queue the driver returned,
 * starts a page-in of the user buffer when KERNEL$PUT_IOCTL_STRUCT
 * returns 1, or completes it with that call's result as status.
 */
static void DO_STATFS(IOCTLRQ *RQ, FS *fs)
{
	static struct statfs stat;
	int r;
	WQ *wq;
	memset(&stat, 0, sizeof stat);
	stat.f_fsid.val[0] = (unsigned long)fs;
	stat.f_fsid.val[1] = (unsigned long)fs;
	/* mount point is reported as "<filesystem name>:/" */
	strlcpy(stat.f_fsmntonname, fs->filesystem_name, __MAX_STR_LEN);
	strlcat(stat.f_fsmntonname, ":/", __MAX_STR_LEN);
	/* bounded copy, like the other string fields: an over-long device
	   name is truncated instead of overrunning the reply structure */
	strlcpy(stat.f_fsmntfromname, fs->device_name, sizeof(stat.f_fsmntfromname));
	strlcpy(stat.f_fstypename, fs->fsops->name, sizeof(stat.f_fstypename));
	/* f_type is the leading bytes of the type name */
	memcpy(&stat.f_type, stat.f_fstypename, sizeof stat.f_type);
	stat.f_bsize = fs->pageio_mask + 1;
	stat.f_iosize = __PAGE_CLUSTER_SIZE;
	stat.f_flags = MNT_NOATIME | MNT_ASYNC;
	if (__unlikely(fs->flags & FS_RO)) stat.f_flags |= MNT_RDONLY;
	wq = fs->fsops->statfs(fs, &stat);
	if (__unlikely(wq != NULL)) {
		WQ_WAIT_F(wq, RQ);
		return;
	}
	if (__unlikely((r = KERNEL$PUT_IOCTL_STRUCT(RQ, &stat, sizeof stat)) == 1)) {
		DO_PAGEIN_NORET(RQ, &RQ->v, PF_WRITE);
		return;
	}
	RQ->status = r;
	CALL_AST(RQ);
}

/*
 * Implement IOCTL_UTIMES for fnode "f" reached through handle "h".
 *
 * The caller supplies two struct timeval values; only the second one's
 * seconds field is used, as the new modification time. Requires
 * CAP_WRITE and CAP_UTIMES (-EACCES) and a writable filesystem (-EROFS).
 * The function disposes of the request itself: it completes it through
 * CALL_AST, queues it on a wait queue, or starts a page-in of the user
 * structure.
 */
static void DO_UTIMES(IOCTLRQ *RQ, FNODE *f, HANDLE *h)
{
	struct timeval times[2];
	WQ *blocker;
	int rc;

	if (__unlikely(!HAS_CAPABILITY(h, CAP_WRITE | CAP_UTIMES))) {
		RQ->status = -EACCES;
		goto finish;
	}
	if (__unlikely(f->fs->flags & FS_RO)) {
		RQ->status = -EROFS;
		goto finish;
	}

	blocker = VFS$MAY_DIRTY(h->name_addrspace, f->fs);
	if (__unlikely(blocker != NULL)) {
		WQ_WAIT_F(blocker, RQ);
		return;
	}

	rc = KERNEL$GET_IOCTL_STRUCT(RQ, times, sizeof times);
	if (__unlikely(rc == 1)) {
		/* the user structure has to be paged in first */
		DO_PAGEIN_NORET(RQ, &RQ->v, PF_READ);
		return;
	}
	if (__unlikely(rc != 0)) {
		RQ->status = rc;
		goto finish;
	}

	if (__unlikely(f->flags & FNODE_WANTFREE)) {
		WQ_WAIT_F(&f->wait, RQ);
		return;
	}

	f->mtime = times[1].tv_sec;
	VFS$SET_DIRTY(f);
	RQ->status = 0;

	finish:
	CALL_AST(RQ);
}

/*
 * Register the fnode VM entity type (callback table fnode_calls) with the
 * cache; the assigned type id is stored in fnode_vm_entity.
 * Returns whatever KERNEL$CACHE_REGISTER_VM_TYPE returns.
 */
int FNODE_INIT_GLOBAL(void)
{
	int rc = KERNEL$CACHE_REGISTER_VM_TYPE(&fnode_vm_entity, &fnode_calls);
	return rc;
}

/*
 * Counterpart of FNODE_INIT_GLOBAL: unregister the fnode VM entity type
 * whose id is held in fnode_vm_entity.
 */
void FNODE_TERM_GLOBAL(void)
{
	int type_id = fnode_vm_entity;
	KERNEL$CACHE_UNREGISTER_VM_TYPE(type_id);
}

