#ifndef __SPAD_VFS_H
#define __SPAD_VFS_H

#include <ARCH/SETUP.H>
#include <SYS/TYPES.H>
#include <SPAD/LIBC.H>
#include <SPAD/AC.H>
#include <SPAD/WQ.H>
#include <SPAD/LIST.H>
#include <SPAD/SLAB.H>
#include <SPAD/VM.H>
#include <SPAD/ALLOC.H>
#include <SPAD/DEV.H>
#include <SPAD/DEV_KRNL.H>
#include <SPAD/DIR_KRNL.H>
#include <SPAD/BIO.H>
#include <SPAD/SYNC.H>
#include <SPAD/THREAD.H>
#include <SPAD/TIMER.H>
#include <LIMITS.H>
#include <SYS/STATFS.H>
#include <SYS/STAT.H>

#include <SPAD/CAPA.H>
#include <SPAD/PAGEZONE.H>

__BEGIN_DECLS

/* Disk offset type used by this header (BUFFER.blk, FS.size,
   FNODE.disk_blk and friends). */
typedef __u64 __d_off;			/* must be unsigned ! */
/* printf-style length modifier matching __d_off; it is pasted between
   string literals, see the debug message in BUFFER_VALID. */
#define __d_off_format __64_format

/* Most significant bit of a __d_off: 0x80 shifted into the top byte. */
#define __D_OFF_TOP_BIT	((__d_off)0x80 << ((sizeof(__d_off) - 1) * 8))

#define MAX_NAMELEN	256	/* this is buffer size, actual max namelen is 255, do not increase because namelen is unsigned char */
/* NOTE(review): presumably the limit for FNODE.depth (directory nesting) --
   confirm against the lookup code. */
#define MAX_DEPTH	64

/* Forward declarations of the core VFS types; the structures themselves
   are defined further down in this header. */
typedef struct __fnode FNODE;
typedef struct __fs FS;
typedef struct __vfsid VFSID;
typedef struct __fsops FSOPS;
typedef struct __pageinrq PAGEINRQ;

/* Name/description helpers implemented elsewhere.
   NOTE(review): semantics and ownership of the returned strings are not
   visible from this header -- check the implementation before relying
   on them. */
char *VFS$HIDE_NAME(char *name);
char *VFS$FNODE_DESCRIPTION(FNODE *f);

	/*************
	 * BUFFER IO *
	 *************/

typedef struct __buffer BUFFER;

typedef struct __brq BRQ;

/*
 * Block request record: a BIORQ bundled with its BIODESC, a BUFFER pointer
 * and a link to another BRQ.  One BRQ is embedded in every BUFFER
 * (BUFFER.brq) and in every PAGEINRQ (PAGEINRQ_HEAD.brq).
 */
struct __brq {
	BIORQ rrq;		/* the block I/O request */
	BIODESC rdesc;		/* descriptor used with rrq (presumably rrq points at it) */
	BUFFER *buf;		/* NOTE(review): presumably the buffer being transferred */
	BRQ *next;		/* link to another BRQ */
	__d_off write_sec;	/* NOTE(review): presumably the sector of a write; confirm */
};

/* Bits stored in BUFFER.flags. */
/* when adding flags, grep sources for ALL_BUF_FLAGS */
#define B_BUSY			0x0001
#define B_PARTIAL		0x0002	/* when set, BUFFER_VALID consults the per-sector valid map */
#define B_DIRTY			0x0004
#define B_DIRTY_DISCONTIG	0x0008	/* dirtmap is valid */
#define B_WRITING		0x0010
#define B_WRQ			0x0020	/* when busy or writing; indicates head buffer with request posted */
#define B_IOERROR		0x0040	/* there was an io error, read individual sectors */
#define B_WANTFREE		0x0080
#define B_FILEDATA_VALID	0x0100
#define B_FILEDATA_CREATED	0x0200
#define B_NEW			0x0400

/*
 * One entry of the buffer cache.
 *
 * The structure is followed in memory by three bitmaps (user, dirty and
 * valid map) that the BUFFER_USERMAP_, BUFFER_DIRTMAP_ and BUFFER_VALIDMAP_
 * macros address as (BUFFER *)(b) + 1, so a BUFFER must always be allocated
 * with room for them.
 */
struct __buffer {
	LIST_ENTRY hash;	/* entry in FS.bhash */
	VMENTITY e;
	__d_off blk;		/* position of this buffer on disk (printed as "PAGE" by BUFFER_VALID) */
	__p_addr data;		/* NOTE(review): presumably physical address of the cached data */
	unsigned flags;		/* B_* flags */

	/* NOTE(review): presumably the dirty sector range [first_dirty,
	   after_last_dirty) used while B_DIRTY_DISCONTIG is clear; confirm */
	unsigned short first_dirty;
	unsigned short after_last_dirty;

	LIST_ENTRY list;	/* NOTE(review): presumably on FS.clean_buffers or FS.dirty_buffers */

	FS *fs;
	WQ wait;

	PAGEINRQ *pageinrq;
	BRQ brq;
	BUFFER *next;		/* next buffer in readahead/write chain */
	PROC *proc;

	unsigned time;

	unsigned number_of_ios;

	unsigned short read_from;
	unsigned short read_to;
};

/*
 * Per-buffer sector bitmaps.
 *
 * BUFFER_BITMAP_SIZE is the size of one bitmap in bits:
 * __SECTORS_PER_PAGE_CLUSTER rounded up to a multiple of the number of bits
 * in an unsigned long.
 *
 * Three bitmaps are laid out back to back immediately after the BUFFER
 * structure ((BUFFER *)(b) + 1):
 *	bit offset 0				user map
 *	bit offset BUFFER_BITMAP_SIZE		dirty map
 *	bit offset 2 * BUFFER_BITMAP_SIZE	valid map
 *
 * `o' is a bit index within the selected map.  The _CLEAR macros zero one
 * whole map (BUFFER_BITMAP_SIZE / 8 bytes); BUFFER_BITMAP_SIZE / 4 is the
 * byte offset of the third map.
 */
#define BUFFER_BITMAP_SIZE		(((__SECTORS_PER_PAGE_CLUSTER - 1) | (sizeof(unsigned long) * 8 - 1)) + 1)

#define BUFFER_USERMAP_TEST(b, o)	__BT((BUFFER *)(b) + 1, (o))
#define BUFFER_USERMAP_SET(b, o)	__BS((BUFFER *)(b) + 1, (o))
#define BUFFER_USERMAP_RESET(b, o)	__BR((BUFFER *)(b) + 1, (o))
#define BUFFER_USERMAP_TEST_SET(b, o)	__BTS((BUFFER *)(b) + 1, (o))
#define BUFFER_USERMAP_TEST_RESET(b, o)	__BTR((BUFFER *)(b) + 1, (o))
#define BUFFER_USERMAP_CLEAR(b)		memset((BUFFER *)(b) + 1, 0, BUFFER_BITMAP_SIZE / 8)
#define BUFFER_DIRTMAP_TEST(b, o)	__BT((BUFFER *)(b) + 1, (o) + BUFFER_BITMAP_SIZE)
#define BUFFER_DIRTMAP_SET(b, o)	__BS((BUFFER *)(b) + 1, (o) + BUFFER_BITMAP_SIZE)
#define BUFFER_DIRTMAP_RESET(b, o)	__BR((BUFFER *)(b) + 1, (o) + BUFFER_BITMAP_SIZE)
#define BUFFER_DIRTMAP_TEST_SET(b, o)	__BTS((BUFFER *)(b) + 1, (o) + BUFFER_BITMAP_SIZE)
#define BUFFER_DIRTMAP_TEST_RESET(b, o)	__BTR((BUFFER *)(b) + 1, (o) + BUFFER_BITMAP_SIZE)
#define BUFFER_DIRTMAP_CLEAR(b)		memset((__u8 *)((BUFFER *)(b) + 1) + BUFFER_BITMAP_SIZE / 8, 0, BUFFER_BITMAP_SIZE / 8)
#define BUFFER_VALIDMAP_TEST(b, o)	__BT((BUFFER *)(b) + 1, (o) + (BUFFER_BITMAP_SIZE << 1))
#define BUFFER_VALIDMAP_SET(b, o)	__BS((BUFFER *)(b) + 1, (o) + (BUFFER_BITMAP_SIZE << 1))
#define BUFFER_VALIDMAP_RESET(b, o)	__BR((BUFFER *)(b) + 1, (o) + (BUFFER_BITMAP_SIZE << 1))
#define BUFFER_VALIDMAP_TEST_SET(b, o)	__BTS((BUFFER *)(b) + 1, (o) + (BUFFER_BITMAP_SIZE << 1))
#define BUFFER_VALIDMAP_TEST_RESET(b, o) __BTR((BUFFER *)(b) + 1, (o) + (BUFFER_BITMAP_SIZE << 1))
#define BUFFER_VALIDMAP_CLEAR(b)	memset((__u8 *)((BUFFER *)(b) + 1) + BUFFER_BITMAP_SIZE / 4, 0, BUFFER_BITMAP_SIZE / 8)

/* Number of buckets in FS.bhash and the matching index mask. */
#define BUFFER_HASH_SIZE	__PAGE_CLUSTER_SIZE_DIV_SIZEOF_POINTER
#define BUFFER_HASH_MASK	__PAGE_CLUSTER_SIZE_DIV_SIZEOF_POINTER_MINUS_1

/* returns: NULL -- not found, != NULL -- found, data is mapped with KERNEL$MAP_PHYSICAL_BANK */
/* NOTE(review): GET_BUFFER_FILEDATA is the sign bit of int; presumably it is
   or-ed into `nsec' -- confirm against the implementation. */
#define GET_BUFFER_FILEDATA	__INT_SGN_BIT
void *VFS$GET_BUFFER(FS *fs, __d_off off, unsigned nsec, BUFFER **ptr, void *proc);

/* addr is address from VFS$GET_BUFFER */
void VFS$PUT_BUFFER(void *addr);
void VFS$PUT_BUFFER_AND_WAIT(void *addr);

/* when done, posts the pageinrq AST
	pageinrq->status == 0 -- ok, use VFS$GET_BUFFER / VFS$PUT_BUFFER
	pageinrq->status < 0 -- error */
/* NOTE(review): READ_BUFFER_FILEDATA is the sign bit of int; presumably it is
   or-ed into `reada' -- confirm against the implementation. */
#define READ_BUFFER_FILEDATA	__INT_SGN_BIT
void VFS$READ_BUFFER(__d_off off, PAGEINRQ *pageinrq, unsigned reada);
void VFS$PREFETCH_BUFFER(FS *fs, __d_off off, unsigned reada, PROC *proc);

void *VFS$ALLOC_BUFFER_SYNC(FS *fs, __d_off off, unsigned nsec, BUFFER **ptr);

/* Write-back and synchronisation entry points (implemented elsewhere). */
int VFS$SYNC_DISK(FS *fs);
void VFS$WRITE_BUFFER(BUFFER *b, PROC *proc);
void VFS$WRITE_BUFFERS(FS *fs, int syn);
void VFS$WAIT_FOR_BUFFERS(FS *fs);

void *VFS$READ_BUFFER_SYNC(FS *fs, __d_off off, unsigned nsec, unsigned reada, BUFFER **ptr);

/* buffer MUST be in hashtable (i.e. VFS$GET_BUFFER must have succeeded), nsectors must not cross page boundary */
void VFS$MARK_BUFFER_DIRTY(BUFFER *buf, unsigned boff, unsigned nsectors);
void VFS$CLEAN_BUFFERS_SYNC(FS *fs, __d_off sector, unsigned n_sectors);
void VFS$INVALIDATE_FILEBUFFERS(FS *fs, __d_off sector, unsigned n_sectors);
WQ *VFS$FREE_SOME_BUFFERS(FS *fs);

/*
 * Tell whether sector `blk' of buffer `b' holds valid data.
 *
 * A buffer without B_PARTIAL is valid as a whole; otherwise the sector's
 * bit in the valid map decides.  Returns 1 when valid, 0 when not.
 * In debug builds an out-of-range `blk' kills the kernel.
 */
static __finline__ int BUFFER_VALID(BUFFER *b, unsigned blk)
{
#if __DEBUG >= 1
	if (__unlikely(blk >= __SECTORS_PER_PAGE_CLUSTER)) {
		KERNEL$SUICIDE("BUFFER_VALID: SECTOR OUT OF PAGE: PAGE %016"__d_off_format"X, SECTOR %d", b->blk, blk);
	}
#endif
	/* common case: the whole buffer is valid */
	if (__likely(!(b->flags & B_PARTIAL))) return 1;
	/* partially valid buffer: look the sector up in the valid map */
	if (__likely(BUFFER_VALIDMAP_TEST(b, blk))) return 1;
	return 0;
}

/*
 * Tell whether all `n' sectors starting at sector `blk' of buffer `b' are
 * valid (see BUFFER_VALID).
 *
 * Returns 1 when every sector in the range is valid, 0 as soon as an
 * invalid one is found.  An empty range (n == 0) is valid by definition;
 * the loop is counted from the top so that n == 0 cannot wrap the unsigned
 * counter and walk past the end of the valid map.
 */
static __finline__ int BUFFERS_VALID(BUFFER *b, unsigned blk, unsigned n)
{
	for (; n; n--, blk++) {
		if (__unlikely(!BUFFER_VALID(b, blk))) return 0;
	}
	return 1;
}

	/*********
	 * WPAGE *
	 *********/

typedef struct __wpage WPAGE;

/*
 * Write-page record: a PAGE together with the BIORQ/BIODESC used for its
 * I/O.  The FS structure keeps WPAGE lists (wpages_prep, wpages_io), two
 * hashes (ihash, ehash) and a slab (wpages) for these records.
 * NOTE(review): the exact role of each list/hash entry below is inferred
 * from the field names only -- confirm against the writeback code.
 */
struct __wpage {
	LIST_ENTRY list_entry;	/* presumably on FS.wpages_prep or FS.wpages_io */
	LIST_ENTRY ihash_entry;	/* presumably in FS.ihash */
	LIST_ENTRY ehash_entry;	/* presumably in FS.ehash */
	BIORQ rq;
	BIODESC desc;
	PAGE *p;
	WPAGE *next;
	WPAGE *last;
	FS *fs;
	__d_off sec_cpy;
	int nsec_cpy;
};

/* NOTE(review): presumably the bucket count and index mask of FS.ihash
   (and FS.ehash) -- confirm. */
#define IHASH_SIZE	__PAGE_CLUSTER_SIZE_DIV_SIZEOF_POINTER
#define IHASH_MASK	__PAGE_CLUSTER_SIZE_DIV_SIZEOF_POINTER_MINUS_1

	/*****************
	 * ACCESS RIGHTS *
	 *****************/

/*
 * Capability tests on the `flags2' member of the object `h' points to.
 *	HAS_CAPABILITY		-- every bit of `cap' is set
 *	HAS_ANY_CAPABILITY	-- at least one bit of `cap' is set
 *	SOME_HIGHER_CAPABILITY	-- h1 has some CAP_ALL bit that h2 lacks
 * The arguments are evaluated more than once in HAS_CAPABILITY; do not pass
 * expressions with side effects.
 */
#define HAS_CAPABILITY(h, cap)		(((h)->flags2 & (cap)) == (cap))
#define HAS_ANY_CAPABILITY(h, cap)	(((h)->flags2 & (cap)) != 0)
#define SOME_HIGHER_CAPABILITY(h1, h2)	((((h1)->flags2 & ~(h2)->flags2) & CAP_ALL) != 0)

	/*********
	 * FNODE *
	 *********/

/* Small hash: XLIST_HEAD[SMALL_HASH_SIZE] allocated from FS.small_hash. */
#define SMALL_HASH_SIZE		32
#define SMALL_HASH_MASK		(SMALL_HASH_SIZE - 1)
/* NOTE(review): presumably the item count at which a small hash is
   considered full -- confirm. */
#define SMALL_HASH_ITEMS	32
/* Big hash: bucket count, index mask and switch-over threshold.
   NOTE(review): the unit of BIG_HASH_THRESHOLD is not visible here. */
#define BIG_HASH_SIZE		__PAGE_CLUSTER_SIZE_DIV_SIZEOF_POINTER
#define BIG_HASH_MASK		__PAGE_CLUSTER_SIZE_DIV_SIZEOF_POINTER_MINUS_1
#define BIG_HASH_THRESHOLD	(SMALL_HASH_SIZE * 32768)

/* Size of FNODE.inline_name. */
#define FNODE_INLINE_NAMELEN	24

/* FNODE_HEAD always contains an ino_t member; it is only named `ino' when
   VFS_INO is defined, so the structure layout is the same either way. */
#ifdef VFS_INO
#define __FNODE_INO ino_t ino;
#else
#define __FNODE_INO ino_t __pad;
#endif

/*
 * Member list shared by every FNODE; struct __fnode consists of exactly
 * these members.  NOTE(review): FSOPS.sizeof_FNODE suggests that
 * filesystem drivers embed FNODE_HEAD at the start of a larger structure
 * -- confirm against a driver.
 *
 * Which member of the union `u' is valid is told by FNODE.flags:
 * FNODE_SPAGE selects `s', FNODE_HASH selects `h', FNODE_DIRECTORY
 * selects `d'.
 *
 * The macro intentionally ends without a semicolon; the user supplies it.
 */
#define FNODE_HEAD							\
	LIST_ENTRY hash_entry;	/* entry of FNODE->u.d.hash */		\
	LIST_ENTRY dirent_entry;/* entry of FNODE->u.d.clean/dirty */	\
	char *name;							\
	unsigned char namelen;						\
	unsigned short depth;						\
	XLIST_HEAD handles;	/* list to HANDLE->fnode_entry; */	\
	FNODE *parent;		/* NULL for root fnode */		\
	unsigned flags;							\
	int readers;		/* number of readers */			\
	int dio_writers;	/* number of direct io writers */	\
	WQ wait;							\
									\
	LIST_ENTRY synclist_entry;	/* valid if FNODE_SYNCLIST */	\
									\
	VMENTITY e;							\
									\
	PROC *syncproc; /* may be non-null when on synclist */		\
									\
	LIST_ENTRY free_entry;						\
									\
	union {								\
		struct {						\
			SPAGE s;					\
		} s;							\
		struct {						\
			XLIST_HEAD *hash;/* hash of pages */		\
			unsigned hash_mask;				\
			LIST_HEAD clean;				\
			LIST_HEAD dirty;				\
		} h;							\
		struct {						\
			XLIST_HEAD *hash;	 /* hash of FNODES */	\
					/* might be NULL */		\
	/* the hash may NEVER shrink !!!  (because of readdir) */	\
			unsigned hash_mask;				\
			LIST_HEAD clean;  /* list of fnodes */		\
			LIST_HEAD dirty;  /* list of fnodes */		\
			int n_clean;					\
			int n_dirty;					\
		} d;							\
	} u;								\
									\
	_u_off_t size;							\
	_u_off_t disk_size;						\
	FS *fs;								\
									\
	__d_off disk_blk;						\
	__d_off file_blk;						\
	__d_off run_length;						\
									\
	time_t mtime;							\
	time_t ctime;							\
									\
	int pending_writes;						\
	WQ pending_write_wait;	/* !!! FIXME: useless, fnode free path can be used instead */		\
									\
	FNODE *move_link;						\
									\
	__FNODE_INO							\
									\
	char inline_name[FNODE_INLINE_NAMELEN]				\


/* Bits stored in FNODE.flags. */
#define FNODE_BUSY		1	/* no access allowed, sleep on FNODE->wait */
#define FNODE_FILE		2	/* file, but don't know if spage or hash */
#define FNODE_DIRECTORY		4	/* `d' structure present */
#define FNODE_SPAGE		8	/* `s' structure present */
#define FNODE_HASH		16	/* `h' structure present */
#define FNODE_NEGATIVE		32
#define FNODE_COMPLETE		64	/* directory has all its entries in cache */
#define FNODE_INVALID		128
#define FNODE_DIRTY		256	/* fnode is dirty */
#define FNODE_DIRTYLIST		512	/* fnode is on dirtylist (i.e. some of its subfnodes are dirty) */
#define FNODE_WRITELOCK		1024	/* don't do physical read, wait on FNODE->wait */
#define FNODE_UNCOMMITTED	2048
#define FNODE_SYNCLIST		4096	/* fnode is on synclist */
#define FNODE_OUT_OF_SYNC	8192
#define FNODE_KILLED		16384
#define FNODE_SYNC_BARRIER	32768
#define FNODE_DONT_ZAP		65536	/* temporary */
#define FNODE_QUOTA		131072
#define FNODE_WANTFREE		262144
#define FNODE_MOVE_SRC		524288
#define FNODE_MOVE_DST		1048576
#define FNODE_WRITEPAGES	2097152
#define FNODE_SRC_UNCOMMITTED	4194304
#define FNODE_TIMED_SYNC	8388608
#define FNODE_UNCOMMITTED_PREALLOC_HINT	16777216

/* fnode might not be released because of
	readers != 0	(there are pagein requests on it)
	FNODE_WRITELOCK	(fnode is written by the syncer)
	FNODE_BUSY	(fnode is read and does not contain valid data)
	FNODE_DIRTY	(might be released, but data will be lost)
*/

/* allowed fnode states are:
	FNODE_INVALID	-- fnode is invalid -- when found, free it and read it
			   again; fnode does not have any subfnodes.
	FNODE_BUSY	-- fnode is loading -- must not be freed, because IO
			   request points at it. when found, wait on
			   fnode->wait and you will be waked. fnode might have
			   subfnodes (in BUSY state too), but do not touch them.
	FNODE_NEGATIVE	-- there is no such dirent with that name -- when found,
			   report an "-ENOENT" error.
	FNODE_FILE	-- fnode is file fnode, but neither "s" nor "h" fields
			   are valid. fnode size is ok, but no data are loaded
			   in memory.
	FNODE_FILE | FNODE_SPAGE -- fnode is file and "s" field is valid.
	FNODE_FILE | FNODE_HASH -- fnode is file and "h" field is valid.
	FNODE_DIRECTORY	-- fnode is directory and "d" field is valid.
	FNODE_DIRECTORY | FNODE_COMPLETE -- fnode is directory and all its
			   dirents are in its hash -- there's no need to have
			   any NEGATIVE dirents. when filename is not found in
			   hash, do not do IO and report "-ENOENT".
	FNODE_FILE, FNODE_DIRECTORY | FNODE_DIRTY -- fnode needs to be written
			   to disk.
	FNODE_FILE, FNODE_DIRECTORY | FNODE_DIRTYLIST -- fnode is on dirtylist
			   -- i.e. it is dirty or it contains dirty subfnode.
	FNODE_FILE, FNODE_DIRECTORY | FNODE_UNCOMMITTED -- fnode is in memory
			   but not on disk and not on disk in parent directory.
	FNODE_FILE, FNODE_DIRECTORY | FNODE_NEGATIVE | FNODE_DIRTY -- fnode is
			   on disk and in parent directory on disk, but has been
			   deleted. Other fnode fields are valid but do not use
			   them. Use them only to delete the fnode. You may
			   "resurrect" fnode by clearing the FNODE_NEGATIVE
			   flag.
	FNODE_DIRECTORY | FNODE_NEGATIVE | FNODE_DIRTY | FNODE_OUT_OF_SYNC --
			   like previous, but the directory cannot be
			   resurrected.
	FNODE_FILE, FNODE_DIRECTORY | FNODE_KILLED -- fnode could not be written
			   to disk because of an error. Fnode behaves like
			   normal fnode, except that no fs operations are ever
			   called on it.
	FNODE_SYNC_BARRIER -- not a real fnode; should never appear in the tree.
			   Only wait, flags and synclist_entry are valid.

filesystem-specific part of fnode is valid if
	fnode is neither UNCOMMITTED nor KILLED and one of FNODE_FILE or FNODE_DIRECTORY is set

*/

/* cookie is: hash bucket (unsigned) , order from the end of list (unsigned) , complete (unsigned) */
/* Size in bytes of the three-word internal readdir cookie described above. */
#define INTERNAL_READDIR_COOKIE_SIZE   (sizeof(unsigned) + sizeof(unsigned) + sizeof(unsigned))

/* Generic fnode: exactly the members listed in FNODE_HEAD. */
struct __fnode {
	FNODE_HEAD;
};

	/******
	 * FS *
	 ******/

/* High-order flag bits.  NOTE(review): presumably stored in FS.flags; the
   low-order FS_SPARSE_FILES / FS_COMMIT_AFTER_OVERFLOW / FS_RO_ONLY bits
   are defined separately further down -- confirm which word holds which. */
#define FS_RO			0x80000000
#define FS_COMMIT_FREES_DATA	0x40000000
#define FS_MORE_SAME_NAMES	0x20000000
#define FS_BACKGROUND_INIT	0x10000000
#define FS_DIRTY		0x08000000
#define FS_SHUTTING_DOWN	0x04000000
#define FS_SOMETHING_TO_COMMIT	0x02000000

/* Expands to a dev_t member named `st_dev' when VFS_INO is defined and to
   an equally sized `__pad' otherwise.
   NOTE(review): nothing in this header references __FS_DEV; FS_HEAD
   declares `dev_t st_dev' unconditionally. */
#ifdef VFS_INO
#define __FS_DEV dev_t st_dev;
#else
#define __FS_DEV dev_t __pad;
#endif

/*
 * Member list shared by every FS (mounted filesystem) structure;
 * struct __fs consists of exactly these members.  NOTE(review):
 * FSOPS.sizeof_FS suggests that filesystem drivers embed FS_HEAD at the
 * start of a larger structure -- confirm against a driver.
 *
 * The macro intentionally ends without a semicolon; the user supplies it.
 */
#define FS_HEAD								\
	__const__ FSOPS *fsops;						\
	FNODE *root;							\
	unsigned flags;							\
									\
	int disk_handle_num;						\
									\
	unsigned char pageio_bits;					\
	unsigned short block_mask;					\
	unsigned pageio_mask;						\
	_u_off_t max_filesize;						\
	__d_off size;							\
	unsigned long total_dirty;					\
									\
	unsigned page_readahead;/* sectors read ahead for page cache */	\
									\
	PAGEZONE z;							\
	struct __slhead fnodes;	/* FNODE */				\
	struct __slhead names; /* char[MAX_NAMELEN] */			\
									\
	WQ freemem;   /* sleep if can't alloc from `z', `fnodes', `names', `spages' or `smallhash' */\
									\
	struct __slhead pageinrq;					\
	WQ pageinrq_wait;	/* sleep if can't alloc from `pageinrq' */\
									\
	struct __slhead spages;						\
	XLIST_HEAD *slist;	/* freelist in __spage */		\
				/* spages of size S are on slist[S-1] */\
									\
	struct __slhead small_hash;/*XLIST_HEAD[SMALL_HASH_SIZE],must have ctor\
		used for page/dentry hashes of small files/small directories.\
		Big hashes do not require KPAGEZONE, because if allocation of\
		big hash fails, small hash will be used */		\
	unsigned long n_bighash;					\
									\
	unsigned buffer_readahead;/* pages read ahead for buffer cache */\
	LIST_HEAD clean_buffers;					\
	LIST_HEAD dirty_buffers;					\
	XLIST_HEAD *bhash;	/* BUFFER->hash */			\
	  /* size of this field is one page, it has BUFFER_HASH_SIZE entries */\
	PAGEZONE bz;		/* pagezone for buffers */		\
	struct __slhead buffers;/* BUFFER */				\
	unsigned long n_buffers;					\
	WQ freebuffer;	/* sleep if can't alloc from `bz' or `buffers' */\
									\
	struct __slhead brq;						\
									\
	XLIST_HEAD *ihash;						\
	XLIST_HEAD *ehash;						\
	XLIST_HEAD wpages_prep;						\
	long n_wpages_prep;						\
	XLIST_HEAD wpages_io;						\
	struct __slhead wpages;						\
	WQ wpage_wait;							\
	unsigned wpage_seq;						\
									\
	int write_buf_ops;						\
	WQ sync_buf_done;						\
									\
	unsigned char need_sync;					\
									\
	unsigned readdir_cookie_size;					\
									\
	char *tmp_dirtmap;						\
									\
	char device_name[__MAX_STR_LEN];				\
	char filesystem_name[__MAX_STR_LEN];				\
	dev_t st_dev;							\
									\
	void *lnte;							\
	void *dlrq;							\
	char mounted;							\
									\
	long bio_request_size;						\
									\
	THREAD_RQ syncer;						\
	char terminate_syncer;						\
	unsigned buffer_flush_time;					\
	WQ syncer_wait;							\
	WQ sync_done_wait;						\
									\
	PROC *syncproc;							\
	LIST_HEAD synclist;						\
	FNODE *owait;							\
									\
	char wantfree_active;						\
	IORQ wantfree;							\
	XLIST_HEAD wantfree_pages;					\
	XLIST_HEAD wantfree_fnodes;					\
									\
	TIMER sync_timer;						\
	unsigned sync_time;						\
									\
	char ignore_write_errors;					\
	char write_error;						\
	__d_off write_error_sector;					\
									\
	void *dummy_pageinrq;						\
	void *dummy_brq;						\
	void *dummy_wpage

/* Generic filesystem instance: exactly the members listed in FS_HEAD. */
struct __fs {
	FS_HEAD;
};

/* NOTE(review): presumably bit values for FS.need_sync -- confirm against
   the syncer code. */
#define FS_SYNC_NOW		1
#define FS_SYNC_SOMETIMES	2
#define FS_SYNC_IDLE		4

/* wr flag is PF_READ, PF_WRITE or PF_RW */
/* for lookup requests it is O_xxx open flags + 1 */
/* for readdir requests it is 0 (readdir) or 1 (rmdir) */

/*
 * Member list shared by every PAGEINRQ; struct __pageinrq consists of
 * exactly these members.  The request begins with IORQ_HEAD; `caller' is
 * the request on whose behalf this one runs and may be NULL (see
 * WQ_WAIT_PAGEINRQ).  `prefetch_tag' doubles as a marker caller for
 * prefetches (see PAGEIN_IS_PREFETCH).
 *
 * The macro intentionally ends without a semicolon; the user supplies it.
 */
#define PAGEINRQ_HEAD			\
	IORQ_HEAD;			\
	IORQ *caller;			\
	PROC_TAG tag;			\
	FS *fs;				\
	FNODE *fnode;			\
	int wr;				\
	__d_off off;			\
	FNODE *new_fnode;		\
	PAGE *page;			\
	VDESC vdesc;			\
	PREFETCH_TAG prefetch_tag;	\
	BRQ brq				\


/* !!! WARNING: PAGEINRQ->fn must do IO_DISABLE_CHAIN_CANCEL(RQ->caller) at the beginning */

/* Generic page-in request: exactly the members listed in PAGEINRQ_HEAD. */
struct __pageinrq {
	PAGEINRQ_HEAD;
};

/* True when `callr' is the address of page_in's own prefetch_tag, i.e. the
   request has no real caller and was issued as a prefetch. */
#define PAGEIN_IS_PREFETCH(page_in, callr)	((callr) == (IORQ *)&(page_in)->prefetch_tag)

/*
 * Queue `pageinrq' on wait queue `wq'.
 *
 * The request is marked RQS_PROCESSING.  If it has a caller, then at
 * SPL_TOP either
 *	- the caller is marked RQS_CHAINCANCELABLE and its tmp2 is set to the
 *	  address of the page-in request, or
 *	- when the caller already has RQS_WANTCANCEL, that status is copied
 *	  to the page-in request instead.
 * The SPL is then lowered to SPL_FS and the request is passed to WQ_WAIT.
 *
 * `pageinrq' is evaluated several times, so it must not have side effects.
 * Every use is parenthesized (including the one inside the cast) so that
 * an expression argument such as `rq + 1' is handled correctly.
 */
#define WQ_WAIT_PAGEINRQ(wq, pageinrq)					\
do {									\
	(pageinrq)->status = RQS_PROCESSING;				\
	RAISE_SPL(SPL_TOP);						\
	if (__likely((pageinrq)->caller != NULL)) {			\
		if (__likely((pageinrq)->caller->status != RQS_WANTCANCEL)) {\
			(pageinrq)->caller->status = RQS_CHAINCANCELABLE;\
			(pageinrq)->caller->tmp2 = (unsigned long)(pageinrq);\
		} else {						\
			(pageinrq)->status = RQS_WANTCANCEL;		\
		}							\
	}								\
	LOWER_SPL(SPL_FS);						\
	WQ_WAIT((wq), (pageinrq), KERNEL$SUCCESS);			\
} while (0)

void VFS$FNODE_OUT_OF_WANTFREE(FNODE *f);

/* Inline fast path for VFS$FNODE_OUT_OF_WANTFREE: the out-of-line routine
   is called only when the fnode actually carries FNODE_WANTFREE. */
static __finline__ void FNODE_OUT_OF_WANTFREE(FNODE *f)
{
	if (__likely(!(f->flags & FNODE_WANTFREE)))
		return;
	VFS$FNODE_OUT_OF_WANTFREE(f);
}

/*
 * Schedule fnode `f' for writing without waiting for completion.
 *
 * A fnode that is dirty and not yet on the synclist is appended to its
 * filesystem's synclist and flagged FNODE_SYNCLIST.  The syncer is woken
 * in every case, even when nothing was queued.
 *
 * !!! FIXME: set syncproc to responsible process
 */
static __finline__ void WRITE_FNODE(FNODE *f)
{
	FS *owner = f->fs;
	unsigned state = f->flags & (FNODE_SYNCLIST | FNODE_DIRTY);

	if (__likely(state == FNODE_DIRTY)) {
		f->flags |= FNODE_SYNCLIST;
		ADD_TO_LIST_END(&owner->synclist, &f->synclist_entry);
	}
	WQ_WAKE_ONE(&owner->syncer_wait);
}

/*
 * Initialise the directory part (u.d) of fnode `f' to an empty directory:
 * no hash table, empty clean and dirty lists, both counters zero.
 * u.d.hash_mask is not touched here.
 */
static __finline__ void VFS_INIT_DIR(FNODE *f)
{
	/* counters first, then the containers they describe */
	f->u.d.n_dirty = 0;
	f->u.d.n_clean = 0;
	INIT_LIST(&f->u.d.dirty);
	INIT_LIST(&f->u.d.clean);
	f->u.d.hash = NULL;
}

/*
 * Queue the sync barrier `barrier' on the synclist of `fs' on behalf of
 * process `proc' and wake the syncer.
 *
 * A writeback tag is acquired for `proc' first.  The barrier is then set
 * up as a pseudo-fnode: its flags are overwritten with
 * FNODE_SYNC_BARRIER | FNODE_SYNCLIST, its syncproc is set and its wait
 * queue is initialised before it becomes visible on the list.
 */
static __finline__ void SYNC(FS *fs, FNODE *barrier, PROC *proc)
{
	KERNEL$ACQUIRE_WRITEBACK_TAG(proc);

	/* prepare the barrier completely before linking it in */
	WQ_INIT(&barrier->wait, "VFS$BARRIER_WAIT");
	barrier->flags = FNODE_SYNC_BARRIER | FNODE_SYNCLIST;
	barrier->syncproc = proc;

	ADD_TO_LIST_END(&fs->synclist, &barrier->synclist_entry);
	WQ_WAKE_ONE(&fs->syncer_wait);
}

/*
 * Take `barrier' off the synclist if it is still flagged FNODE_SYNCLIST.
 * The flag itself is left unchanged and `fs' is not used.
 */
static __finline__ void CANCEL_SYNC(FS *fs, FNODE *barrier)
{
	if (__unlikely(!(barrier->flags & FNODE_SYNCLIST)))
		return;
	DEL_FROM_LIST(&barrier->synclist_entry);
}

/* NOTE(review): presumably bits for FSOPS.flags (filesystem driver
   capabilities) -- confirm; they share the FS_ prefix with the high-order
   FS.flags bits defined earlier. */
#define FS_SPARSE_FILES			0x01
#define FS_COMMIT_AFTER_OVERFLOW	0x02
#define FS_RO_ONLY			0x04

/*
 * Sizes of the generic VFS structures as seen by whoever compiled the
 * FSOPS table.  NOTE(review): presumably compared at mount time to detect
 * a driver built against a different version of this header -- confirm in
 * VFS$MOUNT.
 */
struct __vfsid {
	unsigned sizeof_FS;
	unsigned sizeof_FNODE;
	unsigned sizeof_PAGEINRQ;
	unsigned sizeof_FSOPS;
};

/* Initialiser for FSOPS.vfsid filled with the sizes of the generic types. */
#define DEFAULT_VFSID	{ sizeof(FS), sizeof(FNODE), sizeof(PAGEINRQ), sizeof(FSOPS) }

/*
 * Operations table of a filesystem driver; passed to VFS$MOUNT and
 * referenced from FS.fsops.
 *
 * `vfsid' holds the sizes of the generic structures (see DEFAULT_VFSID);
 * the separate sizeof_FS / sizeof_FNODE / sizeof_PAGEINRQ members are,
 * presumably, the sizes of the driver's own extended structures --
 * NOTE(review): confirm.
 */
struct __fsops {
	VFSID vfsid;
	char *name;
	unsigned flags;
	unsigned sizeof_FS;
	unsigned sizeof_FNODE;
	void (*ctor_FNODE)(void *fs, void *fnode);
	unsigned sizeof_PAGEINRQ;
	void (*ctor_PAGEINRQ)(void *fs, void *pageinrq);
	int (*process_option)(FS *fs, char *opt, char *optend, char *str);
		/* process mount options. returns:
			> 0	-- syntax error (unknown option etc.)
			== 0	-- ok, option processed
			< 0	-- return value is error code,
				    must fill also fs->error with error message,
				    fs->error points to a message buffer of
				    __MAX_STR_LEN bytes
		*/
		/* NOTE(review): FS_HEAD as declared in this header has no
		   member named `error'; the comment above may be stale. */
	int (*mount)(FS *fs);
	void (*sync_background_init)(FS *fs);
	void (*umount)(FS *fs, int nosync);
	void (*set_dirty)(FS *fs, int dirty);
	int (*validate_filename)(FS *fs, __const__ char *filename);
	void (*lookup)(PAGEINRQ *pagein);   /* fill in pagein->fn, init pagein and post AST */
	int readdir_cookie_size;
	void (*init_readdir_cookie)(struct readdir_buffer *buf, FNODE *f);
	void (*readdir)(PAGEINRQ *readdir, struct readdir_buffer *buf);
	void (*writepages)(FS *fs, int syn);
	void (*commit)(FS *fs);
	void (*init_fnode_quota)(FNODE *f);
	int (*create_fnode)(FNODE *f);
	int (*write_fnode_data)(FNODE *f);
	int (*add_fnode_to_directory)(FNODE *f);
	int (*delete_fnode)(FNODE *f);
	int (*remove_fnode_from_directory)(FNODE *f);
	void (*free_fnode)(FNODE *f);

	/* block mapping */
	void (*bmap)(PAGEINRQ *rq);
	int (*sync_bmap)(FNODE *f, __d_off off, int try);
	void (*init_last_page)(FNODE *f, char *ptr, unsigned len);

	/* accounting; `acct' is presumably one of ACCOUNT_PAGE, ACCOUNT_FILE,
	   ACCOUNT_DIR */
	void *(*account)(FNODE *f, int acct, PAGE *p);
	void (*unaccount)(FNODE *f, int acct, PAGE *p);

	void (*stat)(FNODE *f, struct stat *stat);
	WQ *(*statfs)(FS *fs, struct statfs *stat);
};

/* NOTE(review): presumably the values of the `acct' argument of
   FSOPS.account / FSOPS.unaccount -- confirm. */
#define ACCOUNT_PAGE	0
#define ACCOUNT_FILE	1
#define ACCOUNT_DIR	2

/* NOTE(review): presumably values for the `flags' argument of
   VFS$DO_FOR_SUBTREE -- confirm whether they may be combined. */
#define DFS_PREORDER	0
#define DFS_POSTORDER	1
#define DFS_DIRTY	2
/* Fnode tree helpers (implemented elsewhere). */
void VFS$NULL_FNODE_CALL(FNODE *f);
void *VFS$DO_FOR_SUBTREE(FNODE *ff, void *param, int flags, void *(*op)(FNODE *f, void *param));
int VFS$IS_FNODE_LOCKED(FNODE *f);
WQ *VFS$FREE_PAGE(PAGE *p);
void VFS$FREE_CLEAN_PAGES(FNODE *f);
void VFS$WRITE_AND_FREE_FNODE_PAGES(FNODE *f);
WQ *VFS$FREE_FNODE(FNODE *f);
void VFS$KILL_FNODE_HANDLES(FNODE *f);
void *VFS$KILL_SUBTREE_HANDLES_AND_PAGES(FNODE *f, IORQ *rq);
int VFS$FREE_SOME_DATA(FS *fs);

/* Mounting and dirty-state tracking.  VFS$FNODE_CTOR and VFS$PAGEINRQ_CTOR
   have the signatures of FSOPS.ctor_FNODE and FSOPS.ctor_PAGEINRQ. */
int VFS$MOUNT(int argc, char *argv[], __const__ FSOPS *f, char *driver_name);
void VFS$FNODE_CTOR(void *fs, void *fnode);
void VFS$PAGEINRQ_CTOR(void *fs, void *pageinrq);
FNODE *VFS$GET_ROOT_FNODE(FS *fs);
WQ *VFS$MAY_DIRTY(PROC *p, FS *fs);
void VFS$SET_DIRTY(FNODE *f);
void VFS$SET_DIRTYLIST(FNODE *f);
void VFS$UNSET_DIRTY(FNODE *f);
void VFS$UNSET_DIRTYLIST(FNODE *f);

/* Routines with the signatures of FSOPS.writepages, FSOPS.init_last_page
   and FSOPS.commit (presumably usable as default implementations). */
void VFS$DEFAULT_WRITEPAGES(FS *fs, int syn);
void VFS$INIT_LAST_PAGE(FNODE *f, char *ptr, unsigned len);
void VFS$DEFAULT_COMMIT(FS *fs);
void *VFS$OVERACCOUNT(FNODE *root);

void VFS$PREPARE_CONTIG_ALLOC(FNODE *nf, int (*is_ok)(FNODE *));

/* Page-in request disposal; each returns an IORQ pointer. */
IORQ *VFS$FREE_LOOKUP_PAGEIN(PAGEINRQ *rq, int negative);
IORQ *VFS$FREE_PAGEIN(PAGEINRQ *rq);
IORQ *VFS$FREE_EMPTY_PAGEIN(PAGEINRQ *rq);

/* readdir support for filesystem drivers */
void *VFS$MAP_READDIR(IOCTLRQ *rq, int rmd, FS *fs, unsigned *len, vspace_unmap_t **unmap);
void VFS$UNMAP_READDIR(void *ptr, void (*unmap)(void *ptr));
int VFS$DO_READDIR_NAME(PAGEINRQ *rq, void *map, unsigned len, __const__ char *name, unsigned namelen, int dt_flags);
int VFS$DONE_READDIR(PAGEINRQ *rq);

/* prefetch and block-mapping support */
void VFS$PREFETCH_FILE(_u_off_t pos, FNODE *f, PROC *proc);
int VFS$BMAP_OK(FNODE *f, __d_off off);
void VFS$BMAP_DONE(PAGEINRQ *rq);

int VFS$WRITEPAGES(FNODE *f);

__END_DECLS

#endif
