#include <KERNEL/PROC.H>
#include <KERNEL/PROCARCH.H>
#include <SPAD/SLAB.H>
#include <SPAD/VM.H>
#include <KERNEL/ASM.H>
#include <SPAD/LIST.H>
#include <SPAD/QUOTA.H>
#include <SPAD/WQ.H>
#include <KERNEL/VMPROC.H>
#include <SPAD/TIMER.H>
#include <KERNEL/TIME.H>
#include <SPAD/SCHED.H>
#include <KERNEL/DEV.H>
#include <KERNEL/UDATA.H>
#include <KERNEL/UACCESS.H>
#include <KERNEL/VM_ARCH.H>
#include <KERNEL/SYSCALL.H>
#include <SPAD/LIBPROC.H>
#include <STDLIB.H>
#include <SIGNAL.H>
#include <ARCH/CPU.H>

/* Scheduling period, expressed as a fraction of JIFFIES_PER_SECOND;
   KERNEL_PROC_INIT copies it into SCHEDULE_TIME. */
#define MAX_SCHEDULE_TIME		(JIFFIES_PER_SECOND / 10)
/* Interval with which RECLAIM_AST re-arms reclaim_timer. */
#define RECLAIM_TIME			(JIFFIES_PER_SECOND * 5)
/* I/O scheduler accounting periods are FINE_TICKS_PER_MINUTE shifted right
   by this amount. */
#define IOSCHED_FINE_TICKS_SHIFT	4

/* Number of PROC objects KERNEL_PROC_INIT reserves in proc_slab. */
#define N_RESERVED_PROCS		4

PROC *PROC_CURRENT;
int PROC_CURRENT_LOCK;
PROC *PROC_FPU = NULL;
long KERNEL$FPU_ENABLED = 0;
/* Process selected to run by SCHEDULE / PROC_UNBLOCK; NULL when the scheduler
   found nothing runnable or the running process has just blocked. */
PROC *PROC_RUN = NULL;

/* Process whose scheduler node is charged by DO_PROC_ACCOUNT. */
PROC *KERNEL$PROC_ACCOUNT /* = &KERNEL$PROC_KERNEL initialized in DEV_INIT */;

/* This one could be static, but then the compiler places it into a different
   section, which causes more cache misses. */
/* Fine-tick timestamp up to which CPU time has already been accounted. */
sched_unsigned PROC_ACCOUNT_TICKS;
WQ_DECL(KERNEL$LOCKUP_EVENTS, "KERNEL$LOCKUP_EVENTS");
#define BORROW_PER_MINUTE	(60 * JIFFIES_PER_SECOND / (SCHEDULE_TIME))
static unsigned SCHEDULE_TIME;	/* when changing, need to recompute all process' borrowlatency */
/* Periodic timer and the AST it posts; the AST re-arms the timer and calls
   SCHEDULE. */
static TIMER SCHEDULE_TIMER;
static AST SCHEDULE_AST;
static void SCHEDULE_TIMER_FN(TIMER *t);
extern AST_STUB SCHEDULE_AST_FN;

WQ_DECL(PROC_CURRENT_LOCK_WAIT, "KERNEL$PROC_CURRENT_LOCK_WAIT");

/* Set to 1 as the last step of KERNEL_PROC_INIT. */
int PROC_INITIALIZED = 0;

/* !!! FIXME: check for infinite loop waiting for these (especially proc_slab) */
/* VM entity type ids (filled in by KERNEL$CACHE_REGISTER_VM_TYPE in
   KERNEL_PROC_INIT) and the slabs backing PROC, HANDLE and UIORQ objects. */
int proc_vm_entity;
static struct __slhead proc_slab;
int handle_vm_entity;
static struct __slhead handle_slab;
static struct __slhead uiorq_slab;

/* Processes queued for trimming of their reserved handles/uiorqs; drained by
   RECLAIM_AST. */
static DECL_XLIST(reclaim_list);

/* max_iosched is used as an exclusive upper bound of I/O scheduler indices in
   use (loops run "j < max_iosched"); ioschedalloc is the allocation bitmap
   with one bit per possible I/O scheduler. */
static int max_iosched = 0;
static unsigned long ioschedalloc[(MAX_IO_SCHED + sizeof(unsigned long) * 8 - 1) / (sizeof(unsigned long) * 8)] = { 0 };

/* VM entity callbacks for processes and handles (defined later in this file). */
static int PROC_CHECKMAP(VMENTITY *e);
static void PROC_WRITE(VMENTITY *e, PROC *p, int trashing);
static int PROC_SWAPOUT(VMENTITY *e);
static int HANDLE_CHECKMAP(VMENTITY *e);
static void HANDLE_WRITE(VMENTITY *e, PROC *p, int trashing);
static int HANDLE_SWAPOUT(VMENTITY *e);

/* Callback tables registered with KERNEL$CACHE_REGISTER_VM_TYPE in
   KERNEL_PROC_INIT. */
static __const__ VMENTITY_T proc_calls = { PROC_CHECKMAP, PROC_WRITE, PROC_SWAPOUT, "PROC" };
static __const__ VMENTITY_T handle_calls = { HANDLE_CHECKMAP, HANDLE_WRITE, HANDLE_SWAPOUT, "HANDLE" };

static void PROC_UNBLOCK(PROC *p);
static void SHUTDOWN_PROCESS(PROC *p, WQ **wq);
static int SHUTDOWN_SOME_HANDLES(PROC *p, int for_myself);
static int SHUTDOWN_SOME_UIORQS(PROC *p, int for_myself);

static void INIT_RECLAIM_TIMER(void);
static void INIT_SYSCALLS(void);

extern IO_STUB FAULT_IORQ_SUCCESS;
extern AST_STUB SOFTFAULT_IORQ_FAILURE;

static void PROC_TIMER_FN(TIMER *t);
extern IO_STUB PROC_TIMER_IORQ;
extern AST_STUB PROC_TIMER_FAILED;

/* Instantiate scheduler helper code from macros declared in the scheduler
   header; the CPU scheduler variant is parameterized by BORROW_PER_MINUTE. */
SCHED_GEN_UPDATE(BORROW_PER_MINUTE)
SCHED_D_GEN_UPDATE

/*
 * Slab constructor for HANDLE objects: constructs the VM entity attached to
 * the handle and tags it with the handle VM entity type.
 * The first argument is unused.
 */
static void HANDLE_CTOR(void *null, void *handle_)
{
	HANDLE *hnd = handle_;

	CACHE_CONSTRUCT_VM_ENTITY(HANDLE_TO_VMENTITY(hnd));
	HANDLE_TO_VMENTITY(hnd)->type = handle_vm_entity;
}

/*
 * Slab constructor for PROC objects. Puts every list, wait queue and
 * back-pointer embedded in the process into its initial state; per-use
 * fields are set later by ALLOC_PROC / PROC_INIT_COMMON.
 * The first argument is unused.
 */
static void PROC_CTOR(void *null, void *proc_)
{
	PROC *pr = proc_;
	int idx;

	ARCH_PROC_CTOR(pr);

	/* fork links and the list of children */
	pr->fork_child = NULL;
	pr->fork_parent = NULL;
	INIT_XLIST(&pr->children);

	/* handle hash table starts out as the embedded default table */
	pr->handles = pr->default_handles;
	idx = 0;
	while (idx < DEFAULT_HANDLE_HASH_SIZE) {
		INIT_XLIST(&pr->default_handles[idx]);
		idx++;
	}
	INIT_XLIST(&pr->reserved_handle_list);
	pr->handle_hash_mask = DEFAULT_HANDLE_HASH_SIZE - 1;

	/* embedded reserved handles, pushed in descending index order */
	pr->n_reserved_handles = RESERVED_HANDLES;
	idx = RESERVED_HANDLES;
	while (--idx >= 0) {
		ADD_TO_XLIST(&pr->reserved_handle_list, &pr->reserved_handles[idx].proc_hash);
		pr->reserved_handles[idx].name_addrspace = pr;
	}

	/* embedded reserved user I/O requests, same scheme */
	INIT_XLIST(&pr->uiorqs);
	pr->n_reserved_uiorqs = RESERVED_UIORQS;
	INIT_XLIST(&pr->reserved_uiorq_list);
	idx = RESERVED_UIORQS;
	while (--idx >= 0) {
		ADD_TO_XLIST(&pr->reserved_uiorq_list, &pr->reserved_uiorqs[idx].list);
		pr->reserved_uiorqs[idx].proc = pr;
	}

	INIT_XLIST(&pr->prefetch);
	pr->writeback = 0;
	/* every I/O scheduler slot points back at its owning process */
	for (idx = 0; idx < MAX_IO_SCHED; idx++) {
		pr->ios[idx].p = pr;
	}

	WQ_INIT(&pr->shutdown, "KERNEL$PROC_SHUTDOWN");
	WQ_INIT(&pr->ioq_wait, "KERNEL$IOQ_WAIT");
	WQ_INIT(&pr->free_resources, "KERNEL$PROC_FREE_RESOURCES");

	pr->on_reclaim_list = 0;
	pr->vspace.op = &PROC_VSPACE_OPERATIONS;

	/* per-process timer is constructed but not armed */
	pr->timer.fn = PROC_TIMER_FN;
	VOID_LIST_ENTRY(&pr->timer.list);
	pr->timer_iorq_posted = 0;

	CACHE_CONSTRUCT_VM_ENTITY(&pr->vme);
	pr->vme.type = proc_vm_entity;
	CACHE_PROC_CTOR(pr);
}

/*
 * Initialization shared by the kernel process and freshly allocated
 * processes: first-stage init of all quotas and the CPU scheduler node,
 * default I/O scheduler priority bounds, and an emptied logical-name cache.
 */
static __finline__ void PROC_INIT_COMMON(PROC *p)
{
	int slot;

	QINIT(&p->procq);
	QRINIT(&p->hq);
	QRINIT(&p->uq);
	QRINIT(&p->ioq);
	QINIT(&p->pgtblq);
	QINIT(&p->vmq);
	QINIT(&p->wireq);
	SINIT(&p->sch);

	p->iosch_mininvpri = DEFAULT_MININVPRI;
	p->iosch_maxinvpri = DEFAULT_MAXINVPRI;

	/* same reset as CLEAR_LN_CACHE performs */
	slot = 0;
	while (slot < LN_CACHE_SIZE) {
		p->ln_cache[slot].name[0] = 0;
		p->ln_cache[slot].touch = 0xff;
		slot++;
	}
}

/*
 * One-time initialization of the process subsystem: registers the PROC and
 * HANDLE VM entity types, creates the slabs, fills in the statically
 * allocated kernel process (KERNEL$PROC_KERNEL), starts the reclaim and
 * scheduling timers and finally sets PROC_INITIALIZED.
 * Registration or reservation failures halt the kernel.
 */
void KERNEL_PROC_INIT(void)
{
	int r, i;
	/* x & (x - 1) is nonzero exactly when x is not a power of two */
	if (sizeof(VMMAP) & (sizeof(VMMAP) - 1)) {
		KERNEL$SUICIDE("MISCOMPILED KERNEL, SIZEOF(VMMAP) == %d IS NOT POWER OF 2. ADJUST VMMAP_PAD", (int)sizeof(VMMAP));
	}

	VM_ARCH_INIT_PAGETABLES();

	/* must be set before BORROW_PER_MINUTE is evaluated (it divides by it) */
	SCHEDULE_TIME = MAX_SCHEDULE_TIME;

	if (__unlikely(r = KERNEL$CACHE_REGISTER_VM_TYPE(&proc_vm_entity, &proc_calls))) {
		__critical_printf("ERROR REGISTERING PROC VM ENTITY: %s\n", strerror(-r));
		HALT_KERNEL();
	}
	/*{
	static unsigned long zero = 0;
	KERNEL$SLAB_INIT(&proc_slab, sizeof(PROC), PROC_ALIGN, VM_TYPE_CACHED_MAPPED, PROC_CTOR, NULL, &zero, "KERNEL$PROC");
	}*/
	KERNEL$SLAB_INIT(&proc_slab, sizeof(PROC), PROC_ALIGN, VM_TYPE_CACHED_MAPPED, PROC_CTOR, NULL, NULL, "KERNEL$PROC");
	if (__unlikely(r = KERNEL$SLAB_RESERVE(&proc_slab, N_RESERVED_PROCS))) {
		__critical_printf("ERROR ALLOCATING PROCS: %s\n", strerror(-r));
		HALT_KERNEL();
	}
	if (__unlikely(r = KERNEL$CACHE_REGISTER_VM_TYPE(&handle_vm_entity, &handle_calls))) {
		__critical_printf("ERROR REGISTERING HANDLE VM ENTITY: %s\n", strerror(-r));
		HALT_KERNEL();
	}
	/* each slab-allocated handle carries room for a VMENTITY in addition to
	   the HANDLE itself */
	KERNEL$SLAB_INIT(&handle_slab, sizeof(HANDLE) + sizeof(VMENTITY), 0, VM_TYPE_CACHED_MAPPED, HANDLE_CTOR, NULL, NULL, "KERNEL$HANDLE");
	KERNEL$SLAB_INIT(&uiorq_slab, sizeof(UIORQ), __CPU_CACHELINE_ALIGN, VM_TYPE_CACHED_MAPPED, NULL, NULL, NULL, "KERNEL$UIORQ");
	/* The kernel process is not slab-allocated, so PROC_CTOR never runs on
	   it; the fields it would set are initialized by hand below. */
	for (i = 0; i < MAX_IO_SCHED; i++) KERNEL$PROC_KERNEL.ios[i].p = &KERNEL$PROC_KERNEL;
	PROC_INIT_COMMON(&KERNEL$PROC_KERNEL);
	KERNEL$PROC_KERNEL.parent = NULL;
	INIT_XLIST(&KERNEL$PROC_KERNEL.children);
	/* single-bucket handle hash (mask 0) */
	KERNEL$PROC_KERNEL.handles = KERNEL$PROC_KERNEL.default_handles;
	KERNEL$PROC_KERNEL.handle_hash_mask = 0;
	INIT_XLIST(KERNEL$PROC_KERNEL.handles);

	INIT_XLIST(&KERNEL$PROC_KERNEL.prefetch);
	KERNEL$PROC_KERNEL.writeback = 0;

	KERNEL$PROC_KERNEL.parent_lnt = NULL;
	KERNEL$PROC_KERNEL.ln_mode = LN_ALL;
	KERNEL$PROC_KERNEL.parent_forktable = NULL;
	KERNEL$PROC_KERNEL.parent_forktable_n = 0;
	KERNEL$PROC_KERNEL.parent_rq = NULL;
	KERNEL$PROC_KERNEL.flags = 0;

	KERNEL$PROC_KERNEL.fork_child = NULL;
	KERNEL$PROC_KERNEL.fork_parent = NULL;
	/* root of the process tree: depth 0, backptr[0] points at itself */
	KERNEL$PROC_KERNEL.depth = 0;
	memset(&KERNEL$PROC_KERNEL.backptr, 0, sizeof KERNEL$PROC_KERNEL.backptr);
	KERNEL$PROC_KERNEL.backptr[0] = &KERNEL$PROC_KERNEL;

	/* second-stage init of quotas and the scheduler node */
	QINIT2(&KERNEL$PROC_KERNEL.procq);
	QRINIT2(&KERNEL$PROC_KERNEL.hq);
	QRINIT2(&KERNEL$PROC_KERNEL.uq);
	QRINIT2(&KERNEL$PROC_KERNEL.ioq);
	QINIT2(&KERNEL$PROC_KERNEL.pgtblq);
	QINIT2(&KERNEL$PROC_KERNEL.vmq);
	QINIT2(&KERNEL$PROC_KERNEL.wireq);
	PROC_ACCOUNT_TICKS = GET_FINE_TICKS();
	SINIT2(&KERNEL$PROC_KERNEL.sch, PROC_ACCOUNT_TICKS, FINE_TICKS_PER_MINUTE, BORROW_PER_MINUTE);
	KERNEL$PROC_KERNEL.sch.flags |= SCHED_ONLIST;
	/* the kernel process has no reserved handles or user I/O requests */
	KERNEL$PROC_KERNEL.n_reserved_handles = 0;
	KERNEL$PROC_KERNEL.reserved_handle_list.next = NULL;
	KERNEL$PROC_KERNEL.n_reserved_uiorqs = 0;
	KERNEL$PROC_KERNEL.reserved_uiorq_list.next = NULL;
	KERNEL$PROC_KERNEL.uiorqs.next = NULL;
	WQ_INIT(&KERNEL$PROC_KERNEL.shutdown, "KERNEL$PROC_KERNEL_SHUTDOWN");
	WQ_INIT(&KERNEL$PROC_KERNEL.ioq_wait, "KERNEL$PROC_KERNEL_IOQ_WAIT");
	WQ_INIT(&KERNEL$PROC_KERNEL.free_resources, "KERNEL$PROC_KERNEL_FREE_RESOURCES");
	memcpy(&KERNEL$PROC_KERNEL.vspace, &KERNEL$VIRTUAL, sizeof KERNEL$PROC_KERNEL.vspace);
	KERNEL$PROC_KERNEL.alloc_rate = KERNEL$PROC_KERNEL.created_alloc_rate = 0;
	KERNEL$PROC_KERNEL.alloc_rate_time = KERNEL$PROC_KERNEL.created_alloc_rate_time = KERNEL$GET_JIFFIES_LO();
	CACHE_PROC_CTOR(&KERNEL$PROC_KERNEL);
	/* NOTE(review): strncpy zero-pads up to 9 bytes; assumes jobname holds at
	   least 9 bytes -- confirm against the PROC declaration */
	strncpy(KERNEL$PROC_KERNEL.jobname, "KERNEL", 9);
	INIT_RECLAIM_TIMER();
	INIT_SYSCALLS();
	SCHEDULE_TIMER.fn = SCHEDULE_TIMER_FN;
	SCHEDULE_AST.fn = SCHEDULE_AST_FN;
	KERNEL$SET_TIMER(SCHEDULE_TIME, &SCHEDULE_TIMER);
	/* make sure all of the above is done before the flag becomes visible */
	__barrier();
	PROC_INITIALIZED = 1;
}

/*
 * Call fn on every process in the subtree rooted at 'from'. Children are
 * visited before their parent, and 'from' itself is visited last.
 *
 * The return values of fn are OR-ed together and the result is returned.
 * As soon as the accumulated value becomes negative the walk stops and that
 * value is returned; callbacks use this to signal that the process just
 * visited (and possibly its parents) was freed, so the walk cannot continue.
 */
int FOR_ALL_PROCS(PROC *from, int (*fn)(PROC *p))
{
	PROC *cur = from;
	int acc = 0;
#if __DEBUG >= 1
	if (__unlikely(SPLX_BELOW(KERNEL$SPL, SPL_X(SPL_DEV))))
		KERNEL$SUICIDE("FOR_ALL_PROCS AT SPL %08X", KERNEL$SPL);
#endif
	for (;;) {
		/* descend along first children down to a leaf */
		while (__likely(!XLIST_EMPTY(&cur->children)))
			cur = LIST_STRUCT(cur->children.next, PROC, child_entry);
		/* visit, then move to the next sibling or climb to the parent */
		for (;;) {
			acc |= fn(cur);
			if (__unlikely(acc < 0)) return acc;	/* the process (and possibly its parents) was freed */
			if (__unlikely(cur == from)) return acc;
			if (cur->child_entry.next != &KERNEL$LIST_END) {
				cur = LIST_STRUCT(cur->child_entry.next, PROC, child_entry);
				break;
			}
			cur = cur->parent;
		}
	}
}

/*
 * FOR_ALL_PROCS callback: empty every entry of the process' logical-name
 * cache (empty name, touch = 0xff). Always returns 0.
 */
static int CLEAR_LN_CACHE(PROC *p)
{
	int slot = 0;

	while (slot < LN_CACHE_SIZE) {
		p->ln_cache[slot].name[0] = 0;
		p->ln_cache[slot].touch = 0xff;
		slot++;
	}
	return 0;
}

/*
 * Empty the logical-name cache of every process in the system, starting
 * from the kernel process.
 */
void CLEAR_SUBPROC_LN_CACHE(void)
{
	/* the callback always returns 0, so the result carries no information */
	(void)FOR_ALL_PROCS(&KERNEL$PROC_KERNEL, CLEAR_LN_CACHE);
}

static int TEST_DELAYED_SHUTDOWN(PROC *proc);
extern AST_STUB DELAYED_SHUTDOWN_FN;

/* Nonzero while DELAYED_SHUTDOWN_AST has been posted and not yet started. */
static int DELAYED_SHUTDOWN_FLAG = 0;
static AST DELAYED_SHUTDOWN_AST = { DELAYED_SHUTDOWN_FN, NULL };

/*
 * Request a deferred pass that shuts down processes marked PR_RUNDOWN.
 * The flag is switched 0 -> 1 with a compare-exchange so that the AST is
 * posted at most once until DELAYED_SHUTDOWN_FN clears the flag again.
 */
void DELAYED_SHUTDOWN(void)
{
	if (__unlikely(__CMPXCHGI(&DELAYED_SHUTDOWN_FLAG, 0, 1)))
		return;		/* already pending */
	CALL_AST(&DELAYED_SHUTDOWN_AST);
}

/*
 * AST posted by DELAYED_SHUTDOWN. Clears the pending flag first (so new
 * requests can post the AST again), raises to SPL_DEV and walks the whole
 * process tree with TEST_DELAYED_SHUTDOWN. A negative result means a process
 * was freed during the walk, so the walk is restarted from the root.
 */
DECL_AST(DELAYED_SHUTDOWN_FN, SPL_ZERO, AST)
{
	DELAYED_SHUTDOWN_FLAG = 0;
	RAISE_SPL(SPL_DEV);
	while (FOR_ALL_PROCS(&KERNEL$PROC_KERNEL, TEST_DELAYED_SHUTDOWN) < 0);
	RETURN;
}

/*
 * FOR_ALL_PROCS callback used by DELAYED_SHUTDOWN_FN. For a process marked
 * PR_RUNDOWN it attempts the shutdown; when SHUTDOWN_PROCESS reports no wait
 * queue the function returns -1 so that the tree walk is aborted and
 * restarted. In every other case it returns 0.
 */
static int TEST_DELAYED_SHUTDOWN(PROC *proc)
{
	WQ *pending;

	if (__likely(!(proc->flags & PR_RUNDOWN)))
		return 0;
	SHUTDOWN_PROCESS(proc, &pending);
	if (__likely(!pending))
		return -1;
	return 0;
}

/*
 * Slow path of KERNEL$ACQUIRE_IO_TAG, taken when the I/O quota allocation
 * for 'proc' failed at quota node 'pzap'.
 * QRZAP selects a victim node (zapq) below pzap; the victim must have I/O
 * in progress, otherwise the kernel is killed with a diagnostic. The first
 * process found by FIND_DIFFERING_PROC between 'proc' and the victim gets
 * its I/O quota restricted.
 * Returns the victim's ioq_wait queue, on which the caller should wait.
 */
static WQ *ACQUIRE_FAILED(QUOTA_R *pzap, PROC *proc)
{
	QUOTA_R *zapq;
	PROC *pz, *diff;
	QRZAP(pzap, for_all_ioq_subnodes, for_all_ioq_subnodes_tail, zapq, 1);
	pz = LIST_STRUCT(zapq, PROC, ioq);
	if (__unlikely(!pz->ioq.q_node_usage))
		KERNEL$SUICIDE("ACQUIRE_FAILED: NO IO IN PROGRESS (PROC %p, NODE %lu, SUBTREE %lu; PZAP %p, NODE %lu, SUBTREE %lu; PZ %p, NODE %lu, SUBTREE %lu)", proc, proc->ioq.q_node_usage, proc->ioq.q_subtree_usage, LIST_STRUCT(pzap, PROC, ioq), pzap->q_node_usage, pzap->q_subtree_usage, pz, pz->ioq.q_node_usage, pz->ioq.q_subtree_usage);
	FIND_DIFFERING_PROC(proc, pz, diff, {
		QRRESTRICT(&diff->ioq);
		goto done;
	});
	done:
	return &pz->ioq_wait;
}

/*
 * Callback passed to QRALLOC by KERNEL$ACQUIRE_IO_TAG: records the current
 * time in the process owning the quota node.
 * Relies on a variable 't' (current fine ticks) in the expanding scope.
 */
#define IO_TAG_SCHED_ACCT(io_q)					\
do {								\
	LIST_STRUCT(io_q, PROC, ioq)->ios_unblock_time = t;	\
} while (0)

/*
 * Callback passed to QRFREE by KERNEL$RELEASE_IO_TAG: adds the time since
 * ios_unblock_time to the process' ios_accumulated_time. Once the
 * accumulated value reaches one I/O scheduler accounting period it is
 * flushed into the accumulated_time of every I/O scheduler below
 * max_iosched and reset to 0.
 * Relies on a variable 't' (current fine ticks) in the expanding scope.
 */
#define IO_TAG_SCHED_UNACCT(io_q)				\
do {								\
	int j;							\
	sched_unsigned add = t - LIST_STRUCT(io_q, PROC, ioq)->ios_unblock_time;								\
	if (__unlikely((LIST_STRUCT(io_q, PROC, ioq)->ios_accumulated_time += add) >= FINE_TICKS_PER_MINUTE >> IOSCHED_FINE_TICKS_SHIFT)) {	\
		for (j = 0; j < max_iosched; j++)		\
			LIST_STRUCT(io_q, PROC, ioq)->ios[j].sch.accumulated_time += LIST_STRUCT(io_q, PROC, ioq)->ios_accumulated_time;	\
		LIST_STRUCT(io_q, PROC, ioq)->ios_accumulated_time = 0;\
	}							\
} while (0)

/*
 * Bind 'tag' to 'proc' and charge one unit against the process' I/O quota.
 * Must be called at SPL_VSPACE (checked in debug builds).
 *
 * The kernel process is never charged. Returns NULL on success; when the
 * quota is exhausted, returns the wait queue chosen by ACQUIRE_FAILED.
 * Note that tag->proc is written before the quota is checked.
 */
WQ *KERNEL$ACQUIRE_IO_TAG(PROC_TAG *tag, PROC *proc)
{
	sched_unsigned t;	/* referenced by name from IO_TAG_SCHED_ACCT */
	QUOTA_R *pzap;
#if __DEBUG >= 1
	if (KERNEL$SPL != SPL_X(SPL_VSPACE))
		KERNEL$SUICIDE("KERNEL$ACQUIRE_IO_TAG AT SPL %08X", KERNEL$SPL);
#endif
	tag->proc = proc;
	if (__likely(proc != &KERNEL$PROC_KERNEL)) {
		t = GET_FINE_TICKS();
		QRALLOC(&proc->ioq, 1, proc_ioq_isroot, proc_ioq_parent, IO_TAG_SCHED_ACCT, pzap, {
			return ACQUIRE_FAILED(pzap, proc);
		});
	}
	return NULL;
}

/*
 * Link 'tag' onto the prefetch list of 'proc'.
 * Must be called at SPL_VSPACE (checked in debug builds).
 */
void KERNEL$ACQUIRE_PREFETCH_TAG(PREFETCH_TAG *tag, PROC *proc)
{
#if __DEBUG >= 1
	if (KERNEL$SPL != SPL_X(SPL_VSPACE)) {
		KERNEL$SUICIDE("KERNEL$ACQUIRE_PREFETCH_TAG AT SPL %08X", KERNEL$SPL);
	}
#endif
	ADD_TO_XLIST(&proc->prefetch, &tag->list);
}

/* Exported accessor: returns the current fine-tick counter. */
sched_unsigned KERNEL$GET_SCHED_TICKS(void)
{
	sched_unsigned now = GET_FINE_TICKS();

	return now;
}

/*
 * Charge the interval (now - start) to I/O scheduler number 'sched' of
 * process 'p'. Must be called at SPL_VSPACE (checked in debug builds).
 */
void KERNEL$ACCOUNT_IOSCHED(PROC *p, int sched, sched_unsigned start, sched_unsigned now)
{
	sched_unsigned elapsed;
#if __DEBUG >= 1
	if (KERNEL$SPL != SPL_X(SPL_VSPACE)) {
		KERNEL$SUICIDE("KERNEL$ACCOUNT_IOSCHED AT SPL %08X", KERNEL$SPL);
	}
#endif
	elapsed = now - start;
	SDACCOUNT(&p->ios[sched].sch, elapsed, now, FINE_TICKS_PER_MINUTE >> IOSCHED_FINE_TICKS_SHIFT, proc_iosch_isroot, proc_iosch_parent);
}

/*
 * Undo KERNEL$ACQUIRE_IO_TAG: return one unit to the I/O quota of the
 * process bound to 'tag' and wake everybody waiting on its ioq_wait queue.
 * Must be called at SPL_VSPACE (checked in debug builds, where tag->proc is
 * also cleared).
 *
 * Tags bound to the kernel process were never charged and are ignored.
 * When the process is in rundown and this node has no I/O left, a delayed
 * shutdown pass is requested.
 */
void KERNEL$RELEASE_IO_TAG(PROC_TAG *tag)
{
	sched_unsigned t;	/* referenced by name from IO_TAG_SCHED_UNACCT */
	PROC *proc;
#if __DEBUG >= 1
	if (KERNEL$SPL != SPL_X(SPL_VSPACE)) {
		KERNEL$SUICIDE("KERNEL$RELEASE_IO_TAG AT SPL %08X", KERNEL$SPL);
	}
#endif
	proc = tag->proc;
#if __DEBUG >= 1
	tag->proc = NULL;
#endif
	if (__unlikely(proc == &KERNEL$PROC_KERNEL))
		return;

	t = GET_FINE_TICKS();
	QRFREE(&proc->ioq, 1, proc_ioq_isroot, proc_ioq_parent, IO_TAG_SCHED_UNACCT);
	WQ_WAKE_ALL(&proc->ioq_wait);

	if (__unlikely(proc->flags & PR_RUNDOWN)) {
		if (!proc->ioq.q_node_usage)
			DELAYED_SHUTDOWN();
	}
}

/*
 * Compare two (process, I/O scheduler index) pairs for I/O scheduling.
 * Returns 0 when both processes are the same; otherwise walks both ancestor
 * chains (backptr[]) from depth 1 until they diverge and returns the
 * difference of the scheduler counters at that point. If one chain ends
 * there (that process is an ancestor of the other), its own 'ccount' is
 * used instead of 'count'.
 *
 * s1 and s2 are converted in place from scheduler indices to byte offsets
 * of the IO_SCHED inside PROC; the sched1/sched2 macros rely on that.
 */
int KERNEL$COMPARE_IOSCHED(PROC *p1, int s1, PROC *p2, int s2)
{
	unsigned depth;
	if (__unlikely(p1 == p2)) return 0;
	s1 = s1 * sizeof(IO_SCHED) + __offsetof(PROC, ios);
	s2 = s2 * sizeof(IO_SCHED) + __offsetof(PROC, ios);
#define sched1(p)		(&((IO_SCHED *)((char *)(p) + s1))->sch)
#define sched2(p)		(&((IO_SCHED *)((char *)(p) + s2))->sch)
	for (depth = 1; ; depth++) {
		PROC *q1 = p1->backptr[depth];
		PROC *q2 = p2->backptr[depth];
		if (__likely(q1 == q2))
			continue;	/* still on the common part of the path */
		if (__unlikely(!q1))
			return sched1(p1)->ccount - sched2(q2)->count;
		if (__unlikely(!q2))
			return sched1(q1)->count - sched2(p2)->ccount;
		return sched1(q1)->count - sched2(q2)->count;
	}
}

/* Index of the I/O scheduler being set up or torn down; argument channel
   for the init_iosched / done_iosched tree-walk callbacks. */
static int new_iosched;

/*
 * FOR_ALL_PROCS callback: initialize I/O scheduler slot 'new_iosched' of
 * process p, using the process' configured priority bounds. Returns 0.
 */
static int init_iosched(PROC *p)
{
	int slot = new_iosched;

	SDINIT(&p->ios[slot].sch);
	p->ios[slot].sch.mininvpri = p->iosch_mininvpri;
	p->ios[slot].sch.maxinvpri = p->iosch_maxinvpri;
	SDINIT2(&p->ios[slot].sch, PROC_ACCOUNT_TICKS, FINE_TICKS_PER_MINUTE >> IOSCHED_FINE_TICKS_SHIFT);
	/*INIT_LIST(&p->ios[slot].queue);*/
	return 0;
}

/*
 * Allocate an I/O scheduler index.
 *
 * Must be called at SPL_DEV or below; the kernel is killed otherwise. The
 * function raises to SPL_VSPACE while it works and restores the caller's
 * SPL before returning.
 *
 * The first free bit in the ioschedalloc bitmap is claimed, max_iosched is
 * raised so that it stays one past the highest index in use, and the new
 * slot is initialized in every existing process.
 *
 * Returns the allocated index (0 .. MAX_IO_SCHED - 1), or -ENFILE when all
 * slots are taken.
 */
int KERNEL$ALLOC_IOSCHED(void)
{
	int i;
	int spl = KERNEL$SPL;
	if (__unlikely(SPLX_BELOW(SPL_X(SPL_DEV), spl)))
		KERNEL$SUICIDE("KERNEL$ALLOC_IOSCHED AT SPL %08X", spl);
	RAISE_SPL(SPL_VSPACE);
	for (i = 0; i < MAX_IO_SCHED; i++) if (!__BTS(ioschedalloc, i)) {
		/* max_iosched is an exclusive bound ("j < max_iosched" loops,
		   "i == max_iosched - 1" in KERNEL$FREE_IOSCHED), so it must
		   grow when i == max_iosched as well -- in particular for the
		   very first allocation (i == 0, max_iosched == 0). */
		if (i >= max_iosched) max_iosched = i + 1;
		new_iosched = i;
		FOR_ALL_PROCS(&KERNEL$PROC_KERNEL, init_iosched);
		/*KERNEL$PROC_KERNEL.ios[i].sch.flags |= SCHED_ONLIST;*/
		goto ret;
	}
	i = -ENFILE;
	ret:
	LOWER_SPLX(spl);
	return i;
}

/*
 * FOR_ALL_PROCS callback: tear down I/O scheduler slot 'new_iosched' of
 * process p. Returns 0.
 */
static int done_iosched(PROC *p)
{
	int slot = new_iosched;

	SDDONE(&p->ios[slot].sch, "IOSCH");
	/*if (__unlikely(!LIST_EMPTY(&p->ios[slot].queue))) KERNEL$SUICIDE("done_iosched: IO QUEUE NOT EMPTY");*/
	return 0;
}

/*
 * Release I/O scheduler index 'i' previously returned by
 * KERNEL$ALLOC_IOSCHED.
 *
 * Must be called at SPL_DEV or below; runs at SPL_VSPACE and restores the
 * caller's SPL. An out-of-range or already free index kills the kernel.
 * If the highest index in use is freed, max_iosched is lowered past any
 * free slots directly beneath it. The slot is then torn down in every
 * process.
 */
void KERNEL$FREE_IOSCHED(int i)
{
	int spl = KERNEL$SPL;
	if (__unlikely(SPLX_BELOW(SPL_X(SPL_DEV), spl))) {
		KERNEL$SUICIDE("KERNEL$FREE_IOSCHED AT SPL %08X", spl);
	}
	RAISE_SPL(SPL_VSPACE);
	if (__unlikely((unsigned)i >= MAX_IO_SCHED)) {
		KERNEL$SUICIDE("KERNEL$FREE_IOSCHED: FREEING INVALID IOSCHED %d", i);
	}
	if (__unlikely(!__BTR(ioschedalloc, i))) {
		KERNEL$SUICIDE("KERNEL$FREE_IOSCHED: FREEING FREE IOSCHED %d", i);
	}
	if (i == max_iosched - 1) {
		do {
			max_iosched--;
		} while (max_iosched && !__BT(ioschedalloc, max_iosched - 1));
	}
	new_iosched = i;
	/*KERNEL$PROC_KERNEL.ios[i].sch.flags &= ~SCHED_ONLIST;*/
	FOR_ALL_PROCS(&KERNEL$PROC_KERNEL, done_iosched);
	LOWER_SPLX(spl);
}


/* Unblock process p, but only if the reason it is blocked is PR_IO_WAIT. */
static __finline__ void UNBLOCK_IO(PROC *p)
{
	/*__debug_printf("unblock_io(%x).", p->flags);*/
	if ((p->flags & PR_BLOCKREASON) != PR_IO_WAIT)
		return;
	PROC_UNBLOCK(p);
}

/*
 * First instantiation of the allocator template in PROCHNDL.I, for HANDLE
 * objects. The macros below are the template parameters: object type, names
 * of the generated functions (PROC_ALLOC_HANDLE, PROC_FREE_HANDLE,
 * PROC_REFILL_RESERVED_HANDLES, FREE_RESERVED_HANDLE, FREE_RESERVED_HANDLES),
 * hooks executed at allocation/free/creation/destruction, the per-process
 * reserve bookkeeping fields, the quota (hq) and the backing slab.
 * The hook bodies refer to 'h' (the object) and 'p' (the process), names
 * that presumably come from the template -- confirm in PROCHNDL.I, which is
 * also presumably responsible for #undef-ing these macros, since they are
 * redefined for the second instantiation.
 */
#define type HANDLE
#define proc_entry name_addrspace
#define alloc HANDLE *PROC_ALLOC_HANDLE(PROC *p, int n, WQ **wq)
#define alloc_special h->op = &KERNEL$HANDLE_NOOP, h->handle_num = n, ADD_TO_XLIST(&p->handles[n & p->handle_hash_mask], &h->proc_hash);
#define ffree PROC_FREE_HANDLE
#define free_special h->handle_num = -1;
/* at __DEBUG >= 2, freed handles are poisoned with recognizable values */
#if __DEBUG < 2
#define free_debug
#else
#define free_debug h->handle_num = -1; h->op = (void *)1L; h->file_addrspace = (void *)2L; h->file = (void *)3L; h->fnode = (void *)4L; h->name = (void *)5L; h->child_list.next = (void *)6L; h->child_entry.next = (void *)7L; h->child_entry.prev = (void *)8L; h->fnode_entry.next = (void *)9L; h->fnode_entry.prev = (void *)10L;
#endif
/* handle_num == -1 marks a handle that is not in use (see HANDLE_CHECKMAP
   and HANDLE_SWAPOUT) */
#define new_special h->handle_num = -1; RAISE_SPL(SPL_CACHE); KERNEL$CACHE_INSERT_VM_ENTITY(HANDLE_TO_VMENTITY(h), p, 0); LOWER_SPL(SPL_DEV);
#define drop_special RAISE_SPL(SPL_CACHE); KERNEL$CACHE_REMOVE_VM_ENTITY(HANDLE_TO_VMENTITY(h)); LOWER_SPL(SPL_DEV);
#define reserved n_reserved_handles
#define reserved_list reserved_handle_list
#define list_entry proc_hash
#define refill_reserved PROC_REFILL_RESERVED_HANDLES
#define reserved_min RESERVED_HANDLES_MIN
#define reserved_max RESERVED_HANDLES_MAX
#define q hq
#define q_isroot proc_hq_isroot
#define q_parent proc_hq_parent
#define q_forall for_all_hq_subnodes
#define q_forall_tail for_all_hq_subnodes_tail
#define shutdown_some SHUTDOWN_SOME_HANDLES
#define slab handle_slab
#define charge_alloc (sizeof(HANDLE) + sizeof(VMENTITY))
#define free_one_reserved FREE_RESERVED_HANDLE
#define free_reserved FREE_RESERVED_HANDLES
#define reserved_area reserved_handles
#define reserved_area_size RESERVED_HANDLES
#include "PROCHNDL.I"

/*
 * Second instantiation of the allocator template in PROCHNDL.I, for UIORQ
 * (user I/O request) objects. Generates PROC_ALLOC_UIORQ, PROC_FREE_UIORQ,
 * PROC_REFILL_RESERVED_UIORQS, FREE_RESERVED_UIORQ and FREE_RESERVED_UIORQS.
 * Unlike handles, uiorqs have no VM entity (empty new_special/drop_special)
 * and no memory is charged for them (charge_alloc is 0).
 */
#define type UIORQ
#define proc_entry proc
#define alloc UIORQ *PROC_ALLOC_UIORQ(PROC *p, WQ **wq)
#define alloc_special ADD_TO_XLIST(&p->uiorqs, &h->list);
#define ffree PROC_FREE_UIORQ
/* at __DEBUG >= 2, freed uiorqs are poisoned with recognizable values */
#if __DEBUG < 2
#define free_debug
#else
#define free_debug h->iorq = (void *)1L; h->vspace = (void *)2L; h->pfw.fn = (void *)3L; h->pfw.tmp1 = 4L;
#endif
/* !!! FIXME: call UNBLOCK_IO only if !(p->flags & PR_RUNDOWN) */
/* Freeing the last uiorq of a process in rundown continues its shutdown;
   otherwise a process blocked waiting for I/O is unblocked. */
#define free_special if (__unlikely(p->flags & PR_RUNDOWN) && XLIST_EMPTY(&p->uiorqs)) SHUTDOWN_PROCESS(p, (void *)&KERNEL$LIST_END); else UNBLOCK_IO(p);
#define new_special
#define drop_special
#define reserved n_reserved_uiorqs
#define reserved_list reserved_uiorq_list
#define list_entry list
#define refill_reserved PROC_REFILL_RESERVED_UIORQS
#define reserved_min RESERVED_UIORQS_MIN
#define reserved_max RESERVED_UIORQS_MAX
#define q uq
#define q_isroot proc_uq_isroot
#define q_parent proc_uq_parent
#define q_forall for_all_uq_subnodes
#define q_forall_tail for_all_uq_subnodes_tail
#define shutdown_some SHUTDOWN_SOME_UIORQS
#define slab uiorq_slab
#define charge_alloc 0
#define free_one_reserved FREE_RESERVED_UIORQ
#define free_reserved FREE_RESERVED_UIORQS
#define reserved_area reserved_uiorqs
#define reserved_area_size RESERVED_UIORQS
#include "PROCHNDL.I"

/*
 * Set the uiorq_overflow byte in the UDATA structure of process p to 1.
 * The last (fault handler) argument of uwrite_char is left empty, so a
 * failed write is simply ignored.
 */
static void SET_UIORQ_OVERFLOW(PROC *p)
{
	/* this function is for better performance, not correctness, so we do
	   not have to process a page fault */
	uwrite_char(p, KUPLACE(UDATA_STRUCT + __offsetof(UDATA, uiorq_overflow)), (char)1, );
}

/*
 * Detach handle h from whatever it refers to and give it back to its
 * process.
 * KERNEL$DETACH_HANDLE is called at the SPL recorded in the handle's
 * operations (h->op->spl): the SPL is first raised to SPL_TOP and then
 * lowered to that level. Afterwards the SPL is lowered to SPL_DEV before
 * PROC_FREE_HANDLE is called.
 */
static void RELEASE_HANDLE(HANDLE *h)
{
	/*__debug_printf("releasing: %d.", h->handle_num);*/
	RAISE_SPL(SPL_TOP);
	LOWER_SPLX(h->op->spl);
	KERNEL$DETACH_HANDLE(h);
	LOWER_SPL(SPL_DEV);
	PROC_FREE_HANDLE(h);
}

extern AST_STUB OPEN_AST;
extern AST_STUB CLOSE_AST;

/*
 * Try to get rid of handle h.
 *
 * The owner's user I/O requests are scanned for requests that use the
 * handle: either the request's handle number equals h's number, or (for
 * requests other than open/close) the request's vspace pointer is h.
 *
 *   test != 0: only report; returns 1 if such a request exists, else 0.
 *   test == 0: every such request is cancelled (after flagging uiorq
 *              overflow in the owner) and 1 is returned; if there was none,
 *              the handle is released and 0 is returned.
 */
static int SHUTDOWN_HANDLE(HANDLE *h, int test)
{
	int ret = 0;
	UIORQ *u;
	XLIST_FOR_EACH(u, &h->name_addrspace->uiorqs, UIORQ, list) if (__unlikely(u->u.siorq.h == (h->handle_num & HANDLE_NUM_MASK)) || (u->u.siorq.fn != OPEN_AST && u->u.siorq.fn != CLOSE_AST && __unlikely(u->u.siorq.v.vspace == h))) {
		if (__likely(test)) return 1;
		SET_UIORQ_OVERFLOW(h->name_addrspace);
		KERNEL$CIO(&u->u.iorq);
		ret = 1;
	}
	/* in test mode nothing was found (0); otherwise return 1 if requests
	   were cancelled -- the handle cannot be released yet */
	if (__likely(test | ret)) return ret;
	RELEASE_HANDLE(h);
	return 0;
}

/*
 * Free some handle resources of process p (the 'shutdown_some' hook of the
 * HANDLE template instantiation).
 *
 * Unless the process holds enough surplus reserved handles, up to
 * FREE_HANDLES_IN_PASS randomly chosen handles in use are shut down
 * (handles living in the embedded reserved area are not counted, another one
 * is picked instead). Afterwards reserved handles are trimmed:
 *   - returns 0 if there are no reserved handles;
 *   - returns 1 if for_myself is set (reserved handles are left alone);
 *   - otherwise returns the result of FREE_RESERVED_HANDLES.
 * The kernel process is never touched (returns 0).
 */
static int SHUTDOWN_SOME_HANDLES(PROC *p, int for_myself)
{
	HANDLE *h;
	int r;
	int i;
	int total_freed;
	if (__unlikely(p == &KERNEL$PROC_KERNEL)) return 0;
	/* enough surplus in the reserve: do not touch handles in use */
	if (p->n_reserved_handles - RESERVED_HANDLES - FREE_HANDLES_IN_PASS >= RESERVED_HANDLES_MIN(p)) goto zap_only_reserved;
	total_freed = 0;
	again:
	i = 0;
	nextr:
	/* probe random hash buckets; after handle_hash_mask + 1 misses fall
	   back to a linear scan for any non-empty bucket */
	r = random() & p->handle_hash_mask;
	if (XLIST_EMPTY(&p->handles[r])) {
		if (__likely(++i <= p->handle_hash_mask)) goto nextr;
		for (r = 0; r <= p->handle_hash_mask; r++) if (!XLIST_EMPTY(&p->handles[r])) goto f;
		goto zap_only_reserved;
		f:;
	}
	/* pick a random element of the bucket: count, then walk to the index */
	i = 0;
	XLIST_FOR_EACH(h, &p->handles[r], HANDLE, proc_hash) i++;
	i = (unsigned)random() % i;
	XLIST_FOR_EACH(h, &p->handles[r], HANDLE, proc_hash) if (!i--) goto found;
	KERNEL$SUICIDE("SHUTDOWN_SOME_HANDLES: HANDLE NOT FOUND");
	found:
	if (__unlikely(SHUTDOWN_HANDLE(h, 0))) goto skip_this;
	/* only the address of h is compared here: a handle from the embedded
	   reserved area does not count towards total_freed */
	if (h >= &p->reserved_handles[0] && h < &p->reserved_handles[RESERVED_HANDLES]) goto again;
	skip_this:
	if (++total_freed < FREE_HANDLES_IN_PASS) goto again;

	zap_only_reserved:
	if (!p->n_reserved_handles) return 0;
	if (for_myself) return 1;
	/* free half of the surplus, but at least FREE_HANDLES_IN_PASS */
	i = (p->n_reserved_handles - RESERVED_HANDLES_MIN(p)) >> 1;
	if (i < FREE_HANDLES_IN_PASS) i = FREE_HANDLES_IN_PASS;
	return FREE_RESERVED_HANDLES(p, i);
}

/*
 * Free some user I/O request resources of process p (the 'shutdown_some'
 * hook of the UIORQ template instantiation).
 *
 * Unless the process holds enough surplus reserved uiorqs, up to
 * FREE_UIORQS_IN_PASS randomly chosen outstanding requests are cancelled
 * (after flagging uiorq overflow in the process). Afterwards reserved
 * uiorqs are trimmed:
 *   - returns 0 if there are no reserved uiorqs;
 *   - returns 1 if for_myself is set (reserved uiorqs are left alone);
 *   - otherwise returns the result of FREE_RESERVED_UIORQS.
 * The kernel process is never touched (returns 0).
 */
static int SHUTDOWN_SOME_UIORQS(PROC *p, int for_myself)
{
	UIORQ *u;
	int r;
	int i;
	int total_freed;
	if (__unlikely(p == &KERNEL$PROC_KERNEL)) return 0;
	/* enough surplus in the reserve: do not cancel requests in flight */
	if (p->n_reserved_uiorqs - RESERVED_UIORQS - FREE_UIORQS_IN_PASS >= RESERVED_UIORQS_MIN(p)) goto zap_only_reserved;
	total_freed = 0;
	again:
	/* pick a random outstanding request: count, then walk to the index */
	r = 0;
	XLIST_FOR_EACH(u, &p->uiorqs, UIORQ, list) r++;
	if (!r) goto zap_only_reserved;
	i = (unsigned)random() % r;
	XLIST_FOR_EACH(u, &p->uiorqs, UIORQ, list) if (!i--) goto found;
	KERNEL$SUICIDE("SHUTDOWN_SOME_UIORQS: UIORQ NOT FOUND");
	found:
	SET_UIORQ_OVERFLOW(p);
	KERNEL$CIO(&u->u.iorq);
	/*if (u >= &p->reserved_uiorqs[0] && u < &p->reserved_uiorqs[RESERVED_UIORQS]) goto again;*/
	if (++total_freed < FREE_UIORQS_IN_PASS) goto again;

	zap_only_reserved:
	if (!p->n_reserved_uiorqs) return 0;
	if (for_myself) return 1;
	/* free half of the surplus, but at least FREE_UIORQS_IN_PASS */
	i = (p->n_reserved_uiorqs - RESERVED_UIORQS_MIN(p)) >> 1;
	if (i < FREE_UIORQS_IN_PASS) i = FREE_UIORQS_IN_PASS;
	return FREE_RESERVED_UIORQS(p, i);
}

/*
 * VM entity 'checkmap' callback for handles. Lowers the SPL to SPL_DEV.
 * Returns 0 for a handle that is not in use (handle_num == -1); otherwise
 * returns the SHUTDOWN_HANDLE test result, i.e. 1 when some user I/O request
 * still uses the handle and 0 when none does.
 */
static int HANDLE_CHECKMAP(VMENTITY *e)
{
	HANDLE *hnd;

	LOWER_SPL(SPL_DEV);
	hnd = VMENTITY_TO_HANDLE(e);
	if (hnd->handle_num != -1)
		return SHUTDOWN_HANDLE(hnd, 1);
	return 0;
}

/*
 * VM entity 'write' callback for handles: there is nothing to write, the
 * function only lowers the SPL to SPL_DEV. All arguments are unused.
 */
static void HANDLE_WRITE(VMENTITY *e, PROC *p, int trashing)
{
	(void)e;
	(void)p;
	(void)trashing;
	LOWER_SPL(SPL_DEV);
}

/*
 * VM entity 'swapout' callback for handles. Lowers the SPL to SPL_DEV.
 *
 * An unused handle (handle_num == -1) is returned to the system via
 * FREE_RESERVED_HANDLE. A handle in use is shut down; if it cannot be
 * released because I/O on it had to be cancelled, the owning process is shut
 * down instead (its error is set to -EINTR unless it is already in rundown).
 *
 * Returns 0 when the work is done and 1 when the process shutdown reported a
 * wait queue.
 */
static int HANDLE_SWAPOUT(VMENTITY *e)
{
	HANDLE *hnd;
	PROC *owner;
	WQ *pending;

	LOWER_SPL(SPL_DEV);
	hnd = VMENTITY_TO_HANDLE(e);
	if (__likely(hnd->handle_num == -1)) {
		FREE_RESERVED_HANDLE(hnd->name_addrspace, hnd);
		return 0;
	}
	if (__likely(!SHUTDOWN_HANDLE(hnd, 0)))
		return 0;

	/* This looks lame, but we are obliged to free the handle, and rather
	   than keeping a "WANTFREE" list of handles we just kill the process.
	   This path is taken only under extreme stress, so it does not
	   matter that it is slow. */
	owner = hnd->name_addrspace;
	/*__debug_printf("swapping out (on handle) %s\n", owner->jobname);*/
	if (__likely(!(owner->flags & PR_RUNDOWN)))
		owner->error = -EINTR;
	SHUTDOWN_PROCESS(owner, &pending);
	if (pending)
		return 1;
	return 0;
}

/* Timer driving the periodic trimming of reserved handles/uiorqs, and the
   AST in which the actual work (RECLAIM_AST) is done. */
static DECL_TIMER(reclaim_timer);
static AST reclaim_ast;

/* Timer callback: defers the work to reclaim_ast. The argument is unused. */
static void RECLAIM_TIMER(TIMER *t)
{
	(void)t;
	CALL_AST(&reclaim_ast);
}

/*
 * AST posted by RECLAIM_TIMER. Drains reclaim_list: for every process on
 * it, if half of its reserved handles (resp. uiorqs) still exceeds the
 * per-process maximum, the excess over that maximum is freed; then the
 * process is taken off the list. Finally the timer is re-armed with
 * RECLAIM_TIME.
 */
DECL_AST(RECLAIM_AST, SPL_DEV, AST)
{
	PROC *p;
	while (!XLIST_EMPTY(&reclaim_list)) {
		p = LIST_STRUCT(reclaim_list.next, PROC, reclaim_list);
		if (p->n_reserved_handles >> 1 > RESERVED_HANDLES_MAX(p)) FREE_RESERVED_HANDLES(p, (p->n_reserved_handles >> 1) - RESERVED_HANDLES_MAX(p));
		if (p->n_reserved_uiorqs >> 1 > RESERVED_UIORQS_MAX(p)) FREE_RESERVED_UIORQS(p, (p->n_reserved_uiorqs >> 1) - RESERVED_UIORQS_MAX(p));
		p->on_reclaim_list = 0;
		DEL_FROM_LIST(&p->reclaim_list);
	}
	KERNEL$SET_TIMER(RECLAIM_TIME, &reclaim_timer);
	RETURN;
}

/*
 * Wire up the reclaim timer and its AST and arm the timer for the first
 * time with JIFFIES_PER_SECOND (later runs use RECLAIM_TIME).
 */
static void INIT_RECLAIM_TIMER(void)
{
	reclaim_timer.fn = RECLAIM_TIMER;
	reclaim_ast.fn = RECLAIM_AST;

	KERNEL$SET_TIMER(JIFFIES_PER_SECOND, &reclaim_timer);
}

/*
static void dump_procs(PROC *p)
{
	PROC *pp;
	__debug_printf("%p(%d)[", p, p->sch.flags);
	LIST_FOR_EACH(pp, &p->sch.runq, PROC, sch.rentry) dump_procs(pp);
	__debug_printf("]");
}
*/

/* Finish a system call: lower the SPL to SPL_USER and jump to the idle
   loop. The trailing return is written after the jump. */
#define END_SYSCALL do { LOWER_SPL(SPL_USER); JMP_IDLE_LOOP(); return; } while (0)

/*
 * Verify that the user-supplied request address 'rq' is a multiple of
 * 'align' (which must be a power of two for the mask test to work). On
 * violation, the running process (PROC_RUN) gets an error message and
 * -EDOM, unless an error message is already recorded, is shut down, and
 * control jumps to 'label'.
 */
#define CHECK_RQ_ALIGN(rq, align, label)				\
do {									\
	if (__unlikely((rq) & ((align) - 1))) {				\
		if (__likely(!PROC_RUN->error_msg[0])) {		\
			_snprintf(PROC_RUN->error_msg, __MAX_STR_LEN, "PROCESS TRIED TO POST UNALIGNED IORQ %lX", (rq));\
			PROC_RUN->error = -EDOM;			\
		}							\
		SHUTDOWN_PROCESS(PROC_RUN, (void *)&KERNEL$LIST_END);	\
		goto label;						\
	}								\
} while (0)

/*
 * Function wrapper around the SREFRESH macro for scheduler node 'sch' and
 * its subnodes; passed as the refresh callback to SSELECT in SCHEDULE.
 */
static void SCH_REFRESH(SCHED *sch)
{
	SREFRESH(sch, for_all_sch_subnodes, for_all_sch_subnodes_tail);
}

/*
 * Charge the time elapsed since the last accounting point
 * (PROC_ACCOUNT_TICKS) to the process currently being accounted
 * (KERNEL$PROC_ACCOUNT) and move the accounting point to t.
 * Must be called at SPL_VSPACE (checked in debug builds).
 *
 * If t lies before the accounting point (the signed difference is
 * negative), nothing is charged and the accounting point is left alone.
 *
 * Returns the timestamp at which accounting stands after the call: t in the
 * normal case, PROC_ACCOUNT_TICKS when t was in the past. Callers continue
 * with the returned value ("t = DO_PROC_ACCOUNT(t)"), so they never hand a
 * timestamp older than the accounting point to the scheduler.
 */
static sched_unsigned DO_PROC_ACCOUNT(sched_unsigned t)
{
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_VSPACE)))
		KERNEL$SUICIDE("DO_PROC_ACCOUNT AT SPL %08X", KERNEL$SPL);
#endif
	/* time went backwards relative to the accounting point: clamp */
	if (__unlikely((sched_signed)(t - PROC_ACCOUNT_TICKS) < 0)) return PROC_ACCOUNT_TICKS;
	SACCOUNT(&KERNEL$PROC_ACCOUNT->sch, t - PROC_ACCOUNT_TICKS, t, FINE_TICKS_PER_MINUTE, proc_sch_isroot, proc_sch_parent);
	PROC_ACCOUNT_TICKS = t;
	return t;
}

/*
 * Close the accounting interval of the currently accounted process at the
 * present time and make 'proc' the process charged from now on.
 */
void KERNEL$SWITCH_PROC_ACCOUNT(PROC *proc)
{
	sched_unsigned now;

	now = GET_FINE_TICKS();
	(void)DO_PROC_ACCOUNT(now);
	KERNEL$PROC_ACCOUNT = proc;
}

/*
 * Same as KERNEL$SWITCH_PROC_ACCOUNT, but the caller supplies the timestamp
 * t instead of having the tick counter read again.
 */
void KERNEL$SWITCH_PROC_ACCOUNT_TICKS(PROC *proc, sched_unsigned t)
{
	(void)DO_PROC_ACCOUNT(t);
	KERNEL$PROC_ACCOUNT = proc;
}

/* Callable form of the END_SYSCALL macro: lowers the SPL to SPL_USER and
   jumps to the idle loop. */
void DO_END_SYSCALL(void)
{
	END_SYSCALL;
}

/*
 * Pick the process to run next. Must be called at SPL_DEV (checked in debug
 * builds); returns at SPL_DEV.
 *
 * At SPL_VSPACE the elapsed time is accounted, the currently running process
 * (if any) is put back on the scheduler list and SSELECT chooses a
 * scheduler node starting from the kernel process. If nothing is selected,
 * PROC_RUN becomes NULL and further time is charged to the kernel process;
 * otherwise the selected process becomes both PROC_RUN and the accounted
 * process, and its VM entity is touched at SPL_CACHE.
 *
 * t is the current fine-tick timestamp.
 */
static void SCHEDULE(sched_unsigned t)
{
	SCHED *res;
	PROC *p;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_DEV)))
		KERNEL$SUICIDE("SCHEDULE AT SPL %08X", KERNEL$SPL);
#endif
	/*__debug_printf("SCHEDULE.");
	dump_procs(&KERNEL$PROC_KERNEL);*/
	RAISE_SPL(SPL_VSPACE);
	t = DO_PROC_ACCOUNT(t);
	if (PROC_RUN) {
#if __DEBUG >= 1
		if (__unlikely(!(PROC_RUN->sch.flags & SCHED_READY)))
			KERNEL$SUICIDE("SCHEDULE: PROC_RUN IS NOT READY");
#endif
		SSETLIST(&PROC_RUN->sch, t, proc_sch_parent, proc_sch_isroot);
	}
	SSELECT(&KERNEL$PROC_KERNEL.sch, res, SCH_REFRESH);
	if (__unlikely(!res)) {
		/* nothing runnable */
		PROC_RUN = NULL, KERNEL$PROC_ACCOUNT = &KERNEL$PROC_KERNEL;
	} else {
		p = sch2proc(res);
		PROC_RUN = KERNEL$PROC_ACCOUNT = sch2proc(res);
		LOWER_SPL(SPL_CACHE);
		KERNEL$CACHE_TOUCH_VM_ENTITY(&p->vme, p->parent);
	}
	LOWER_SPL(SPL_DEV);
	/*__debug_printf("SCHEDULE: %p.", PROC_RUN);*/
}

/*
 * Make blocked process p ready again. Must be called at SPL_DEV with p
 * blocked (both checked in debug builds); returns at SPL_DEV.
 *
 * The block reason is cleared and the scheduler node marked ready. A process
 * in rundown is handed to SHUTDOWN_PROCESS instead of being scheduled.
 * Otherwise, at SPL_VSPACE, p either preempts the running process (it
 * becomes PROC_RUN and the accounted process, and its VM entity is touched)
 * or is put on the scheduler list.
 */
static void PROC_UNBLOCK(PROC *p)
{
	sched_unsigned t;
	if (__unlikely(p != KERNEL$PROC_ACCOUNT)) {
		t = GET_FINE_TICKS();	/* it's good to overlap rdtsc with other instructions */
	} else {
		/* rdtsc is slow --- if we already executed it in driver, do not execute it again */
		t = PROC_ACCOUNT_TICKS;
	}
	/*__debug_printf("unblock(%p)", p);*/
	/*dump_procs(&KERNEL$PROC_KERNEL);*/
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_DEV))) KERNEL$SUICIDE("PROC_UNBLOCK AT SPL %08X", KERNEL$SPL);
	if (__unlikely(!(p->flags & PR_BLOCKREASON)))
		KERNEL$SUICIDE("PROC_UNBLOCK: NOT BLOCKED (FLAGS %X)", p->flags);
#endif
	p->flags &= ~PR_BLOCKREASON;
	SSETREADY(&p->sch);
	if (__unlikely(p->flags & PR_RUNDOWN)) {
		SHUTDOWN_PROCESS(p, (void *)&KERNEL$LIST_END);
		return;
	}
	RAISE_SPL(SPL_VSPACE);
	/* nothing is running: p takes over unconditionally */
	if (__unlikely(!PROC_RUN)) goto do_preempt;
	SSETLIST(&PROC_RUN->sch, t, proc_sch_parent, proc_sch_isroot);
		/* !!! FIXME: don't preempt just preempted proc (should bring better latency at LOCKUP_11 test) */
	/* SPREEMPT jumps to one of the two labels below */
	SPREEMPT(&p->sch, proc_sch_isroot, proc_sch_parent, do_preempt, dont_preempt);

	/* "if (0)" only provides two bodies that are entered via the labels */
	if (0) {
		do_preempt:
			/*__debug_printf("(PREEMPT %s->%s)\n", PROC_RUN->jobname, p->jobname), KERNEL$STACK_DUMP();*/
		t = DO_PROC_ACCOUNT(t);
		KERNEL$PROC_ACCOUNT = PROC_RUN = p;
		LOWER_SPL(SPL_CACHE);
		KERNEL$CACHE_TOUCH_VM_ENTITY(&p->vme, p->parent);
		LOWER_SPL(SPL_DEV);
	} else {
		dont_preempt:
			/*if (PROC_RUN) __debug_printf("(no preempt %s->%s)\n", PROC_RUN->jobname, p->jobname), KERNEL$STACK_DUMP();*/
		/* NOTE(review): this SSETLIST runs after the SPL was lowered to
		   SPL_DEV, whereas the SSETLIST calls above and in SCHEDULE run
		   at SPL_VSPACE -- confirm that the order is intentional */
		LOWER_SPL(SPL_DEV);
		SSETLIST(&p->sch, t, proc_sch_parent, proc_sch_isroot);
	}
	/*__debug_printf("proc_unblock(%p, RUN %p).", p, PROC_RUN);*/
}

/*
 * Block process p for the given reason (a PR_BLOCKREASON flag value) and
 * run the scheduler. Must be called at SPL_DEV with p not blocked (both
 * checked in debug builds).
 *
 * The scheduler node is marked not ready; if p was the running process,
 * PROC_RUN is cleared before SCHEDULE picks a successor.
 */
static void PROC_BLOCK(PROC *p, int reason)
{
	/*__debug_printf("block(%p, %x)", p, reason);*/
	/*dump_procs(&KERNEL$PROC_KERNEL);*/
	sched_unsigned now = GET_FINE_TICKS();
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_DEV))) {
		KERNEL$SUICIDE("PROC_BLOCK AT SPL %08X", KERNEL$SPL);
	}
	if (__unlikely(p->flags & PR_BLOCKREASON)) {
		KERNEL$SUICIDE("PROC_BLOCK: ALREADY BLOCKED (FLAGS %X)", p->flags);
	}
#endif
	p->flags |= reason;
	SUNSETREADY(&p->sch, now, proc_sch_parent, proc_sch_isroot);
	if (PROC_RUN == p) {
		PROC_RUN = NULL;
	}
	SCHEDULE(now);
}

/* Scheduling timer callback: defers the work to SCHEDULE_AST. The argument
   is unused. */
static void SCHEDULE_TIMER_FN(TIMER *t)
{
	(void)t;
	CALL_AST(&SCHEDULE_AST);
}

/*
 * AST posted by the scheduling timer: re-arms the timer for the next
 * SCHEDULE_TIME period and runs the scheduler with the current time.
 */
DECL_AST(SCHEDULE_AST_FN, SPL_DEV, AST)
{
	KERNEL$SET_TIMER(SCHEDULE_TIME, &SCHEDULE_TIMER);
	SCHEDULE(GET_FINE_TICKS());
	RETURN;
}

/* Returns the depth of 'proc' in the process tree (the kernel process has
   depth 0, each child is one deeper than its parent). */
unsigned KERNEL$PROC_DEPTH(PROC *proc)
{
	unsigned level = proc->depth;

	return level;
}

/*
 * Iterate over the ancestors of 'proc' from the top of the tree downwards.
 *
 * *val is the iteration cookie. On each call the depth stored in it (with
 * the PROC_PATH_FORKED bit masked off) is advanced by one; if that exceeds
 * the depth of 'proc', NULL is returned and *val is left untouched.
 * Otherwise *val receives the new depth, with PROC_PATH_FORKED set when the
 * process at that depth has a non-NULL parent_forktable, and the job name
 * of that process is returned.
 */
char *KERNEL$PROC_PATH(PROC *proc, unsigned *val)
{
	unsigned long ft, mask;
	unsigned level;
	PROC *node;

	level = (*val & ~PROC_PATH_FORKED) + 1;
	if (__unlikely(level > proc->depth))
		return NULL;
	node = proc->backptr[level];

	/* (x | -x) has its top bit set exactly when x != 0; the shift moves
	   the top bit of unsigned long to the top bit of unsigned, which
	   presumably is where PROC_PATH_FORKED lives -- confirm */
	ft = (unsigned long)node->parent_forktable;
	mask = (ft | -ft) >> ((sizeof(unsigned long) - sizeof(unsigned)) * 8);
	level |= mask & PROC_PATH_FORKED;

	*val = level;
	return node->jobname;
}

/*
 * VM entity 'checkmap' callback for processes: lowers the SPL to SPL_DEV
 * and always returns 1. The argument is unused.
 */
static int PROC_CHECKMAP(VMENTITY *e)
{
	(void)e;
	LOWER_SPL(SPL_DEV);
	/* SMPFIX: process swapping is not implemented until we have SMP ---
	   it would be wasted work now */
	return 1;
}

/*
 * VM entity 'write' callback for processes: there is nothing to write, the
 * function only lowers the SPL to SPL_DEV. All arguments are unused.
 */
static void PROC_WRITE(VMENTITY *e, PROC *p, int trashing)
{
	(void)e;
	(void)p;
	(void)trashing;
	LOWER_SPL(SPL_DEV);
}

/* Process being swapped out; argument channel for the fap_swap callback. */
static PROC *psw;

static int fap_swap(PROC *p);

/*
 * VM entity 'swapout' callback for processes. Lowers the SPL to SPL_DEV.
 *
 * The subtree of the process is checked with fap_swap; if any callback
 * objects, 3 is returned and nothing is shut down. Otherwise the process is
 * shut down (its error is set to -EINTR unless it is already in rundown)
 * and the function returns 1 if the shutdown reported a wait queue, 0 if
 * not.
 */
static int PROC_SWAPOUT(VMENTITY *e)
{
	WQ *pending;
	int vetoed;

	LOWER_SPL(SPL_DEV);
	psw = GET_STRUCT(e, PROC, vme);
	vetoed = FOR_ALL_PROCS(psw, fap_swap);
	if (__unlikely(vetoed)) {
		return 3;
	}

	/*__debug_printf("swapping out %s\n", psw->jobname);*/
	if (__likely(!(psw->flags & PR_RUNDOWN))) {
		psw->error = -EINTR;
	}
	SHUTDOWN_PROCESS(psw, &pending);
	if (pending)
		return 1;
	return 0;
}

/*
 * FOR_ALL_PROCS callback used by PROC_SWAPOUT to decide whether swapping
 * out 'psw' must be refused because of descendant p.
 *
 * Only descendants other than psw whose VM entity is not freeable are
 * considered. If psw is not wired and p is wired or sits in a higher queue
 * than psw, p's queue state is transferred to psw (at SPL_CACHE) and 1 is
 * returned, which makes PROC_SWAPOUT refuse. In all other cases 0 is
 * returned.
 */
static int fap_swap(PROC *p)
{
	if (__unlikely(p != psw) /*&& __likely(!(p->flags & PR_RUNDOWN))*/ && __likely(!CACHE_VMENTITY_IS_FREEABLE(&p->vme))) {
		if (!psw->vme.wired && (p->vme.wired || p->vme.queue > psw->vme.queue)) {
			RAISE_SPL(SPL_CACHE);
			KERNEL$CACHE_TRANSFER_QUEUE_STATE(&psw->vme, &p->vme);
			LOWER_SPL(SPL_DEV);
/*{
	static time_t t;
	if (time(NULL) != t) __debug_printf("denied swapout %s because of %s, (w %p, q %d)\n", psw->jobname, p->jobname, p->vme.wired, p->vme.queue);
	time(&t);
}*/
			return 1;
		}
	}
	return 0;
}

/* PROC SPAWN */

/*
 * Allocate a new process as a child of 'parent'.
 *
 * Returns:
 *   - the new process, linked into the parent's child list, with depth and
 *     backptr[] set up and its VM entity inserted into the cache;
 *   - NULL with *wq set to a wait queue the caller should wait on before
 *     retrying (memory quota wait, free-memory wait, or the wait queue
 *     reported by the shutdown of a victim process);
 *   - NULL with *wq == NULL when a victim process was shut down without
 *     leaving anything to wait for;
 *   - __ERR_PTR(-EDQUOT) when the process quota is exhausted and no victim
 *     other than the caller's own ancestry could be found.
 *
 * Note that the label 'zapp' lives inside the failure block passed to
 * QALLOC and is also jumped to from the slab-allocation failure paths, with
 * p == NULL in that case.
 */
static PROC *ALLOC_PROC(PROC *parent, WQ **wq)
{
	QUOTA *pzap;
	PROC *p;
	/*__debug_printf("alloc for %s: ", parent->jobname);*/
	if (__unlikely((*wq = KERNEL$MAY_ALLOC(parent, sizeof(PROC))) != NULL)) {
		/*__debug_printf("wait-may-alloc\n");*/
		return NULL;
	}
#if __DEBUG_PROCESS_MEMORY_SHORTAGE > 0
	/* debugging aid: randomly simulate slab exhaustion */
	if (!KERNEL$SLAB_EMPTY(&proc_slab) && (random() & 255) < __DEBUG_PROCESS_MEMORY_SHORTAGE) {
		pzap = &KERNEL$PROC_KERNEL.procq;
		p = NULL;
		goto zapp;
	}
#endif
	p = __slalloc(&proc_slab);
	if (__unlikely(!p)) {
		/* out of PROC objects: look for a victim in the whole tree */
		pzap = &KERNEL$PROC_KERNEL.procq;
		/* it is expected that p == NULL here */
		goto zapp;
	}
	ARCH_PROC_INIT(p, parent);
	PROC_INIT_COMMON(p);
	p->parent = parent;
	p->flags = PR_NEW;
	p->error = 0;
	p->error_msg[0] = 0;

	QALLOC(&parent->procq, 1, proc_procq_isroot, proc_procq_parent, Q_NULL_CALL, pzap, {
		/* quota failure: pzap is the quota node that refused */
		QUOTA *zapq;
		PROC *z;
		ARCH_PROC_DESTROY(p);
		ARCH_PROC_FREE(p);
		__slow_slfree(p);
		/* p is not dereferenced below; it is only tested against NULL
		   to tell the quota failure from the memory failure */
		zapp:
		QZAP(pzap, for_all_procq_subnodes, for_all_procq_subnodes_tail, zapq, 0);
		z = LIST_STRUCT(zapq, PROC, procq);
		/*if (z == &KERNEL$PROC_KERNEL) KERNEL$SUICIDE("ALLOC_PROC: NO PROCESSES CURRENTLY AVAILABLE"); OK now, but in case we allocated from proc_slab for other purposes, this would make incorrect suicides */
		if (__unlikely(z == parent)) {
			/* the chosen victim is the requester itself: climb from
			   it towards the node that refused and retry the victim
			   search in a sibling subtree */
			PROC *zzz;
			PROC *zz = LIST_STRUCT(pzap, PROC, procq);
			scan_zap:
			if (__unlikely(!z->parent) || __unlikely(z == zz)) goto noproc;
			XLIST_FOR_EACH(zzz, &z->parent->children, PROC, child_entry) {
				if (zzz != z) {
					pzap = &zzz->procq;
					goto zapp;
				}
			}
			z = z->parent;
			goto scan_zap;
			noproc:
			if (!p) {
				/*__debug_printf("freemem wait\n");*/
				*wq = &KERNEL$FREEMEM_WAIT;
				return NULL;
			}
			/*__debug_printf("!!!!!!!!!!! dquot !!!!!!!!!!!!!\n");*/
			return __ERR_PTR(-EDQUOT);
		}
		if (!(z->flags & PR_RUNDOWN)) z->error = -EINTR;
			/*__debug_printf("shutting down: %s: ", z->jobname);*/
		SHUTDOWN_PROCESS(z, wq);
			/*if (*wq) __debug_printf("wait-shutdown\n");
			else __debug_printf("shutdown\n");*/
		return NULL;
	});
	p->depth = parent->depth + 1;
	ADD_TO_XLIST(&parent->children, &p->child_entry);
	/* ancestor table: the parent's path plus the new process itself */
	memcpy(p->backptr, parent->backptr, sizeof p->backptr);
	p->backptr[p->depth] = p;
	RAISE_SPL(SPL_CACHE);
	KERNEL$CACHE_INSERT_VM_ENTITY(&p->vme, parent, VM_ENTITY_NOSTREAM);
	LOWER_SPL(SPL_DEV);
	/*__debug_printf("ok\n");*/
	return p;
}

/*
 * Release a process obtained from ALLOC_PROC that will not be started.
 * Removes its VM entity from the cache (at SPL_CACHE), unlinks it from the
 * parent's child list, returns one unit to the parent's procq (the
 * counterpart of the QALLOC done in ALLOC_PROC) and finally destroys and
 * frees the architecture state and the slab object.
 */
static void FREE_ALLOCED_PROC(PROC *p)
{
	PROC *owner;

	RAISE_SPL(SPL_CACHE);
	KERNEL$CACHE_REMOVE_VM_ENTITY(&p->vme);
	LOWER_SPL(SPL_DEV);

	DEL_FROM_LIST(&p->child_entry);

	owner = p->parent;
	QFREE(&owner->procq, 1, proc_procq_isroot, proc_procq_parent, Q_NULL_CALL);

	ARCH_PROC_DESTROY(p);
	ARCH_PROC_FREE(p);
	__slow_slfree(p);
}

/*
 * SETUP_PROC: apply the parent-supplied "NAME=value" option strings to a
 * freshly allocated process and finish initializing its quota, scheduler
 * and allocation-rate state.
 *
 * p       - the new process; the options are read from p->parent
 * opts    - pointer (in the parent's address space) to an array of n_opts
 *           string pointers
 * n_opts  - number of option strings
 *
 * Returns NULL on success.  On failure returns either an error pointer
 * (__ERR_PTR(-EINVAL) for a malformed, unknown or out-of-range option, or
 * the error uread_str reported through vmf) or a non-error pointer when a
 * read from the parent failed (opts itself for the pointer read; for the
 * string read whatever uread_str left in vmf -- presumably the faulting
 * address, TODO confirm).  Callers tell the two cases apart with __IS_ERR.
 * On any failure the quota/scheduler initialization below is NOT performed.
 */
static void *SETUP_PROC(PROC *p, char **opts, int n_opts)
{
	char option[__MAX_STR_LEN], *v;
	char *opt;
	void *vmf;
	int i, u;
	while (n_opts) {
		/* fetch the next string pointer from the parent's option array */
		uread_ptr(p->parent, opts, opt, {
			vmf = opts;
			goto ret;
		});
		/* copy the option string itself into the local buffer */
		uread_str(p->parent, opt, option, __MAX_STR_LEN, 0, vmf, {
			goto ret;
		});
		if (__unlikely(__IS_ERR(vmf))) goto ret;
		/* every option must have the form NAME=value */
		v = strchr(option, '=');
		if (__unlikely(!v)) {
			einval:
			vmf = __ERR_PTR(-EINVAL);
			goto ret;
		}
		/* u records whether at least one PROC_OPTIONS entry matched;
		   the scan does not stop at the first match */
		u = 0;
		for (i = 0; PROC_OPTIONS[i].name; i++) if (__unlikely(!__strcasexcmp(PROC_OPTIONS[i].name, option, v))) {
			long l;
			/* destination field inside the PROC, located by table offset */
			void *ptr = (char *)p + PROC_OPTIONS[i].offset;
			switch (PROC_OPTIONS[i].type) {
				case OPT_LONG:
				case OPT_LONG_DISADV:
				case OPT_INV_PRI:
					/* parse the text after '=' and range-check it */
					if (__unlikely(__get_number(v + 1, v + strlen(v), 0, &l)))
						goto einval;
					if (l < PROC_OPTIONS[i].min || l > PROC_OPTIONS[i].max) goto einval;
					/* store, converting according to the option type */
					if (PROC_OPTIONS[i].type == OPT_LONG_DISADV) *(long *)ptr = DISADVANTAGE(l);
					else if (PROC_OPTIONS[i].type == OPT_INV_PRI) *(sched_unsigned *)ptr = PRI_INV(l);
					else *(long *)ptr = l;
					break;
				default:
					KERNEL$SUICIDE("SETUP_PROC: INVALID OPTION %d", PROC_OPTIONS[i].type);
			}
			u = 1;
		}
		/* unknown option name */
		if (__unlikely(!u)) goto einval;
		opts++;
		n_opts--;
	}
	/* all options accepted: complete initialization of the quota nodes ... */
	QINIT2(&p->procq);
	QRINIT2(&p->hq);
	QRINIT2(&p->uq);
	QRINIT2(&p->ioq);
	QINIT2(&p->pgtblq);
	QINIT2(&p->vmq);
	QINIT2(&p->wireq);
	/* ... the CPU scheduler node, linked to the parent's ... */
	SINIT2(&p->sch, PROC_ACCOUNT_TICKS, FINE_TICKS_PER_MINUTE, BORROW_PER_MINUTE);
	SINIT3(&p->sch, &p->parent->sch);
	/* ... and one I/O scheduler node per slot below max_iosched, each
	   linked to the parent's node for the same slot */
	for (i = 0; i < max_iosched; i++) {
		SDINIT(&p->ios[i].sch);
		p->ios[i].sch.mininvpri = p->iosch_mininvpri;
		p->ios[i].sch.maxinvpri = p->iosch_maxinvpri;
		SDINIT2(&p->ios[i].sch, PROC_ACCOUNT_TICKS, FINE_TICKS_PER_MINUTE >> IOSCHED_FINE_TICKS_SHIFT);
		SDINIT3(&p->ios[i].sch, &p->parent->ios[i].sch);
		/*INIT_LIST(&p->ios[i].queue);*/
	}

	/* start from the parent's current allocation rate and remember the
	   starting values; SHUTDOWN_FINISH uses created_alloc_rate[_time] */
	UNIFY_ALLOC_RATE(p->parent);
	p->alloc_rate = p->created_alloc_rate = p->parent->alloc_rate;
	p->alloc_rate_time = p->created_alloc_rate_time = p->parent->alloc_rate_time;

	vmf = NULL;

	ret:
	return vmf;
}

/* Forward declaration: PROC_CANCEL is defined below and installed as the
   cancel handler by KERNEL_KSPAWN. */
extern IO_STUB PROC_CANCEL;

/*
 * KERNEL_KSPAWN: I/O call that creates a new process as a child of
 * KERNEL$PROC_KERNEL on behalf of a KSPAWNRQ.
 *
 * - ALLOC_PROC returning an error pointer completes the request with that
 *   error.
 * - ALLOC_PROC returning NULL means "try again": immediately when no wait
 *   queue was returned, otherwise after waiting on the returned queue (the
 *   request is re-posted to this function).
 * - The link-name table, fork table and request pointer are copied from the
 *   request into the new process, then SETUP_PROC parses the options.
 * - On success the job name is copied, the request becomes cancelable via
 *   PROC_CANCEL and the process is unblocked.  The request stays pending
 *   (it is completed later from the shutdown path).
 */
DECL_IOCALL(KERNEL_KSPAWN, SPL_DEV, KSPAWNRQ)
{
	WQ *wq;
	PROC *p;
	void *s;
	alloc:
	/*__debug_printf("krnl creating: %s: ", RQ->jobname);*/
	p = ALLOC_PROC(&KERNEL$PROC_KERNEL, &wq);
	/*__debug_printf("returned: %p/%p\n", p, wq);*/
	if (__unlikely(__IS_ERR(p))) {
		RQ->status = __PTR_ERR(p);
		RETURN_AST(RQ);
	}
	if (__unlikely(!p)) {
		if (__likely(!wq)) goto alloc;
		WQ_WAIT(wq, RQ, KERNEL_KSPAWN);
		RETURN;
	}
	p->parent_lnt = RQ->lnt;
	p->ln_mode = RQ->ln_mode;
	p->parent_forktable = RQ->forktable;
	if (__unlikely((unsigned long)RQ->forktable & (PTR_ALIGN - 1)))
		KERNEL$SUICIDE("KERNEL_KSPAWN: UNALIGNED FORKTABLE: %p", RQ->forktable);
	p->parent_forktable_n = RQ->forktable_n;
	p->parent_rq = RQ;
	if (__unlikely((s = SETUP_PROC(p, RQ->options, RQ->n_options)) != NULL)) {
		if (__unlikely(!__IS_ERR(s))) {
			/* a non-error pointer means a read of the options faulted;
			   the parent is the kernel, so this is fatal unless page
			   faults are being injected for debugging */
#if __DEBUG_PAGEFAULTS > 0
			/* the request is restarted from the top and will allocate
			   a fresh process, so release this one first -- otherwise
			   every retry leaves a never-started child behind */
			FREE_ALLOCED_PROC(p);
			RETURN_IORQ_LSTAT(RQ, KERNEL_KSPAWN);
#else
			KERNEL$SUICIDE("KERNEL_KSPAWN: PAGE FAULT ON SETUP_PROC ON ADDRESS %p", s);
#endif
		}
		FREE_ALLOCED_PROC(p);
		RQ->status = __PTR_ERR(s);
		RETURN_AST(RQ);
	}
	/* NOTE(review): strncpy does not NUL-terminate when the source has 9 or
	   more characters; this relies on the size/initialization of
	   p->jobname, which is not visible here -- confirm */
	strncpy(p->jobname, RQ->jobname, 9);
	RQ->proc = p;
	IO_ENABLE_CANCEL(SPL_DEV, RQ, PROC_CANCEL);
	PROC_UNBLOCK(p);
	RETURN;
}

/* Forward declarations for the shutdown path defined below.  The success
   stub was previously declared under the misspelled name
   SHUTDOWN_IORQ_SUCCES, which matched no definition. */
static void SHUTDOWN_FINISH(PROC *p, WQ **wq);
static void SHUTDOWN_RETURN_ERROR1(PROC *p, WQ **wq);
extern IO_STUB SHUTDOWN_IORQ_SUCCESS;
extern AST_STUB SHUTDOWN_IORQ_FAILURE;

DECL_IOCALL(PROC_CANCEL, SPL_DEV, KSPAWNRQ)
{
	PROC *p = RQ->proc;
	if (!(p->flags & PR_RUNDOWN)) p->error = -EINTR;
	RQ->status = RQS_CANCEL_ACK;
	if (__unlikely((p->flags & PR_RUNDOWN) == PR_RUNDOWN_5)) SHUTDOWN_RETURN_ERROR1(p, (void *)&KERNEL$LIST_END);
	else SHUTDOWN_PROCESS(p, (void *)&KERNEL$LIST_END);
	RETURN;
}

/*
 * SHUTDOWN_PROCESS: start, or push forward, the teardown of process p.
 *
 * p   - process to shut down; must not be called above/below SPL_DEV
 *       (the function suicides otherwise)
 * wq  - output: a wait queue on which the caller may wait for progress.
 *       Callers that do not care pass the address of KERNEL$LIST_END.
 *
 * The function may return at several points with the teardown still
 * incomplete; it is called again later (e.g. from completion handlers and
 * from SHUTDOWN_FINISH for a parent in rundown) to continue.  When nothing
 * remains to wait for it moves the process to PR_RUNDOWN_2 and enters
 * SHUTDOWN_RETURN_ERROR1.
 */
static void SHUTDOWN_PROCESS(PROC *p, WQ **wq)
{
	UIORQ *u;
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_DEV))) KERNEL$SUICIDE("SHUTDOWN_PROCESS AT SPL %08X", KERNEL$SPL);
	/*__debug_printf("shutdown(%p/%s/%d)\"%s\"(%ld)", p, p->jobname, p->depth, p->error_msg, p->error);*/
	/* the kernel process itself is never shut down; tell the caller to wait
	   for free memory instead */
	if (__unlikely(p == &KERNEL$PROC_KERNEL)) {
		*wq = &KERNEL$FREEMEM_WAIT;
		return;
	}
	recurse:
#if __DEBUG >= 2
	if (__unlikely(!p->child_entry.next))
		KERNEL$SUICIDE("SHUTDOWN_PROCESS: SHUTTING DOWN ALREADY DESTROYED PROCESS");
#endif
	/* stop the process from running unless it is already blocked */
	if (__likely(!(p->flags & PR_BLOCKREASON))) PROC_BLOCK(p, PR_SHUTDOWN);
	*wq = &p->shutdown;
	/* already past stage 1: the state machine owns the process; only the
	   FINISH stage is re-driven from here */
	if (__unlikely((p->flags & PR_RUNDOWN) > PR_RUNDOWN_1)) {
		if ((p->flags & PR_RUNDOWN) == PR_RUNDOWN_FINISH) SHUTDOWN_RETURN_ERROR1(p, wq);
		return;
	}
	p->flags |= PR_RUNDOWN_1;
	/* children first: descend to the first child and restart from there */
	if (__unlikely(!XLIST_EMPTY(&p->children))) {
		/*__debug_printf("shutting down child, depth %d\n", p->depth);*/
		p = LIST_STRUCT(p->children.next, PROC, child_entry);
		if (__likely(!(p->flags & PR_RUNDOWN))) p->error = -EINTR;
		goto recurse;
	}
	/* detach from the fork parent; VM_ARCH_READ_ONLY may return a wait
	   queue, in which case the shutdown is deferred */
	if (__unlikely(p->fork_parent != NULL)) {
		if (__unlikely((*wq = VM_ARCH_READ_ONLY(p->fork_parent)) != NULL)) {
			DELAYED_SHUTDOWN();
/*__debug_printf("w1\n");*/
			return;
		}
		p->fork_parent->fork_child = NULL;
		p->fork_parent = NULL;
	}
	/* cancel all outstanding user I/O requests and wait for them to drain */
	if (__unlikely(!XLIST_EMPTY(&p->uiorqs))) {
		XLIST_FOR_EACH(u, &p->uiorqs, UIORQ, list) KERNEL$CIO(&u->u.iorq);
/*__debug_printf("w2\n");*/
		return;
	}

	/* stop the per-process timer (timer list is manipulated at SPL_TIMER) */
	RAISE_SPL(SPL_TIMER);
	KERNEL$DEL_TIMER(&p->timer);
	VOID_LIST_ENTRY(&p->timer.list);
	LOWER_SPL(SPL_DEV);
	if (__unlikely(p->timer_iorq_posted)) return;
	/* the process is blocked for a reason other than shutdown: cancel the
	   fault/wait request for soft faults and wait-queue waits, proceed for
	   I/O waits, and just return for any other reason */
	if (__unlikely((p->flags & PR_BLOCKREASON) != PR_SHUTDOWN)) {
		if ((p->flags & PR_BLOCKREASON) == PR_SOFTFAULT) {
			cio:
			KERNEL$CIO(&p->fault_iorq);
/*__debug_printf("w3\n");*/
			return;
		}
		if ((p->flags & PR_BLOCKREASON) == PR_WQ_WAIT) goto cio;
		if ((p->flags & PR_BLOCKREASON) == PR_IO_WAIT) goto block_ok;
		return;
	}
	block_ok:

	/* nothing left to wait for: advance to stage 2 and run the state machine */
	FREE_RESERVED_UIORQS(p, 0);
	p->flags = (p->flags & ~PR_RUNDOWN) | PR_RUNDOWN_2;
	SHUTDOWN_RETURN_ERROR1(p, wq);
}

DECL_IOCALL(SHUTDOWN_IORQ_SUCCESS, SPL_DEV, IORQ)
{
	PROC *p = GET_STRUCT(RQ, PROC, fault_iorq);
	SHUTDOWN_RETURN_ERROR1(p, (void *)&KERNEL$LIST_END);
	RETURN;
}

DECL_AST(SHUTDOWN_IORQ_FAILURE, SPL_DEV, IORQ)
{
	PROC *p = GET_STRUCT(RQ, PROC, fault_iorq);
#if __DEBUG >= 1
	if (__unlikely((p->fault_iorq.status & RQS_PROC_MASK) == RQS_PROC))
		KERNEL$SUICIDE("SHUTDOWN_IORQ_FAILURE: ACTIVE REQUEST, STATUS %lX", p->fault_iorq.status);
#endif
	if ((p->flags & PR_RUNDOWN) != PR_RUNDOWN_2) {
		PROC *pp = p->parent;
		if (__likely(!pp->error_msg[0])) {
			_snprintf(pp->error_msg, __MAX_STR_LEN, "ERROR POSTING PROCESS TERMINATION: %s", strerror(-RQ->status));
			pp->error = RQ->status;
		}
		SHUTDOWN_PROCESS(pp, (void *)&KERNEL$LIST_END);
	}
	p->flags += PR_RUNDOWN_1;
	SHUTDOWN_RETURN_ERROR1(p, (void *)&KERNEL$LIST_END);
	RETURN;
}

/*
 * SHUTDOWN_RETURN_ERROR1: rundown state machine of a dying process.
 *
 * The current stage is kept in (p->flags & PR_RUNDOWN).  The switch jumps
 * to the code for that stage and every stage deliberately falls through
 * into the next one.  Whenever a stage has to wait (page fault being
 * resolved, wait queue, cancel in progress) the function returns with the
 * stage stored in the flags; it is re-entered through
 * SHUTDOWN_IORQ_SUCCESS / SHUTDOWN_IORQ_FAILURE (installed on
 * p->fault_iorq), PROC_CANCEL or SHUTDOWN_PROCESS.
 *
 * p   - process being torn down; must not be PROC_RUN (suicide otherwise)
 * wq  - wait-queue output, passed on to SHUTDOWN_FINISH
 */
static void SHUTDOWN_RETURN_ERROR1(PROC *p, WQ **wq)
{
	WQ *w;
	unsigned long vmf;
	if (__unlikely(p == PROC_RUN)) KERNEL$SUICIDE("SHUTDOWN_RETURN_ERROR1: SHUTTING DOWN RUNNING PROCESS, FLAGS %X", p->flags);
	switch (p->flags & PR_RUNDOWN) {
		/* stage 2: PROC_POST_TIMER; a nonzero result is a faulting address
		   (bit 0 carries the write flag) that must be paged in first.
		   VM_FAULT returning 0 means the retry can happen immediately. */
		case PR_RUNDOWN_2:
		f0:
		vmf = PROC_POST_TIMER(p);
		if (__unlikely(vmf != 0)) {
			p->fault_iorq.fn = SHUTDOWN_IORQ_FAILURE;
			p->fault_iorq.tmp1 = (unsigned long)SHUTDOWN_IORQ_SUCCESS;
			p->fault_iorq.status = RQS_PROCESSING;
			if (!VM_FAULT(p, (void *)(vmf & ~1UL), (vmf & PF_WRITE) | PF_SWAPPER, &p->fault_iorq)) goto f0;
	/*__debug_printf("shutdown block 1: %s\n", p->jobname);*/
			return;
		}
		p->flags = (p->flags & ~PR_RUNDOWN) | PR_RUNDOWN_3;
		/* fall through */
		/* stage 3: copy the error message into the error field of the
		   parent's spawn request, faulting the page in if necessary */
		case PR_RUNDOWN_3:
		f1:
		uwrite_str(p->parent, &p->parent_rq->error, p->error_msg, vmf, {
			p->fault_iorq.fn = SHUTDOWN_IORQ_FAILURE;
			p->fault_iorq.tmp1 = (unsigned long)SHUTDOWN_IORQ_SUCCESS;
			p->fault_iorq.status = RQS_PROCESSING;
			if (!VM_FAULT(p->parent, (void *)vmf, PF_WRITE | PF_SWAPPER, &p->fault_iorq)) goto f1;
	/*__debug_printf("shutdown block 2: %s\n", p->jobname);*/
			return;
		});
		p->flags = (p->flags & ~PR_RUNDOWN) | PR_RUNDOWN_4;
		/* fall through */
		/* stages 4/5: deliver the exit status to the parent.  Stage 5 is
		   stage 4 parked while the parent request could not leave the
		   cancelable state; PROC_CANCEL resumes it. */
		case PR_RUNDOWN_4:
		case PR_RUNDOWN_5:

		/* a negative status that is neither a valid error code nor an
		   exit-signal encoding is normalized to -EINVAL */
		if (__unlikely(p->error < 0) && __unlikely(!__IS_ERR(__ERR_PTR(p->error))) && __likely(!__IS_EXIT_SIGNAL(p->error))) p->error = -EINVAL;

		if (__unlikely(p->parent == &KERNEL$PROC_KERNEL)) {
			/* kernel-spawned process: complete the KSPAWNRQ directly.
			   The block presumably runs when cancel could not be
			   disabled -- TODO confirm IO_DISABLE_CANCEL semantics */
			IO_DISABLE_CANCEL(SPL_DEV, p->parent_rq, {
				p->flags = (p->flags & ~PR_RUNDOWN) | PR_RUNDOWN_5;
	/*__debug_printf("shutdown block 3: %s\n", p->jobname);*/
				return;
			});
			p->parent_rq->status = p->error;
			CALL_AST(p->parent_rq);
		} else {
			/* user parent: post the completion into the parent's
			   address space, paging in on fault as in stage 2 */
			f2:
			vmf = PROC_POST_IO(p->parent, (IORQ *)p->parent_rq, p->error);
			/*__debug_printf("post_io(%d,%p,%ld)->%ld.\n", p->depth, p->parent_rq, p->error, vmf);*/
			if (__unlikely(vmf != 0)) {
				p->fault_iorq.fn = SHUTDOWN_IORQ_FAILURE;
				p->fault_iorq.tmp1 = (unsigned long)SHUTDOWN_IORQ_SUCCESS;
				p->fault_iorq.status = RQS_PROCESSING;
				if (!VM_FAULT(p->parent, (void *)(vmf & ~1UL), (vmf & PF_WRITE) | PF_SWAPPER, &p->fault_iorq)) goto f2;
	/*__debug_printf("shutdown block 4: %s\n", p->jobname);*/
				return;
			}
			/* wake the parent unless it is itself being shut down */
			if (__likely(!(p->parent->flags & PR_RUNDOWN))) {
				UNBLOCK_IO(p->parent);
			}
		}
		p->flags = (p->flags & ~PR_RUNDOWN) | PR_RUNDOWN_6;
		/* fall through */
		/* stage 6: unmap the whole user range; a returned wait queue means
		   we must wait on it and retry */
		case PR_RUNDOWN_6:
		if (__unlikely((w = VM_ARCH_UNMAP_RANGE(p, 0, KUVMTOP)) != NULL)) {
			wq_wait:
			p->fault_iorq.fn = SHUTDOWN_IORQ_FAILURE;
			p->fault_iorq.tmp1 = (unsigned long)SHUTDOWN_IORQ_SUCCESS;
			p->fault_iorq.status = RQS_PROCESSING;
			WQ_WAIT_F(w, &p->fault_iorq);
			LOWER_SPL(SPL_DEV);
	/*__debug_printf("shutdown block 5: %s\n", p->jobname);*/
			return;
		}
		/* fall through */
		/* stages 7/FINISH: while PROC_CURRENT is locked onto this process
		   wait (in stage 7) on PROC_CURRENT_LOCK_WAIT; otherwise finish */
		case PR_RUNDOWN_7:
		case PR_RUNDOWN_FINISH:
		if (__unlikely(PROC_CURRENT_LOCK) && __unlikely(PROC_CURRENT == p)) {
			p->flags = (p->flags & ~PR_RUNDOWN) | PR_RUNDOWN_7;
			w = &PROC_CURRENT_LOCK_WAIT;
			goto wq_wait;
		}
		p->flags = (p->flags & ~PR_RUNDOWN) | PR_RUNDOWN_FINISH;
		SHUTDOWN_FINISH(p, wq);
	}
}

/*
 * SHUTDOWN_FINISH: last stage of process teardown.
 *
 * Returns early (leaving the process in PR_RUNDOWN_FINISH so that it is
 * retried later) while the process still has I/O quota in use -- pending
 * prefetch requests are cancelled in that case -- or while writeback is
 * outstanding.  Past that point the function always runs to completion:
 * it releases all handles, unlinks the process from its parent, returns
 * quota, wakes all waiters, tears down quota and scheduler nodes, folds the
 * allocation rate back into the parent, removes the VM entity and frees the
 * PROC.  If the parent is itself in rundown, its shutdown is continued.
 *
 * p   - process to destroy; must not be PROC_RUN (suicide otherwise)
 * wq  - output: set to NULL once the process is certain to be destroyed
 */
static void SHUTDOWN_FINISH(PROC *p, WQ **wq)
{
	PROC *pa;
	int i;
	HANDLE *h;

	if (__unlikely(p == PROC_RUN)) KERNEL$SUICIDE("SHUTDOWN_FINISH: SHUTTING DOWN RUNNING PROCESS, FLAGS %X", p->flags);

	ZAP_WIRED_ENTITIES(p);

	RAISE_SPL(SPL_VSPACE);
	if (__unlikely(p->ioq.q_node_usage != 0)) {
		PREFETCH_TAG *io;
	/*__debug_printf("shutdown w1: %s\n", p->jobname);*/
		/* I/O quota still charged: cancel prefetches and try again later */
		XLIST_FOR_EACH(io, &p->prefetch, PREFETCH_TAG, list)
			KERNEL$CIO((IORQ *)io);
		LOWER_SPL(SPL_DEV);
		return;
	}
	/* this check lives in SHUTDOWN_FINISH; the message used to name
	   SHUTDOWN_PROCESS, which pointed debugging at the wrong function */
	if (__unlikely(!XLIST_EMPTY(&p->prefetch)))
		KERNEL$SUICIDE("SHUTDOWN_FINISH: PREFETCH LIST NOT EMPTY");

	if (__unlikely(p->writeback != 0)) {
	/*__debug_printf("shutdown w2: %s\n", p->jobname);*/
		LOWER_SPL(SPL_DEV);
		return;
	}
	LOWER_SPL(SPL_DEV);

	/* no return since here */
	/*__debug_printf("shutdown finish: %s\n", p->jobname);*/

	/* release every handle in every hash bucket, then the reserved ones */
	for (i = 0; i <= p->handle_hash_mask; i++) while ((h = LIST_STRUCT(p->handles[i].next, HANDLE, proc_hash)) != LIST_STRUCT(&KERNEL$LIST_END, HANDLE, proc_hash)) {
		RELEASE_HANDLE(h);
	}
	FREE_RESERVED_HANDLES(p, 0);

	*wq = NULL;
	RAISE_SPL(SPL_VSPACE);
	/* accounting must not keep pointing at a process that is going away */
	if (__unlikely(p == KERNEL$PROC_ACCOUNT)) {
		KERNEL$SWITCH_PROC_ACCOUNT(p->parent);
	}
	DEL_FROM_LIST(&p->child_entry);
	LOWER_SPL(SPL_DEV);

#if __DEBUG >= 2
	/* consistency checks: all reserved handles and UIORQs must be back on
	   their lists and must lie inside the process' own arrays */
	{
		UIORQ *u;
		i = 0;
		XLIST_FOR_EACH(h, &p->reserved_handle_list, HANDLE, proc_hash) i++;
		if (__unlikely(i != RESERVED_HANDLES))
			KERNEL$SUICIDE("SHUTDOWN_FINISH: RESERVED HANDLES LEAKED, COUNT %d, SHOULD BE %d", i, RESERVED_HANDLES);
		XLIST_FOR_EACH(h, &p->reserved_handle_list, HANDLE, proc_hash) if (__unlikely(h < p->reserved_handles) || __unlikely(h >= &p->reserved_handles[RESERVED_HANDLES]))
			KERNEL$SUICIDE("SHUTDOWN_FINISH: INVALID RESERVED HANDLE ON LIST, PROC %p, HANDLE %p", p, h);
		i = 0;
		XLIST_FOR_EACH(u, &p->reserved_uiorq_list, UIORQ, list) i++;
		if (__unlikely(i != RESERVED_UIORQS))
			KERNEL$SUICIDE("SHUTDOWN_FINISH: RESERVED UIORQS LEAKED, COUNT %d, SHOULD BE %d", i, RESERVED_UIORQS);
		XLIST_FOR_EACH(u, &p->reserved_uiorq_list, UIORQ, list) if (__unlikely(u < p->reserved_uiorqs) || __unlikely(u >= &p->reserved_uiorqs[RESERVED_UIORQS]))
			KERNEL$SUICIDE("SHUTDOWN_FINISH: INVALID RESERVED UIORQ ON LIST, PROC %p, UIORQ %p", p, u);
	}
#endif

	/* give the process slot back to the parent and wake everybody who was
	   waiting on this process */
	QFREE(&p->parent->procq, 1, proc_procq_isroot, proc_procq_parent, Q_NULL_CALL);
	WQ_WAKE_ALL(&p->shutdown);
	WQ_WAKE_ALL(&p->ioq_wait);
	WQ_WAKE_ALL(&p->free_resources);

	QDONE6(&p->procq, "PROCQ", &p->hq, "HQ", &p->uq, "UQ", &p->ioq, "IOQ", &p->pgtblq, "PGTBLQ", &p->vmq, "VMQ");

	SDONE(&p->sch, "SCH");
	for (i = 0; i < max_iosched; i++) if (__BT(ioschedalloc, i)) {
		SDDONE(&p->ios[i].sch, "IOSCH");
		/*if (__unlikely(!LIST_EMPTY(&p->ios[i].queue))) KERNEL$SUICIDE("SHUTDOWN_FINISH: IO QUEUE NOT EMPTY");*/
	}

	/* fold this process' allocation rate into the parent: add what the
	   process accumulated beyond its (time-decayed) starting value, with
	   the shift count clamped to the width of unsigned long and the result
	   clamped to MAXLONG */
	{
		unsigned long sh;
		UNIFY_ALLOC_RATE(p->parent);
		sh = (p->alloc_rate_time - p->created_alloc_rate_time) >> __BSR_CONST(ALLOC_FORGET_TIME);
		if (__unlikely(sh >= sizeof(unsigned long) * 8)) sh = sizeof(unsigned long) * 8 - 1;
		if (__unlikely((p->parent->alloc_rate += p->alloc_rate - (p->created_alloc_rate >> sh)) > MAXLONG)) p->parent->alloc_rate = MAXLONG;
	}

	if (__unlikely(p->fork_parent != NULL))
		KERNEL$SUICIDE("SHUTDOWN_FINISH: PROCESS HAS STILL FORKED PARENT");
	if (__unlikely(p->fork_child != NULL))
		KERNEL$SUICIDE("SHUTDOWN_FINISH: PROCESS HAS STILL FORKED CHILD");

	if (__unlikely(p->on_reclaim_list)) DEL_FROM_LIST(&p->reclaim_list), p->on_reclaim_list = 0;

	/* remember the parent: p is freed below but the parent is still needed */
	pa = p->parent;

	RAISE_SPL(SPL_CACHE);
	KERNEL$CACHE_REMOVE_VM_ENTITY(&p->vme);
	LOWER_SPL(SPL_DEV);
	ARCH_PROC_DESTROY(p);
	/* PROC_CURRENT must not be left pointing at freed memory */
	if (__likely(PROC_CURRENT == p)) {
#if __DEBUG >= 1
		if (__unlikely(PROC_CURRENT_LOCK))
			KERNEL$SUICIDE("SHUTDOWN_FINISH: PROC_CURRENT LOCKED WHILE IT SHOULDN'T BE (%d)", PROC_CURRENT_LOCK);
#endif
		SET_PROC_CURRENT(pa);
	}
	ARCH_PROC_FREE(p);
	__slfree(p);
	/* a parent that is being shut down was waiting for its children */
	if (__unlikely(pa->flags & PR_RUNDOWN)) SHUTDOWN_PROCESS(pa, (void *)&KERNEL$LIST_END);
}

/* Forward declarations: both handlers are defined further below and are
   installed on fault_iorq by SOFTFAULT_IORQ_FAILURE. */
extern AST_STUB XCPT_IORQ_FAILURE;
extern IO_STUB XCPT_IORQ_SUCCESS;

DECL_AST(SOFTFAULT_IORQ_FAILURE, SPL_DEV, IORQ)
{
	PROC *p = GET_STRUCT(RQ, PROC, fault_iorq);
#if __DEBUG >= 1
	if (__unlikely((p->fault_iorq.status & RQS_PROC_MASK) == RQS_PROC))
		KERNEL$SUICIDE("SOFTFAULT_IORQ_FAILURE: ACTIVE REQUEST, STATUS %lX", p->fault_iorq.status);
#endif
	if (__unlikely(p->fault_iorq.status == -EINTR)) {
		/*static u_jiffies_lo_t last = 0;
		u_jiffies_lo_t j = KERNEL$GET_JIFFIES_LO();
		last = j;*/
		PROC_UNBLOCK(p);
		RETURN;
	}
	p->xcpt_type = !(p->fault_access & PF_WRITE) ? XCPT_RPF : XCPT_WPF;
	p->xcpt_address = p->fault_address;
	p->xcpt_error = p->fault_iorq.status;
	if (__unlikely(p->fault_proc != p)) p->flags |= PR_XCPT_OTHERPROC;
	p->fault_iorq.fn = XCPT_IORQ_FAILURE;
	p->fault_iorq.tmp1 = (unsigned long)XCPT_IORQ_SUCCESS;
	RETURN_IORQ(&p->fault_iorq, XCPT_IORQ_SUCCESS);
}

DECL_IOCALL(FAULT_IORQ_SUCCESS, SPL_DEV, IORQ)
{
	PROC *p = GET_STRUCT(RQ, PROC, fault_iorq);
	if (__unlikely(KERNEL$LOCKUP_LEVEL >= LOCKUP_LEVEL_ALL_IORQS)) {
/* in case the driver constantly fails get_page and succeeds with
   vspace_get_pagein_rq, do not lockup hard, allow the process to be killed */
		goto do_unblock;
	}
	/*__debug_printf("fis.");*/
	if (__likely(!VM_FAULT(p->fault_proc, p->fault_address, p->fault_access, &p->fault_iorq))) {
		do_unblock:
		PROC_UNBLOCK(p);
		RETURN;
	}
	/*__debug_printf("block!");*/
	RETURN;
}

DECL_AST(XCPT_IORQ_FAILURE, SPL_DEV, IORQ)
{
	PROC *p = GET_STRUCT(RQ, PROC, fault_iorq);
#if __DEBUG >= 1
	if (__unlikely((p->fault_iorq.status & RQS_PROC_MASK) == RQS_PROC))
		KERNEL$SUICIDE("XCPT_IORQ_FAILURE: ACTIVE REQUEST, STATUS %lX", p->fault_iorq.status);
#endif
	if (__likely(!p->error_msg[0])) {
		_snprintf(p->error_msg, __MAX_STR_LEN, "FATAL PAGE FAULT WHEN DELIVERING %s%s%s", __exceptionmsg(p->xcpt_type, p->xcpt_ip, p->xcpt_address, p->xcpt_error, 0), RQ->status ? ": ERROR: " : "", RQ->status ? strerror(-RQ->status) : "");
		p->error = RQ->status;
	}
	p->flags &= ~PR_BLOCKREASON;
	SHUTDOWN_PROCESS(p, (void *)&KERNEL$LIST_END);
	RETURN;
}

	/* !!! FIXME: remove */
/*
 * dumpxcpt: debugging aid that prints the state of process p with
 * __debug_printf: the saved register image read from the process' UDATA
 * area, 64 words of stack starting at the saved ESP, and 4096 bytes of
 * UDATA starting at UDATA_AST_QUEUES.  The first user read that fails
 * aborts the dump; the final "EXCEPTION IN ..." line with the job name and
 * error message is always printed.
 */
static void dumpxcpt(PROC *p)
{
	unsigned long eax, ecx, edx, ebx, esp, ebp, esi, edi, eflags, eip;
	unsigned es, cs, ss, ds, fs, gs;
	unsigned spl, ppl, bpl;
	int i;
	uread_long(p, KUPLACE(UDATA_EAX), eax, goto skip_reg;);
	uread_long(p, KUPLACE(UDATA_ECX), ecx, goto skip_reg;);
	uread_long(p, KUPLACE(UDATA_EDX), edx, goto skip_reg;);
	uread_long(p, KUPLACE(UDATA_EBX), ebx, goto skip_reg;);
	uread_long(p, KUPLACE(UDATA_ESP), esp, goto skip_reg;);
	uread_long(p, KUPLACE(UDATA_EBP), ebp, goto skip_reg;);
	uread_long(p, KUPLACE(UDATA_ESI), esi, goto skip_reg;);
	uread_long(p, KUPLACE(UDATA_EDI), edi, goto skip_reg;);
	uread_long(p, KUPLACE(UDATA_EFLAGS), eflags, goto skip_reg;);
	uread_long(p, KUPLACE(UDATA_EIP), eip, goto skip_reg;);
	uread_int(p, KUPLACE(UDATA_ES), es, goto skip_reg;);
	uread_int(p, KUPLACE(UDATA_CS), cs, goto skip_reg;);
	uread_int(p, KUPLACE(UDATA_SS), ss, goto skip_reg;);
	uread_int(p, KUPLACE(UDATA_DS), ds, goto skip_reg;);
	uread_int(p, KUPLACE(UDATA_FS), fs, goto skip_reg;);
	uread_int(p, KUPLACE(UDATA_GS), gs, goto skip_reg;);
	uread_int(p, KUPLACE(UDATA_SPL), spl, goto skip_reg;);
	uread_int(p, KUPLACE(UDATA_PPL), ppl, goto skip_reg;);
	uread_int(p, KUPLACE(UDATA_BPL), bpl, goto skip_reg;);
	/* only the low 16 bits of these selectors are printed */
	es &= 0xffff;
	ds &= 0xffff;
	fs &= 0xffff;
	gs &= 0xffff;
	__debug_printf("EIP: %08lX  EFLAGS: %08lX\nEAX: %08lX  ECX: %08lX  EDX: %08lX  EBX: %08lX\nESP: %08lX  EBP: %08lX  ESI: %08lX  EDI: %08lX\nES: %04X  CS: %04X  SS: %04X  DS: %04X  FS: %04X  GS: %04X\nSPL: %08X  PPL: %08X  BPL: %08X\n", eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi, es, cs, ss, ds, fs, gs, spl, ppl, bpl);
	__debug_printf("STACK DUMP:\n");
	/* eight words per output line */
	for (i = 0; i < 64; i++) {
		unsigned long val;
		uread_long(p, esp + i * sizeof(unsigned long), val, goto skip_reg;);
		__debug_printf("%08lX%s", val, (i & 7) == 7 ? "\n" : "  ");
	}
	__debug_printf("UDATA DUMP:\n");
	for (i = UDATA_AST_QUEUES; i < UDATA_AST_QUEUES + 4096; i += 4) {
		unsigned long val;
		uread_long(p, KUPLACE(i), val, goto skip_reg;);
		/* i is a byte offset advancing by 4: derive the word index from
		   it so that a newline is emitted after every eighth word (the
		   former test ((i * 4) & 7) == 7 could never be true) */
		__debug_printf("%08lX%s", val, (((i - UDATA_AST_QUEUES) >> 2) & 7) == 7 ? "\n" : "  ");
	}
	skip_reg:
	__debug_printf("EXCEPTION IN %s: %s\n", p->jobname, p->error_msg);
}

/*
 * XCPT_IORQ_SUCCESS: deliver the exception recorded in p->xcpt_* to the
 * process owning fault_iorq.
 *
 * Each access to the process' UDATA may fault.  In that case VM_FAULT is
 * asked to page the location in: a zero return means the access can be
 * retried immediately (goto rdN), otherwise the function returns and is
 * re-run from the top when fault_iorq completes.
 *
 * If the process has no exception handler available, or the exception was
 * caused by an access to another process (PR_XCPT_OTHERPROC), the process
 * is killed with -EFAULT instead.
 */
DECL_IOCALL(XCPT_IORQ_SUCCESS, SPL_DEV, IORQ)
{
	int hndlr;
	unsigned long pf;
	PROC *p = GET_STRUCT(RQ, PROC, fault_iorq);
	/* read the "exception handler available" byte from UDATA */
	rd1:
	uread_8(p, KUPLACE(UDATA_STRUCT + OFF_UDATA_xcpt_available), hndlr, {
		if (!VM_FAULT(p, (void *)KUPLACE(UDATA_STRUCT + OFF_UDATA_xcpt_available), PF_WRITE | PF_SWAPPER, &p->fault_iorq)) goto rd1;
		RETURN;
	});
	if (__unlikely(!hndlr) || __unlikely(p->flags & PR_XCPT_OTHERPROC)) {
		/* cannot be delivered: record the reason (first error wins),
		   dump the process state and shut it down */
		if (__likely(!p->error_msg[0])) {
			if (__likely(!(p->flags & PR_XCPT_OTHERPROC))) {
				_snprintf(p->error_msg, __MAX_STR_LEN, "UNHANDLED %s", __exceptionmsg(p->xcpt_type, p->xcpt_ip, p->xcpt_address, p->xcpt_error, 0));
			} else {
				_snprintf(p->error_msg, __MAX_STR_LEN, "WHEN ACCESSING PARENT PROCESS %s", __exceptionmsg(p->xcpt_type, p->xcpt_ip, p->xcpt_address, p->xcpt_error, 0));
			}
			p->error = -EFAULT;
			dumpxcpt(p);
		}
		p->flags &= ~PR_BLOCKREASON;
		SHUTDOWN_PROCESS(p, (void *)&KERNEL$LIST_END);
		RETURN;
	}
	/*if ((unsigned long)p->xcpt_ip < 0xBFFE9E00) __debug_printf("exception: %s, handled: %d.\n", __exceptionmsg(p->xcpt_type, p->xcpt_ip, p->xcpt_address, p->xcpt_error, 0), hndlr);*/
	/* store the exception description into UDATA: return address ... */
	rd15:
	uwrite_ptr(p, KUPLACE(UDATA_STRUCT + OFF_UDATA_xcpt_return), p->xcpt_ip, {
		if (!VM_FAULT(p, (void *)KUPLACE(UDATA_STRUCT + OFF_UDATA_xcpt_return), PF_WRITE | PF_SWAPPER, &p->fault_iorq)) goto rd15;
		RETURN;
	});
	/* ... exception type ... */
	rd2:
	uwrite_long(p, KUPLACE(UDATA_STRUCT + OFF_UDATA_xcpt_type), p->xcpt_type, {
		if (!VM_FAULT(p, (void *)KUPLACE(UDATA_STRUCT + OFF_UDATA_xcpt_type), PF_WRITE | PF_SWAPPER, &p->fault_iorq)) goto rd2;
		RETURN;
	});
	/* ... faulting address ... */
	rd3:
	uwrite_ptr(p, KUPLACE(UDATA_STRUCT + OFF_UDATA_xcpt_address), p->xcpt_address, {
		if (!VM_FAULT(p, (void *)KUPLACE(UDATA_STRUCT + OFF_UDATA_xcpt_address), PF_WRITE | PF_SWAPPER, &p->fault_iorq)) goto rd3;
		RETURN;
	});
	/* ... and error code */
	rd4:
	uwrite_long(p, KUPLACE(UDATA_STRUCT + OFF_UDATA_xcpt_error), p->xcpt_error, {
		if (!VM_FAULT(p, (void *)KUPLACE(UDATA_STRUCT + OFF_UDATA_xcpt_error), PF_WRITE | PF_SWAPPER, &p->fault_iorq)) goto rd4;
		RETURN;
	});
	/* let the architecture code redirect the process to its handler; a
	   nonzero result is a faulting address with the write flag in bit 0 */
	rd5:
	pf = PROC_HANDLE_XCPT(p);
	if (__unlikely(pf != 0)) {
		if (!VM_FAULT(p, (void *)(pf & ~1UL), (pf & PF_WRITE) | PF_SWAPPER, &p->fault_iorq)) goto rd5;
		RETURN;
	}
	PROC_UNBLOCK(p);
	RETURN;
}

/*
 * PROC_VM_FAULT: resolve a page fault on address addr (access mode acc) in
 * process p's own address space on behalf of kernel code.
 *
 * Returns 0 when VM_FAULT reports that no waiting is needed and the caller
 * may retry the access at once.  Returns 1 when the process has been
 * blocked with PR_SOFTFAULT; fault_iorq then continues through
 * FAULT_IORQ_SUCCESS / SOFTFAULT_IORQ_FAILURE.
 *
 * The process must not already be blocked (checked when __DEBUG >= 1).
 */
int PROC_VM_FAULT(PROC *p, void *addr, int acc)
{
#if __DEBUG >= 1
	if (__unlikely(p->flags & PR_BLOCKREASON))
		KERNEL$SUICIDE("PROC_VM_FAULT: ALREADY BLOCKED (FLAGS %X)", p->flags);
#endif
	/* describe the fault so the completion handlers can retry or report it */
	p->fault_proc = p;
	p->fault_address = addr;
	p->fault_access = acc;
#if __DEBUG >= 2 && defined(__GNUC__)
	p->xcpt_ip = __builtin_return_address(0);
#else
	p->xcpt_ip = 0;
#endif
	p->fault_iorq.status = RQS_PROCESSING;
	p->fault_iorq.tmp1 = (unsigned long)FAULT_IORQ_SUCCESS;
	p->fault_iorq.fn = SOFTFAULT_IORQ_FAILURE;

	if (__unlikely(VM_FAULT(p, addr, acc, &p->fault_iorq))) {
		PROC_BLOCK(p, PR_SOFTFAULT);
		return 1;
	}
	return 0;
}

/*
 * PROC_OTHER_VM_FAULT: like PROC_VM_FAULT, but the fault is resolved in the
 * address space of process pp while p is the process that is blocked and
 * whose fault_iorq is used.
 *
 * p     - process performing the access; must not already be blocked
 * pp    - process whose address space contains addr
 * addr  - faulting address
 * acc   - access flags passed to VM_FAULT
 *
 * Returns 0 when the access can be retried immediately, 1 when p has been
 * blocked with PR_SOFTFAULT until fault_iorq completes.
 */
int PROC_OTHER_VM_FAULT(PROC *p, PROC *pp, void *addr, int acc)
{
#if __DEBUG >= 1
	/* name this function in the panic message (it used to claim
	   PROC_VM_FAULT, hiding which entry point was misused) */
	if (__unlikely(p->flags & PR_BLOCKREASON))
		KERNEL$SUICIDE("PROC_OTHER_VM_FAULT: ALREADY BLOCKED (FLAGS %X)", p->flags);
#endif
#if __DEBUG >= 2 && defined(__GNUC__)
	p->xcpt_ip = __builtin_return_address(0);
#else
	p->xcpt_ip = 0;
#endif
	/* fault_proc != p makes SOFTFAULT_IORQ_FAILURE set PR_XCPT_OTHERPROC */
	p->fault_proc = pp;
	p->fault_address = addr;
	p->fault_access = acc;
	p->fault_iorq.fn = SOFTFAULT_IORQ_FAILURE;
	p->fault_iorq.tmp1 = (unsigned long)FAULT_IORQ_SUCCESS;
	p->fault_iorq.status = RQS_PROCESSING;
	if (__likely(!VM_FAULT(pp, addr, acc, &p->fault_iorq))) return 0;
	PROC_BLOCK(p, PR_SOFTFAULT);
	return 1;
}

DECL_IOCALL(PROC_WAIT_SUCCESS, SPL_DEV, IORQ)
{
	PROC *p = GET_STRUCT(RQ, PROC, fault_iorq);
	PROC_UNBLOCK(p);
	RETURN;
}

DECL_AST(PROC_WAIT_INTERRUPTED, SPL_DEV, IORQ)
{
	PROC *p = GET_STRUCT(RQ, PROC, fault_iorq);
#if __DEBUG >= 1
	if (__unlikely((p->fault_iorq.status & RQS_PROC_MASK) == RQS_PROC))
		KERNEL$SUICIDE("PROC_WAIT_INTERRUPTED: ACTIVE REQUEST, STATUS %lX", p->fault_iorq.status);
#endif
	RETURN_IORQ(&p->fault_iorq, PROC_WAIT_SUCCESS);
}

/*
 * PROC_WAIT: put process p to sleep on wait queue wq.  The process'
 * fault_iorq is queued on wq with PROC_WAIT_SUCCESS / PROC_WAIT_INTERRUPTED
 * as continuations, the SPL is brought back to SPL_DEV and the process is
 * blocked with reason PR_WQ_WAIT.
 */
void PROC_WAIT(PROC *p, WQ *wq)
{
	p->fault_iorq.status = RQS_PROCESSING;
	p->fault_iorq.tmp1 = (unsigned long)PROC_WAIT_SUCCESS;
	p->fault_iorq.fn = PROC_WAIT_INTERRUPTED;

	WQ_WAIT_F(wq, &p->fault_iorq);
	LOWER_SPL(SPL_DEV);

	PROC_BLOCK(p, PR_WQ_WAIT);
}


/*
 * SYSCALL_RETURN: store the syscall return value ret for process p.
 *
 * ARCH_SYSCALL_RETURN yields 0 on success, 1 for a failure that cannot be
 * fixed by paging, or otherwise the address that must be faulted in for
 * writing.  Faults that resolve immediately are retried in place.
 *
 * Returns 0 when the value was stored, 1 when it was not (the process is
 * then either blocked on the page fault or the failure was the special
 * value 1).  Must be called at SPL_DEV (checked when __DEBUG >= 1).
 */
__finline__ int SYSCALL_RETURN(PROC *p, unsigned long ret)
{
	unsigned long fault;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_DEV)))
		KERNEL$SUICIDE("SYSCALL_RETURN AT SPL %08X", KERNEL$SPL);
#endif
	for (;;) {
		fault = ARCH_SYSCALL_RETURN(p, ret);
		if (__likely(!fault)) return 0;
		if (__unlikely(fault == 1)) return 1;
		if (__unlikely(PROC_VM_FAULT(p, (void *)fault, PF_WRITE))) return 1;
	}
}

/*
 * Variant of SYSCALL_RETURN without the SPL check and without page-fault
 * handling: hands the raw result of ARCH_SYSCALL_RETURN back to the caller.
 */
static __finline__ unsigned long SYSCALL_RETURN_ANY_SPL(PROC *p, unsigned long ret)
{
	unsigned long result = ARCH_SYSCALL_RETURN(p, ret);

	return result;
}

/*
void dumpx(PROC *p)
{
	unsigned long data;
	uread_32(p, 0x438010, data, goto skip2;);
	__debug_printf("0x438010: %x\n", data);
	skip2:;
}
*/

/*
 * VM_FAULT_EXCEPTION: handle a page fault raised by the running process
 * (PROC_RUN).
 *
 * addr  - faulting address
 * wr    - access flags handed to VM_FAULT
 * ip    - instruction pointer at the time of the fault
 *
 * An address that lies in bank VM_KERNEL_COPY_OF_LAST_BANK is first moved
 * down by (VM_KERNEL_COPY_OF_LAST_BANK - __KERNEL_USER_VBANKS + 1) banks.
 * The fault is then started; if it cannot be satisfied immediately the
 * process is blocked with PR_SOFTFAULT.  In either case the function drops
 * to SPL_USER and jumps to the idle loop instead of returning.
 * Must be entered at SPL_DEV (checked when __DEBUG >= 1).
 */
void VM_FAULT_EXCEPTION(void *addr, int wr, void *ip)
{
	PROC *faulter = PROC_RUN;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_DEV)))
		KERNEL$SUICIDE("VM_FAULT_EXCEPTION AT SPL %08X", KERNEL$SPL);
#endif
	if (__unlikely(((unsigned long)addr & ~(unsigned long)(PG_SIZE * PG_BANK - 1)) == (unsigned long)VM_KERNEL_COPY_OF_LAST_BANK * PG_SIZE * PG_BANK)) {
		addr = (char *)addr - (unsigned long)(VM_KERNEL_COPY_OF_LAST_BANK - __KERNEL_USER_VBANKS + 1) * PG_SIZE * PG_BANK;
	}

	faulter->xcpt_ip = ip;
	faulter->fault_proc = faulter;
	faulter->fault_address = addr;
	faulter->fault_access = wr;
	faulter->fault_iorq.fn = SOFTFAULT_IORQ_FAILURE;
	faulter->fault_iorq.tmp1 = (unsigned long)FAULT_IORQ_SUCCESS;
	faulter->fault_iorq.status = RQS_PROCESSING;

	if (__unlikely(VM_FAULT(faulter, addr, wr, &PROC_RUN->fault_iorq))) {
		PROC_BLOCK(faulter, PR_SOFTFAULT);
	}
	LOWER_SPL(SPL_USER);
	JMP_IDLE_LOOP();
}

/*
 * USER_EXCEPTION: the running process (PROC_RUN) raised exception `except`
 * at instruction ip, concerning address addr.  The process is blocked with
 * PR_HARDFAULT, the exception is recorded in its xcpt_* fields and delivery
 * is started through XCPT_IORQ_SUCCESS.  The function then drops to
 * SPL_USER and jumps to the idle loop instead of returning.
 * Must be entered at SPL_DEV (checked when __DEBUG >= 1).
 */
void USER_EXCEPTION(unsigned long except, void *addr, void *ip)
{
	PROC *culprit = PROC_RUN;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_DEV)))
		KERNEL$SUICIDE("USER_EXCEPTION AT SPL %08X", KERNEL$SPL);
#endif
	PROC_BLOCK(culprit, PR_HARDFAULT);

	culprit->xcpt_ip = ip;
	culprit->xcpt_address = addr;
	culprit->xcpt_type = except;
	culprit->xcpt_error = 0;

	culprit->fault_iorq.fn = XCPT_IORQ_FAILURE;
	culprit->fault_iorq.tmp1 = (unsigned long)XCPT_IORQ_SUCCESS;
	CALL_IORQ(&culprit->fault_iorq, XCPT_IORQ_SUCCESS);

	LOWER_SPL(SPL_USER);
	JMP_IDLE_LOOP();
}

DECL_IOCALL(IORQ_POST_AST, SPL_DEV, IORQ)
{
	UIORQ *u = GET_STRUCT(RQ, UIORQ, pfw);
	RETURN_AST(&u->u.iorq);
}

DECL_AST(IORQ_PAGE_ERROR, SPL_DEV, IORQ)
{
	UIORQ *u = GET_STRUCT(RQ, UIORQ, pfw);
	PROC *p = u->proc;
#if __DEBUG >= 1
	if (__unlikely((u->pfw.status & RQS_PROC_MASK) == RQS_PROC))
		KERNEL$SUICIDE("IORQ_PAGE_ERROR: ACTIVE REQUEST, STATUS %lX", u->pfw.status);
#endif
	if (__likely(!p->error_msg[0])) {
		_snprintf(p->error_msg, __MAX_STR_LEN, "ERROR POSTING AST %p (STATUS %ld): %s", u->iorq, u->u.iorq.status, strerror(-RQ->status));
		p->error = RQ->status;
		dumpxcpt(p);
	}
	/* order is important! PROC_FREE_UIORQ may free the process, however
	   SHUTDOWN_PROCESS never frees it if it has pending UIORQ */
	/* also, do not call SHUTDOWN_PROCESS needlessly --- to avoid quadratic
	   complexity w.r.t. number of UIORQs */
	if (!(p->flags & PR_RUNDOWN)) SHUTDOWN_PROCESS(p, (void *)&KERNEL$LIST_END);
	PROC_FREE_UIORQ(u);
	RETURN;
}

/*
 * SYSCALL_UNMAP: unmap the range [start, start + len) of the running
 * process and, when bit 0 of sys is set, additionally ask the swapper
 * handle to zap the pages (SWAP_OP_ZAP_PAGES).
 *
 * Whenever the operation has to wait, the process is blocked and the
 * function leaves through END_SYSCALL without storing a return value
 * (presumably so that the syscall is restarted after wake-up -- TODO
 * confirm).  On completion 0 is returned to the process; a failure to
 * obtain the swapper handle returns that error code.
 */
static void SYSCALL_UNMAP(unsigned long sys, unsigned long start, unsigned long len)
{
	WQ *wq;
	/*__debug_printf("syscall: %lx, %lx, %lx\n", sys, start, len);*/
	/* a returned wait queue means the unmap could not be done yet */
	if (__unlikely((wq = VM_ARCH_UNMAP_RANGE(PROC_RUN, start, len)) != NULL)) {
		PROC_WAIT(PROC_RUN, wq);
		goto end;
	}
	if (sys & 1) {
		HANDLE *h;
		int n;
		recall:
		/* prepare fault_iorq in case GET_HANDLE has to go asynchronous */
		PROC_RUN->fault_iorq.fn = PROC_WAIT_INTERRUPTED;
		PROC_RUN->fault_iorq.tmp1 = (unsigned long)PROC_WAIT_SUCCESS;
		PROC_RUN->fault_iorq.status = RQS_PROCESSING;
		h = GET_HANDLE(PROC_RUN, SWAPPER_HANDLE, 0, &PROC_RUN->fault_iorq);
		/* NULL: the lookup is pending on fault_iorq, block until it completes */
		if (__unlikely(!h)) {
			PROC_BLOCK(PROC_RUN, PR_SOFTFAULT);
			goto end;
		}
		if (__unlikely(__IS_ERR(h))) {
			LOWER_SPL(SPL_DEV);
			SYSCALL_RETURN(PROC_RUN, __PTR_ERR(h));
			goto end;
		}
		n = h->op->swap_op(h, SWAP_OP_ZAP_PAGES, (__v_off)start, (__v_off)len, PROC_RUN);
		LOWER_SPL(SPL_DEV);
		/* n == 0: done; n == 2: wait for free memory; any other nonzero
		   value: repeat the operation right away */
		if (__unlikely(n)) {
			if (__unlikely(n == 2)) {
				PROC_WAIT(PROC_RUN, &KERNEL$FREEMEM_WAIT);
				goto end;
			}
			goto recall;
		}
	}
	SYSCALL_RETURN(PROC_RUN, 0);
	end:
	END_SYSCALL;
}

/*
 * SYSCALL_EXIT: terminate the running process.
 *
 * sys    - syscall word (not used here)
 * error  - exit status stored in the process' error field
 * msg    - optional user pointer to an error message string (0 = none)
 *
 * The message, if any, is copied into error_msg (cleared again if the copy
 * reports an error or faults).  The syscall return value 0 is stored first;
 * if that blocks on a page fault the message is cleared and the function
 * leaves through END_SYSCALL.  Otherwise the process is handed to
 * SHUTDOWN_PROCESS.
 */
static void SYSCALL_EXIT(unsigned long sys, unsigned long error, unsigned long msg)
{
	PROC *p;
	void *v;
	/*__debug_printf("syscall: %lx, %lx, %lx\n", sys, error, msg);*/
	if (msg) {
		reread:
		uread_str(PROC_RUN, msg, PROC_RUN->error_msg, __MAX_STR_LEN, 0, v, {
			/* the read faulted: drop the partial message, page in and
			   retry immediately when possible, otherwise leave blocked */
			PROC_RUN->error_msg[0] = 0;
			if (__likely(!PROC_VM_FAULT(PROC_RUN, v, 0))) goto reread;
			goto end;
		});
		if (__unlikely(__IS_ERR(v))) PROC_RUN->error_msg[0] = 0;
	}
	p = PROC_RUN;
	if (__unlikely(SYSCALL_RETURN(PROC_RUN, 0))) {
		p->error_msg[0] = 0;
		goto end;
	}
	
	if (PROC_RUN == PROC_FPU) FLUSH_FPU();	/* not needed, just a performance
		enhancement. we can flush fpu now because we know there's
		nobody under us (note that kernel might use fpu too although
		any kernel code using it needs to save/restore fpu state) ...
		but we don't know it in VM_ARCH code. that causes delayed
		flushing and waiting on wait queue */
	
	PROC_RUN->error = error;
	if (__likely(error != -ESTOPPED) && __likely(error != -EBACKGROUND)) {
			/* the process won't run anymore */
		/* so the link to the process it was forked from can be cut now */
		if (__unlikely(PROC_RUN->fork_parent != NULL)) {
			PROC_RUN->fork_parent->fork_child = NULL;
			PROC_RUN->fork_parent = NULL;
		}
	}
	SHUTDOWN_PROCESS(PROC_RUN, (void *)&KERNEL$LIST_END);
	end:
	END_SYSCALL;
}

DECL_IOCALL(OPEN_SUCCESS, SPL_DEV, IORQ)
{
	UIORQ *u = GET_STRUCT(RQ, UIORQ, u.openrq);
	HANDLE *h;
	CHECK_RQ_STATE((IORQ *)&u->u.openrq);
	h = GET_HANDLE(u->proc, u->u.openrq.handle, u->u.openrq.flags, (IORQ *)&u->u.openrq);
	if (__unlikely(__IS_ERR(h))) {
		CHECK_RQ_STATE((IORQ *)&u->u.openrq);
		u->u.openrq.status = __PTR_ERR(h);
		RETURN_AST(&u->u.openrq);
	}
	if (__unlikely(!h)) RETURN;
	CHECK_RQ_STATE((IORQ *)&u->u.openrq);
	if (__likely(!(u->u.openrq.flags & _O_CLOSE))) {
		if (__unlikely(h->op->open_handle != NULL) && __likely(!(u->u.openrq.flags & _O_NOOPEN_CALL))) h->op->open_handle(h, u->u.openrq.flags);
		u->u.openrq.status = u->u.openrq.handle;
	} else {
		KERNEL$DETACH_HANDLE(h), h->file_addrspace = NULL;
		u->u.openrq.status = 0;
	}
	RETURN_AST(&u->u.openrq);
}

DECL_AST(OPEN_AST, SPL_DEV, IORQ)
{
	unsigned long pf;
	UIORQ *u = GET_STRUCT(RQ, UIORQ, u.openrq);
#if __DEBUG >= 1
	if (__unlikely((u->u.openrq.status & RQS_PROC_MASK) == RQS_PROC))
		KERNEL$SUICIDE("OPEN_AST: ACTIVE REQUEST, STATUS %lX", u->u.openrq.status);
#endif
	copyagain:
	pf = PROC_POST_IO(u->proc, u->iorq, u->u.iorq.status);
	if (__unlikely(pf != 0)) {
		u->pfw.fn = IORQ_PAGE_ERROR;
		u->pfw.status = RQS_PROCESSING;
		u->pfw.tmp1 = (unsigned long)IORQ_POST_AST;
		if (!VM_FAULT(u->proc, (void *)(pf & ~1UL), (pf & PF_WRITE) | PF_SWAPPER, &u->pfw)) goto copyagain;
		RETURN;
	}
	PROC_FREE_UIORQ(u);
	RETURN;
}

/*
 * SYSCALL_OPEN: start an open (or, with _O_CLOSE, a detach) of handle n
 * for the running process.
 *
 * sys  - syscall word; its SYS_EXTENDED_PARAM bits become the open flags
 * rq   - user address of the OPENRQ; must satisfy IORQ_ALIGN
 * n    - handle number
 *
 * The syscall itself returns 0 as soon as a UIORQ has been set up; the
 * result of the open is delivered asynchronously through OPEN_AST.  Paths
 * that have to wait (page fault, no UIORQ available, pending handle lookup)
 * leave through END_SYSCALL.
 */
static void SYSCALL_OPEN(unsigned long sys, unsigned long rq, unsigned long n)
{
	WQ *wq;
	UIORQ *u;
	HANDLE *h;
	int rename_handle;
	CHECK_RQ_ALIGN(rq, IORQ_ALIGN, end);
	/*__debug_printf("syscall: %lx, %lx, %lx\n", sys, rq, n);*/
	/* for a rename, fetch the second handle number from the user request */
	if (__unlikely((sys & _O_RENAME) != 0)) {
		rd1:
		uread_int(PROC_RUN, &((OPENRQ *)rq)->rename_handle, rename_handle, {
			if (__likely(!PROC_VM_FAULT(PROC_RUN, &((OPENRQ *)rq)->rename_handle, PF_SWAPPER))) goto rd1;
			goto end;
		});
	} else rename_handle = -1;
	u = PROC_ALLOC_UIORQ(PROC_RUN, &wq);
	if (__unlikely(!u)) {
		PROC_WAIT(PROC_RUN, wq);
		goto end;
	}
	/* the syscall return value must be stored before the request is
	   started; if that blocks, undo the allocation */
	if (__unlikely(SYSCALL_RETURN(PROC_RUN, 0))) {
		PROC_FREE_UIORQ(u);
		goto end;
	}
	u->iorq = (IORQ *)rq;
	u->u.openrq.fn = OPEN_AST;
	u->u.openrq.tmp1 = (unsigned long)OPEN_SUCCESS;
	u->u.openrq.status = RQS_PROCESSING;
	u->u.openrq.handle = n;
	u->u.openrq.flags = sys & SYS_EXTENDED_PARAM;
	u->u.openrq.rename_handle = rename_handle;
	h = GET_HANDLE(PROC_RUN, n, sys & SYS_EXTENDED_PARAM, (IORQ *)&u->u.openrq);
	/*__debug_printf("GET_HANDLE: %p.", h);*/
	if (__unlikely(__IS_ERR(h))) {
		CHECK_RQ_STATE((IORQ *)&u->u.openrq);
		u->u.openrq.status = __PTR_ERR(h);
		goto post_err;
	}
	/* NULL: the lookup continues asynchronously via OPEN_SUCCESS */
	if (__unlikely(!h)) goto end;
	CHECK_RQ_STATE((IORQ *)&u->u.openrq);
	if (__likely(!(u->u.openrq.flags & _O_CLOSE))) {
		if (__unlikely(h->op->open_handle != NULL) && __likely(!(u->u.openrq.flags & _O_NOOPEN_CALL))) h->op->open_handle(h, u->u.openrq.flags);
		u->u.openrq.status = n;
	} else {
		KERNEL$DETACH_HANDLE(h), h->file_addrspace = NULL;
		LOWER_SPL(SPL_DEV);
		u->u.openrq.status = 0;
	}
	/* completed synchronously: run the AST and go to the idle loop */
	post_err:
	LOWER_SPL(SPL_USER);
	CALL_AST(&u->u.openrq);
	JMP_IDLE_LOOP();
	return;

	end:
	END_SYSCALL;
}

DECL_IOCALL(CLOSE_SUCCESS, SPL_DEV, CLOSERQ)
{
	int r;
	UIORQ *u = GET_STRUCT(RQ, UIORQ, u.closerq);
	HANDLE *h;
	GET_HANDLE_FAST(h, u->proc, u->u.closerq.h, 0, (IORQ *)&u->u.closerq, {
		if (__unlikely(__IS_ERR(h))) {
			u->u.closerq.status = __PTR_ERR(h);
			RETURN_AST(&u->u.closerq);
		}
		if (__unlikely(!h)) RETURN;
	});
	if (__unlikely(!h->op->close_handle)) r = 0;
	else {
		r = h->op->close_handle(h, (IORQ *)&u->u.closerq);
		if (__unlikely(r > 0)) RETURN;
	}
	u->u.closerq.status = r;
	RETURN_AST(&u->u.closerq);
}

/*
 * CLOSE_AST - completion AST of a user close request.
 *
 * Posts the final status into the user's request (u->iorq) with PROC_POST_IO,
 * resolving page faults on the way, then detaches the handle if it is still
 * present and frees the kernel-side UIORQ.
 */
DECL_AST(CLOSE_AST, SPL_DEV, IORQ)
{
	unsigned long pf;
	UIORQ *u = GET_STRUCT(RQ, UIORQ, u.closerq);
	HANDLE *h;
#if __DEBUG >= 1
	/* a request that is still marked as in-process must never reach its AST */
	if (__unlikely((u->u.closerq.status & RQS_PROC_MASK) == RQS_PROC))
		KERNEL$SUICIDE("CLOSE_AST: ACTIVE REQUEST, STATUS %lX", u->u.closerq.status);
#endif
	copyagain:
	/* non-zero result is the faulting user address (low bit is used as a flag) */
	pf = PROC_POST_IO(u->proc, u->iorq, u->u.closerq.status);
	if (__unlikely(pf != 0)) {
		/* prepare the page-fault wait request embedded in the UIORQ */
		u->pfw.fn = IORQ_PAGE_ERROR;
		u->pfw.status = RQS_PROCESSING;
		u->pfw.tmp1 = (unsigned long)IORQ_POST_AST;
		/* zero from VM_FAULT: retry the post right away; otherwise leave
		   and let u->pfw continue the work (presumably asynchronously) */
		if (!VM_FAULT(u->proc, (void *)(pf & ~1UL), (pf & PF_WRITE) | PF_SWAPPER, &u->pfw)) goto copyagain;
		RETURN;
	}
	/* the status has been delivered; drop the handle if it still exists */
	h = GET_HANDLE_TEST(u->proc, u->u.closerq.h, 1);
	if (__likely(h != NULL)) KERNEL$DETACH_HANDLE(h), h->file_addrspace = NULL;
	LOWER_SPL(SPL_DEV);
	PROC_FREE_UIORQ(u);
	RETURN;
}

/*
 * SYSCALL_CLOSE - close handle number n on behalf of the running process.
 *
 * sys: syscall word (not used by this function)
 * rq:  user address of the request; must satisfy IORQ_ALIGN
 * n:   handle number
 *
 * Fast path: the handle is present and has no close_handle operation; it is
 * detached at once and the syscall returns 1.
 * Slow path: a UIORQ is allocated, the syscall returns 0 and the close is
 * carried out by CLOSE_SUCCESS / CLOSE_AST, which post the result to rq.
 */
static void SYSCALL_CLOSE(unsigned long sys, unsigned long rq, unsigned long n)
{
	WQ *wq;
	UIORQ *u;
	HANDLE *h;
	/*__debug_printf("syscall: %lx, %lx, %lx, depth %d\n", sys, rq, n, PROC_RUN->depth);*/
	CHECK_RQ_ALIGN(rq, IORQ_ALIGN, end);
	h = GET_HANDLE_TEST(PROC_RUN, n, 0);
	if (__likely(h != NULL)) {
		if (__likely(!h->op->close_handle)) {
			/* nothing for the driver to do: close synchronously */
			KERNEL$DETACH_HANDLE(h), h->file_addrspace = NULL;
			LOWER_SPL(SPL_DEV);
			SYSCALL_RETURN(PROC_RUN, 1);
			goto end;
		}
		LOWER_SPL(SPL_DEV);
	}
	u = PROC_ALLOC_UIORQ(PROC_RUN, &wq);
	if (__unlikely(!u)) {
		/* no UIORQ available: wait on the returned queue */
		PROC_WAIT(PROC_RUN, wq);
		goto end;
	}
	if (__unlikely(SYSCALL_RETURN(PROC_RUN, 0))) {
		/* the return value could not be delivered; undo the allocation */
		PROC_FREE_UIORQ(u);
		goto end;
	}
	u->iorq = (IORQ *)rq;
	u->u.closerq.fn = CLOSE_AST;
	u->u.closerq.tmp1 = (unsigned long)CLOSE_SUCCESS;
	u->u.closerq.status = RQS_PROCESSING;
	u->u.closerq.h = n;
	if (__unlikely(!h)) {
		/* the handle was not found by the quick test above; do the full
		   lookup with the request attached */
		GET_HANDLE_FAST(h, PROC_RUN, n, 0, (IORQ *)&u->u.closerq, {
			if (__unlikely(__IS_ERR(h))) {
				u->u.closerq.status = __PTR_ERR(h);
				goto post_err;
			}
			/* NULL: presumably the lookup has queued the request itself
			   - TODO confirm */
			if (__unlikely(!h)) goto end;
		});
	}
	LOWER_SPL(SPL_USER);
	CALL_IORQ(&u->u.closerq, CLOSE_SUCCESS);
	JMP_IDLE_LOOP();
	return;

	post_err:
	/* deliver the lookup error through the normal completion path */
	LOWER_SPL(SPL_USER);
	CALL_AST(&u->u.closerq);
	JMP_IDLE_LOOP();
	return;

	end:
	END_SYSCALL;
}

/*
 * SYSCALL_FAST_CLOSE - detach handle number n without creating a request.
 *
 * sys and arg3 are not used.  The syscall result (0) is delivered first; only
 * when that succeeds is the handle looked up and, if present, detached.
 */
static void SYSCALL_FAST_CLOSE(unsigned long sys, unsigned long n, unsigned long arg3)
{
	/*__debug_printf("syscall: %lx, %lx, %lx\n", sys, n, arg3);*/
	if (__likely(!SYSCALL_RETURN(PROC_RUN, 0))) {
		HANDLE *victim = GET_HANDLE_TEST(PROC_RUN, n, 1);
		if (__likely(victim != NULL)) {
			KERNEL$DETACH_HANDLE(victim);
			victim->file_addrspace = NULL;
		}
	}
	END_SYSCALL;
}

/*
 * Instantiate the generic I/O syscall template PROCIO.I for SIORQ requests.
 * The template produces SYSCALL_READWRITE (see SYSCALL_NAME); the low bit of
 * the syscall word selects the read (0) or write (1) handle operation.
 * NOTE(review): the parameter macros are defined again below without #undef,
 * so PROCIO.I presumably undefines them at its end - confirm.
 */
#define IORQ_ SIORQ
#define IORQ_AST_ SIORQ_AST
#define __FN_IORQ_AST_ __FN_SIORQ_AST
#define IORQ_RECALL_ SIORQ_RECALL
#define __FN_IORQ_RECALL_ __FN_SIORQ_RECALL
#define iorq_ siorq
/* handle flag to test: HANDLE_NUM_READ for even sys, shifted left by one for odd sys */
#define test_flags(sys)	(HANDLE_NUM_READ << (sys & 1))
/* start the request on the read or write operation of handle h */
#define op(sys) if (__likely(!((sys) & 1))) CALL_IORQ_LSTAT_EXPR(&u->u.iorq_, h->op->read); else CALL_IORQ_LSTAT_EXPR(&u->u.iorq_, h->op->write)
/* same selection, in the RETURN_IORQ_LSTAT form */
#define rop(sys) if (__likely(!((sys) & 1))) RETURN_IORQ_LSTAT(&u->u.iorq_, h->op->read); else RETURN_IORQ_LSTAT(&u->u.iorq_, h->op->write)
#define POST PROC_POST_SIO
#define GET PROC_GET_SIO
#define SYSCALL_NAME SYSCALL_READWRITE

#include "PROCIO.I"

/*
 * Instantiate the generic I/O syscall template PROCIO.I for AIORQ requests.
 * The template produces SYSCALL_AREADAWRITE; the low bit of the syscall word
 * selects the aread (0) or awrite (1) handle operation.
 */
#define IORQ_ AIORQ
#define IORQ_AST_ AIORQ_AST
#define __FN_IORQ_AST_ __FN_AIORQ_AST
#define IORQ_RECALL_ AIORQ_RECALL
#define __FN_IORQ_RECALL_ __FN_AIORQ_RECALL
#define iorq_ aiorq
/* handle flag to test: HANDLE_NUM_READ for even sys, shifted left by one for odd sys */
#define test_flags(sys)	(HANDLE_NUM_READ << (sys & 1))
/* start the request on the aread or awrite operation of handle h */
#define op(sys) if (__likely(!((sys) & 1))) CALL_IORQ_LSTAT_EXPR(&u->u.iorq_, h->op->aread); else CALL_IORQ_LSTAT_EXPR(&u->u.iorq_, h->op->awrite)
/* same selection, in the RETURN_IORQ_LSTAT form */
#define rop(sys) if (__likely(!((sys) & 1))) RETURN_IORQ_LSTAT(&u->u.iorq_, h->op->aread); else RETURN_IORQ_LSTAT(&u->u.iorq_, h->op->awrite)
#define POST PROC_POST_AIO
#define GET PROC_GET_AIO
#define SYSCALL_NAME SYSCALL_AREADAWRITE

#include "PROCIO.I"

/*
 * Instantiate the generic I/O syscall template PROCIO.I for IOCTLRQ requests.
 * The template produces SYSCALL_IOCTL; the request always goes to the
 * handle's ioctl operation.
 * NOTE(review): test_flags is not defined for this instantiation; presumably
 * PROCIO.I treats it as optional - confirm.
 */
#define IORQ_ IOCTLRQ
#define IORQ_AST_ IOCTLRQ_AST
#define __FN_IORQ_AST_ __FN_IOCTLRQ_AST
#define IORQ_RECALL_ IOCTLRQ_RECALL
#define __FN_IORQ_RECALL_ __FN_IOCTLRQ_RECALL
#define iorq_ ioctlrq
#define op(sys)	CALL_IORQ_LSTAT_EXPR(&u->u.iorq_, h->op->ioctl)
#define rop(sys) RETURN_IORQ_LSTAT(&u->u.iorq_, h->op->ioctl)
#define POST PROC_POST_IOCTL
#define GET PROC_GET_IOCTL
#define SYSCALL_NAME SYSCALL_IOCTL

#include "PROCIO.I"

/*
 * SYSCALL_BLOCK - block the running process if a user word still has the
 * expected value.
 *
 * sys: syscall word (not used)
 * wq:  user address of a pointer-sized word; must satisfy PTR_ALIGN
 * wqx: value the word is expected to hold
 *
 * The word at wq is read from user space; the syscall returns 0 and the
 * process is put into PR_IO_WAIT only when the value read equals wqx.
 */
static void SYSCALL_BLOCK(unsigned long sys, unsigned long wq, unsigned long wqx)
{
	void *rd;
	/*__debug_printf("syscall: %lx, %lx, %lx\n", sys, arg2, arg3);*/
	retry:
	CHECK_RQ_ALIGN(wq, PTR_ALIGN, end);
	uread_ptr(PROC_RUN, (void *)wq, rd, {
		/* read faulted: retry when PROC_VM_FAULT returns zero, otherwise
		   leave the syscall */
		if (__likely(!PROC_VM_FAULT(PROC_RUN, (void *)wq, 0))) goto retry;
		goto end;
	});
	if (__unlikely(SYSCALL_RETURN(PROC_RUN, 0))) goto end;
	/* the word changed in the meantime: do not block */
	if (__unlikely(rd != (void *)wqx)) goto end;
	PROC_BLOCK(PROC_RUN, PR_IO_WAIT);
	end:
	END_SYSCALL;
}

/*
 * SYSCALL_SET_TIMER - (re)arm the per-process timer of the running process.
 *
 * sys: syscall word (not used)
 * lo, hi: halves of the 64-bit timer value, combined with __make64(lo, hi)
 *
 * The syscall result (0) is delivered first.  When that succeeds, any pending
 * timer is removed and the timer is set again, all at SPL_TIMER.
 */
static void SYSCALL_SET_TIMER(unsigned long sys, unsigned long lo, unsigned long hi)
{
	/*__debug_printf("syscall: %lx, %lx, %lx\n", sys, lo, hi);*/
	if (__likely(!SYSCALL_RETURN(PROC_RUN, 0))) {
		RAISE_SPL(SPL_TIMER);
		KERNEL$DEL_TIMER(&PROC_RUN->timer);
		KERNEL$SET_TIMER(__make64(lo, hi), &PROC_RUN->timer);
	}
	END_SYSCALL;
}

/*
 * PROC_TIMER_FN - callback of the timer embedded in a PROC.
 *
 * Lowers the SPL to SPL_TIMER, voids the timer's list entry and, unless a
 * timer request of this process is already posted, marks it as posted and
 * starts PROC_TIMER_IORQ on the process's timer_iorq.
 */
static void PROC_TIMER_FN(TIMER *t)
{
	PROC *owner;
	LOWER_SPL(SPL_TIMER);
	VOID_LIST_ENTRY(&t->list);
	owner = GET_STRUCT(t, PROC, timer);
	if (__likely(!owner->timer_iorq_posted)) {
		owner->timer_iorq_posted = 1;
		CALL_IORQ(&owner->timer_iorq, PROC_TIMER_IORQ);
	}
}

/*
 * PROC_TIMER_IORQ - deliver a timer expiry to the process owning timer_iorq.
 *
 * A process in rundown is passed to SHUTDOWN_PROCESS instead.  Otherwise the
 * expiry is posted with PROC_POST_TIMER (resolving page faults on the way)
 * and the process is released with UNBLOCK_IO.
 */
DECL_IOCALL(PROC_TIMER_IORQ, SPL_DEV, IORQ)
{
	unsigned long pf;
	PROC *p = GET_STRUCT(RQ, PROC, timer_iorq);
	if (__unlikely(p->flags & PR_RUNDOWN)) {
		p->timer_iorq_posted = 0;
		SHUTDOWN_PROCESS(p, (void *)&KERNEL$LIST_END);
		RETURN;
	}
	again:
	/* non-zero result is the faulting user address (low bit is used as a flag) */
	pf = PROC_POST_TIMER(p);
	if (__unlikely(pf != 0)) {
		/* on a failed fault PROC_TIMER_FAILED runs; tmp1 names this
		   function, presumably so the request is restarted here once
		   the page is available - TODO confirm */
		p->timer_iorq.fn = PROC_TIMER_FAILED;
		p->timer_iorq.tmp1 = (unsigned long)PROC_TIMER_IORQ;
		/* zero from VM_FAULT: retry the post right away */
		if (__likely(!VM_FAULT(p, (void *)(pf & ~1UL), (pf & PF_WRITE) | PF_SWAPPER, &p->timer_iorq))) goto again;
		RETURN;
	}
	p->timer_iorq_posted = 0;
	UNBLOCK_IO(p);
	RETURN;
}

/*
 * PROC_TIMER_FAILED - AST installed by PROC_TIMER_IORQ for the case that the
 * page fault needed to post a timer expiry fails.
 *
 * Records the error in the process (unless an error message is already set)
 * and shuts the process down.
 */
DECL_AST(PROC_TIMER_FAILED, SPL_DEV, IORQ)
{
	PROC *p = GET_STRUCT(RQ, PROC, timer_iorq);
#if __DEBUG >= 1
	/* a request that is still marked as in-process must never reach its AST */
	if (__unlikely((p->timer_iorq.status & RQS_PROC_MASK) == RQS_PROC))
		KERNEL$SUICIDE("PROC_TIMER_FAILED: ACTIVE REQUEST, STATUS %lX", p->timer_iorq.status);
#endif
	p->timer_iorq_posted = 0;
	/* keep the first error: only fill in when no message is recorded yet */
	if (__likely(!p->error_msg[0])) {
		_snprintf(p->error_msg, __MAX_STR_LEN, "ERROR POSTING TIMER: %s", strerror(-RQ->status));
		p->error = RQ->status;
	}
	SHUTDOWN_PROCESS(p, (void *)&KERNEL$LIST_END);
	RETURN;
}

/*
 * SYSCALL_LN_UNBLOCK - return 0 to the caller and invoke UNBLOCK_LNT().
 * None of the three arguments is used.  The result of SYSCALL_RETURN is not
 * checked, so UNBLOCK_LNT() runs unconditionally.
 */
static void SYSCALL_LN_UNBLOCK(unsigned long sys, unsigned long arg2, unsigned long arg3)
{
	/*__debug_printf("syscall: %lx, %lx, %lx\n", sys, arg2, arg3);*/
	SYSCALL_RETURN(PROC_RUN, 0);
	UNBLOCK_LNT();
	END_SYSCALL;
}

/*
 * SYSCALL_LN_LIST - one step of a listing done by get_lnm() for the running
 * process.
 *
 * sys:     syscall word (not used)
 * rq:      user address of an LN_LIST_STAT structure; must satisfy
 *          LN_LIST_STAT_ALIGN
 * str_len: space available for the result (stored as result_left)
 *
 * The cursor fields depth, hash and ptr are read from the user structure,
 * get_lnm() is called, and the updated cursor is written back to depth_res,
 * hash_res and ptr_res.  The syscall returns the distance between the final
 * result_addr and the start of the user's result field, or the negative value
 * returned by get_lnm().
 */
static void SYSCALL_LN_LIST(unsigned long sys, unsigned long rq, unsigned long str_len)
{
	int r;
	struct ln_list_internal ln_list_internal;
	DECL_LN_LIST_STAT(1) *u_ln_list_stat = (void *)rq;
	/*__debug_printf("syscall: %lx, %lx, %lx\n", sys, ptr, arg3);*/
	ln_list_internal.result_left = str_len;
	/* presumably result is an array member, so this only forms a user
	   address and does not touch user memory - TODO confirm */
	ln_list_internal.result_addr = u_ln_list_stat->result;
	ln_list_internal.proc = PROC_RUN;
	ln_list_internal.success = 0;
	ln_list_internal.j = KERNEL$GET_JIFFIES_LO();
	CHECK_RQ_ALIGN(rq, LN_LIST_STAT_ALIGN, end);
	/* each user read below retries after a successfully handled fault and
	   leaves the syscall otherwise */
	rd1:
	uread_int(PROC_RUN, &u_ln_list_stat->depth, ln_list_internal.depth, {
		if (__likely(!PROC_VM_FAULT(PROC_RUN, &u_ln_list_stat->depth, 0))) goto rd1;
		goto end;
	});
	rd2:
	uread_int(PROC_RUN, &u_ln_list_stat->hash, ln_list_internal.hash, {
		if (__likely(!PROC_VM_FAULT(PROC_RUN, &u_ln_list_stat->hash, 0))) goto rd2;
		goto end;
	});
	rd3:
	uread_ptr(PROC_RUN, &u_ln_list_stat->ptr, ln_list_internal.ptr, {
		if (__likely(!PROC_VM_FAULT(PROC_RUN, &u_ln_list_stat->ptr, 0))) goto rd3;
		goto end;
	});
	r = get_lnm(&ln_list_internal);
	if (__unlikely(r < 0)) {
		/* negative result is passed to the caller as the syscall result */
		SYSCALL_RETURN(PROC_RUN, r);
		goto end;
	}
	if (__unlikely(r == 1)) {
		/* VM fault happened */
		goto end;
	}
	/* write the updated cursor back; same fault handling as for the reads,
	   but the faults are requested with PF_WRITE */
	wr1:
	uwrite_int(PROC_RUN, &u_ln_list_stat->depth_res, ln_list_internal.depth, {
		if (__likely(!PROC_VM_FAULT(PROC_RUN, &u_ln_list_stat->depth_res, PF_WRITE))) goto wr1;
		goto end;
	});
	wr2:
	uwrite_int(PROC_RUN, &u_ln_list_stat->hash_res, ln_list_internal.hash, {
		if (__likely(!PROC_VM_FAULT(PROC_RUN, &u_ln_list_stat->hash_res, PF_WRITE))) goto wr2;
		goto end;
	});
	wr3:
	uwrite_ptr(PROC_RUN, &u_ln_list_stat->ptr_res, ln_list_internal.ptr, {
		if (__likely(!PROC_VM_FAULT(PROC_RUN, &u_ln_list_stat->ptr_res, PF_WRITE))) goto wr3;
		goto end;
	});
	/* how far result_addr advanced from the start of the result field */
	SYSCALL_RETURN(PROC_RUN, ln_list_internal.result_addr - u_ln_list_stat->result);
	
	end:
	END_SYSCALL;
}

/*
 * SYSCALL_SPAWN - create a child process of the running process.
 *
 * sys:  syscall word; bit 0 set selects the fork variant, which additionally
 *       performs SWAP_OP_FORK on the swapper handle and links parent and
 *       child through fork_parent / fork_child
 * rq:   user address of a KSPAWNRQ; must satisfy KSPAWNRQ_ALIGN
 * arg3: not used
 *
 * The syscall returns -EDQUOT when the caller is already at depth
 * MAX_PROC_DEPTH - 1, an error code from ALLOC_PROC, SETUP_PROC, the jobname
 * read or the swapper handle lookup, or the caller's depth on success.
 * Every failure after the PROC has been allocated goes through dealloc_end,
 * which releases it with FREE_ALLOCED_PROC.
 */
static void SYSCALL_SPAWN(unsigned long sys, unsigned long rq, unsigned long arg3)
{
	char jobname[9];
	WQ *wq;
	KSPAWNRQ *u_ksp = (KSPAWNRQ *)rq;
	PROC *p, *pp;
	char **opts;
	int n_opts;
	void *s;
	HANDLE *h;
	unsigned long vf;
	/*__debug_printf("syscall: %lx, %lx, %lx\n", sys, rq, arg3);*/
	CHECK_RQ_ALIGN((unsigned long)u_ksp, KSPAWNRQ_ALIGN, end);
	if (__unlikely(PROC_RUN->depth == MAX_PROC_DEPTH - 1)) {
		SYSCALL_RETURN(PROC_RUN, -EDQUOT);
		goto end;
	}
	alloc:
	/* remember the caller: ALLOC_PROC may leave PROC_RUN changed, in which
	   case the syscall is abandoned without touching the new PROC_RUN */
	pp = PROC_RUN;
	p = ALLOC_PROC(pp, &wq);
	/*__debug_printf("returned: %p/%p\n", p, wq);*/
	if (__unlikely(__IS_ERR(p))) {
		if (__unlikely(PROC_RUN != pp)) goto end;
		SYSCALL_RETURN(PROC_RUN, __PTR_ERR(p));
		goto end;
	}
	if (__unlikely(!p)) {
		if (__unlikely(PROC_RUN != pp)) {
			/*__debug_printf("proc run changed\n");*/
			goto end;
		}
		/* no PROC and no wait queue: try again immediately */
		if (__likely(!wq)) goto alloc;
		PROC_WAIT(PROC_RUN, wq);
		goto end;
	}
	p->parent_rq = u_ksp;
	/* copy the spawn parameters from the user's KSPAWNRQ; each read retries
	   after a successfully handled fault and gives up otherwise */
	r1:
	uread_ptr(PROC_RUN, &u_ksp->lnt, p->parent_lnt, {
		if (__likely(!PROC_VM_FAULT(PROC_RUN, &u_ksp->lnt, 0))) goto r1;
		goto dealloc_end;
	});
	CHECK_RQ_ALIGN((unsigned long)p->parent_lnt, LNT_ALIGN, dealloc_end);
	r2:
	uread_int(PROC_RUN, &u_ksp->ln_mode, p->ln_mode, {
		if (__likely(!PROC_VM_FAULT(PROC_RUN, &u_ksp->ln_mode, 0))) goto r2;
		goto dealloc_end;
	});
	r3:
	uread_ptr(PROC_RUN, &u_ksp->forktable, p->parent_forktable, {
		if (__likely(!PROC_VM_FAULT(PROC_RUN, &u_ksp->forktable, 0))) goto r3;
		goto dealloc_end;
	});
	CHECK_RQ_ALIGN((unsigned long)p->parent_forktable, PTR_ALIGN, dealloc_end);
	r4:
	uread_int(PROC_RUN, &u_ksp->forktable_n, p->parent_forktable_n, {
		if (__likely(!PROC_VM_FAULT(PROC_RUN, &u_ksp->forktable_n, 0))) goto r4;
		goto dealloc_end;
	});
	r5:
	uread_ptr(PROC_RUN, &u_ksp->options, opts, {
		if (__likely(!PROC_VM_FAULT(PROC_RUN, &u_ksp->options, 0))) goto r5;
		goto dealloc_end;
	});
	CHECK_RQ_ALIGN((unsigned long)opts, PTR_ALIGN, dealloc_end);
	r6:
	uread_int(PROC_RUN, &u_ksp->n_options, n_opts, {
		if (__likely(!PROC_VM_FAULT(PROC_RUN, &u_ksp->n_options, 0))) goto r6;
		goto dealloc_end;
	});
	r7:
	/* SETUP_PROC returns NULL on success, an error pointer, or the user
	   address that faulted */
	if (__unlikely((s = SETUP_PROC(p, opts, n_opts)) != NULL)) {
		if (__unlikely(__IS_ERR(s))) {
			SYSCALL_RETURN(PROC_RUN, __PTR_ERR(s));
			goto dealloc_end;
		}
		if (__likely(!PROC_VM_FAULT(PROC_RUN, s, 0))) goto r7;
		goto dealloc_end;
	}
	r8:
	uread_str(PROC_RUN, &u_ksp->jobname, jobname, 9, 0, s, {
		if (__likely(!PROC_VM_FAULT(PROC_RUN, s, 0))) goto r8;
		goto dealloc_end;
	});
	if (__unlikely(__IS_ERR(s))) {
		SYSCALL_RETURN(PROC_RUN, __PTR_ERR(s));
		goto dealloc_end;
	}
	strncpy(p->jobname, jobname, 9);
	if (sys & 1) {
		/* fork variant: the swapper handle is needed for SWAP_OP_FORK */
		PROC_RUN->fault_iorq.fn = PROC_WAIT_INTERRUPTED;
		PROC_RUN->fault_iorq.tmp1 = (unsigned long)PROC_WAIT_SUCCESS;
		PROC_RUN->fault_iorq.status = RQS_PROCESSING;
		h = GET_HANDLE(PROC_RUN, SWAPPER_HANDLE, 0, &PROC_RUN->fault_iorq);
		if (__unlikely(!h)) {
			/* handle not available yet: block the caller on fault_iorq */
			PROC_BLOCK(PROC_RUN, PR_SOFTFAULT);
			goto dealloc_end;
		}
		if (__unlikely(__IS_ERR(h))) {
			SYSCALL_RETURN(PROC_RUN, __PTR_ERR(h));
			goto dealloc_end;
		}
	} else h = 0;	/* warning go away */
	/* vf is an integer (0 on success, 1, or a faulting user address), so it
	   is tested against 0 rather than the pointer constant NULL */
	if (__unlikely((vf = SYSCALL_RETURN_ANY_SPL(PROC_RUN, PROC_RUN->depth)) != 0)) {
		LOWER_SPL(SPL_DEV);
		/* the value 1 is returned without a fault address to resolve */
		if (__likely(vf != 1))
			PROC_VM_FAULT(PROC_RUN, (void *)vf, PF_WRITE);
		goto dealloc_end;
	}
	if (sys & 1) {
		h->op->swap_op(h, SWAP_OP_FORK, jobname);
		LOWER_SPL(SPL_DEV);
		if (__unlikely(PROC_RUN->fork_child != NULL)) KERNEL$SUICIDE("SYSCALL_SPAWN: PROC HAS ALREADY FORK CHILD");
		p->fork_parent = PROC_RUN;
		PROC_RUN->fork_child = p;
		VM_ARCH_AFTER_FORK(PROC_RUN);
	}
	/*__debug_printf("new proc(%s)\n", p->jobname);*/
	PROC_UNBLOCK(p);
#if __DEBUG_WAIT_PAGEFAULTS > 0
	/* debugging aid: randomly kill a freshly forked child */
	if (__unlikely(sys & 1) && (random() & 255) < __DEBUG_WAIT_PAGEFAULTS) {
		if (__likely(!(p->flags & PR_RUNDOWN))) p->error = -EINTR;
		SHUTDOWN_PROCESS(p, (WQ **)(void *)&KERNEL$LIST_END);
	}
#endif
	goto end;
	dealloc_end:
	FREE_ALLOCED_PROC(p);
	end:
	/*__debug_printf("end spawn syscall.");*/
	END_SYSCALL;
}

/*
 * SYSCALL_CANCEL - cancel the operation identified by the user request rq.
 *
 * sys and arg3 are not used.  The syscall result (0) is delivered first.
 * If rq matches an outstanding UIORQ of the running process, that request is
 * cancelled with KERNEL$CIO.  Otherwise, if rq matches the spawn request of a
 * child process, the child is shut down (with error -EINTR unless it is
 * already in rundown).  Only the first match is acted on.
 */
static void SYSCALL_CANCEL(unsigned long sys, unsigned long rq, unsigned long arg3)
{
	PROC *p;
	UIORQ *u;
	/*__debug_printf("syscall: %lx, %lx, %lx\n", sys, rq, arg3);*/
	if (__unlikely(SYSCALL_RETURN(PROC_RUN, 0))) goto ret;
	/* outstanding I/O requests are identified by their user address */
	XLIST_FOR_EACH(u, &PROC_RUN->uiorqs, UIORQ, list) if (__unlikely(u->iorq == (IORQ *)rq)) {
		KERNEL$CIO(&u->u.iorq);
		goto ret;
	}
	/* children are identified by the KSPAWNRQ they were spawned with */
	XLIST_FOR_EACH_UNLIKELY(p, &PROC_RUN->children, PROC, child_entry) if (__unlikely(p->parent_rq == (KSPAWNRQ *)rq)) {
		if (__likely(!(p->flags & PR_RUNDOWN))) p->error = -EINTR;
		SHUTDOWN_PROCESS(p, (void *)&KERNEL$LIST_END);
		goto ret;
	}
	ret:
	END_SYSCALL;
}

/*
 * BREAK_1_PROC - deliver a break to a single process.
 *
 * A process in rundown is left alone.  Otherwise its LN cache is cleared, its
 * timer is removed and the timer callback is invoked directly at SPL_TIMER;
 * the SPL is then lowered to SPL_DEV.  Always returns 0.
 */
static int BREAK_1_PROC(PROC *p)
{
	if (__likely(!(p->flags & PR_RUNDOWN))) {
		CLEAR_LN_CACHE(p);
		RAISE_SPL(SPL_TIMER);
		KERNEL$DEL_TIMER(&p->timer);
		/* fire the timer callback by hand, as if the timer had expired */
		p->timer.fn(&p->timer);
		LOWER_SPL(SPL_DEV);
	}
	return 0;
}

/*
 * DO_BREAK - send a break to children of process r.
 *
 * r:         process whose children list is scanned
 * rq:        spawn request identifying the child; NULL selects every child
 * recursive: non-zero applies BREAK_1_PROC through FOR_ALL_PROCS; zero
 *            applies only CLEAR_LN_CACHE through FOR_ALL_PROCS and breaks
 *            just the selected child itself
 *
 * Must be called at SPL_DEV; any other SPL is fatal (KERNEL$SUICIDE).
 * NOTE(review): FOR_ALL_PROCS presumably visits the given process and all of
 * its descendants - confirm against its definition.
 */
void DO_BREAK(PROC *r, KSPAWNRQ *rq, unsigned recursive)
{
	PROC *p;
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_DEV)))
		KERNEL$SUICIDE("DO_BREAK AT SPL %08X", KERNEL$SPL);
	XLIST_FOR_EACH(p, &r->children, PROC, child_entry) if (p->parent_rq == rq || __unlikely(!rq)) {
		if (__likely(!recursive)) {
			FOR_ALL_PROCS(p, CLEAR_LN_CACHE);
			BREAK_1_PROC(p);
		} else {
			FOR_ALL_PROCS(p, BREAK_1_PROC);
		}
		/* with a specific rq, stop after the first matching child */
		if (__likely(rq != NULL)) break;
	}
}

/*
 * SYSCALL_BREAK - break the child spawned with request rq (every child when
 * rq is 0).
 *
 * Bit 0 of sys is passed to DO_BREAK as the recursive flag; arg3 is not used.
 * The syscall result (0) is delivered first and DO_BREAK runs only when that
 * succeeds.
 */
static void SYSCALL_BREAK(unsigned long sys, unsigned long rq, unsigned long arg3)
{
	/*__debug_printf("syscall: %lx, %lx, %lx\n", sys, rq, arg3);*/
	if (__likely(!SYSCALL_RETURN(PROC_RUN, 0))) {
		DO_BREAK(PROC_RUN, (KSPAWNRQ *)rq, sys & 1);
	}
	END_SYSCALL;
}

/*
 * SYSCALL_DEPTH - return the depth field of the running process.
 * None of the three arguments is used.
 */
static void SYSCALL_DEPTH(unsigned long sys, unsigned long arg2, unsigned long arg3)
{
	/*__debug_printf("syscall: %lx, %lx, %lx\n", sys, arg2, arg3);*/
	SYSCALL_RETURN(PROC_RUN, PROC_RUN->depth);
	END_SYSCALL;
}

static void SYSCALL_EXEC(unsigned long sys, unsigned long arg2, unsigned long arg3)
{
	/*__debug_printf("syscall: %lx, %lx, %lx\n", sys, arg2, arg3);*/
	if (__unlikely(SYSCALL_RETURN(PROC_RUN, 0))) goto end;
	if (PROC_RUN->fork_parent) {
		PROC_RUN->fork_parent->fork_child = NULL;
		PROC_RUN->fork_parent = NULL;
	}
	end:
	END_SYSCALL;
}

/*
 * SYSCALL_CANCEL_ALL - cancel every outstanding UIORQ of the running process.
 *
 * None of the three arguments is used.  While requests are outstanding they
 * are all cancelled with KERNEL$CIO and the process is blocked in PR_IO_WAIT
 * without a syscall result being set.  With no outstanding request the
 * syscall returns 0.
 * NOTE(review): the first END_SYSCALL is expected not to fall through to the
 * SYSCALL_RETURN below - confirm against the macro definition.
 */
static void SYSCALL_CANCEL_ALL(unsigned long sys, unsigned long arg2, unsigned long arg3)
{
	UIORQ *u;
	PROC *p = PROC_RUN;
	/*__debug_printf("syscall: %lx, %lx, %lx\n", sys, arg2, arg3);*/
	if (!XLIST_EMPTY(&p->uiorqs)) {
		XLIST_FOR_EACH(u, &p->uiorqs, UIORQ, list) KERNEL$CIO(&u->u.iorq);
		PROC_BLOCK(p, PR_IO_WAIT);
		END_SYSCALL;
	}
	SYSCALL_RETURN(p, 0);
	END_SYSCALL;
}

/*
 * SYSCALL_FLUSH_LN_CACHE - apply CLEAR_LN_CACHE through FOR_ALL_PROCS starting
 * at the running process, then return 0.  None of the arguments is used.
 */
static void SYSCALL_FLUSH_LN_CACHE(unsigned long sys, unsigned long arg2, unsigned long arg3)
{
	FOR_ALL_PROCS(PROC_RUN, CLEAR_LN_CACHE);
	SYSCALL_RETURN(PROC_RUN, 0);
	END_SYSCALL;
}

/* Syscall dispatch table, filled by INIT_SYSCALLS and indexed by
   (syscall number >> SYSCALL_SHIFT). */
void (*SYSCALLS[N_SYSCALLS])(unsigned long arg1, unsigned long arg2, unsigned long arg3);

/*
 * USER_SYSCALL - default handler installed in every SYSCALLS slot that has no
 * dedicated implementation: the syscall returns -ENOSYS.
 * The arguments are ignored.
 */
void USER_SYSCALL(unsigned long arg1, unsigned long arg2, unsigned long arg3)
{
	/*__debug_printf("syscall: %lx, %lx, %lx\n", arg1, arg2, arg3);*/
	/* nothing else to do whether or not the result could be delivered */
	SYSCALL_RETURN(PROC_RUN, -ENOSYS);
	END_SYSCALL;
}

static void INIT_SYSCALLS(void)
{
	int i;
	for (i = 0; i < N_SYSCALLS; i++) SYSCALLS[i] = USER_SYSCALL;
	SYSCALLS[SYS_UNMAP_RANGE >> SYSCALL_SHIFT] = SYSCALL_UNMAP;
	SYSCALLS[SYS_EXIT >> SYSCALL_SHIFT] = SYSCALL_EXIT;
	SYSCALLS[SYS_OPEN >> SYSCALL_SHIFT] = SYSCALL_OPEN;
	SYSCALLS[SYS_READ >> SYSCALL_SHIFT] = SYSCALL_READWRITE;
	SYSCALLS[SYS_AREAD >> SYSCALL_SHIFT] = SYSCALL_AREADAWRITE;
	SYSCALLS[SYS_IOCTL >> SYSCALL_SHIFT] = SYSCALL_IOCTL;
	SYSCALLS[SYS_BLOCK >> SYSCALL_SHIFT] = SYSCALL_BLOCK;
	SYSCALLS[SYS_CLOSE >> SYSCALL_SHIFT] = SYSCALL_CLOSE;
	SYSCALLS[SYS_FAST_CLOSE >> SYSCALL_SHIFT] = SYSCALL_FAST_CLOSE;
	SYSCALLS[SYS_SET_TIMER >> SYSCALL_SHIFT] = SYSCALL_SET_TIMER;
	SYSCALLS[SYS_LN_UNBLOCK >> SYSCALL_SHIFT] = SYSCALL_LN_UNBLOCK;
	SYSCALLS[SYS_LN_LIST >> SYSCALL_SHIFT] = SYSCALL_LN_LIST;
	SYSCALLS[SYS_SPAWN >> SYSCALL_SHIFT] = SYSCALL_SPAWN;
	SYSCALLS[SYS_CANCEL >> SYSCALL_SHIFT] = SYSCALL_CANCEL;
	SYSCALLS[SYS_DEPTH >> SYSCALL_SHIFT] = SYSCALL_DEPTH;
	SYSCALLS[SYS_BREAK >> SYSCALL_SHIFT] = SYSCALL_BREAK;
	SYSCALLS[SYS_EXEC >> SYSCALL_SHIFT] = SYSCALL_EXEC;
	SYSCALLS[SYS_CANCEL_ALL >> SYSCALL_SHIFT] = SYSCALL_CANCEL_ALL;
	SYSCALLS[SYS_INVD_EXTD_PAGE >> SYSCALL_SHIFT] = SYSCALL_INVD_EXTD_PAGE;
	SYSCALLS[SYS_FLUSH_LN_CACHE >> SYSCALL_SHIFT] = SYSCALL_FLUSH_LN_CACHE;
}

