#include <SPAD/AC.H>
#include <SPAD/WQ.H>
#include <SPAD/LIBC.H>
#include <SPAD/SLAB.H>
#include <KERNEL/VM_ARCH.H>
#include <SPAD/SYNC.H>
#include <SPAD/TIMER.H>
#include <ARCH/MOV.H>
#include <SPAD/DL.H>
#include <DLFCN.H>
#include <KERNEL/PARAMS.H>

#include <KERNEL/VM.H>
#include <SPAD/ALLOC.H>

/* Not referenced in the visible part of this file. */
#define CACHE_RESERVE_PCT		8
/* Minimum jiffies between two increments of a free target (see can_increase). */
#define TARGET_INCREASE_LATENCY		(JIFFIES_PER_SECOND)
/* Minimum jiffies since the last increment before a free target may be decremented (see can_decrease). */
#define TARGET_DECREASE_LATENCY		(TARGET_INCREASE_LATENCY + 120 * JIFFIES_PER_SECOND)
/* Interval after which freemem_timer_fn shrinks user_free_target. */
#define USER_TARGET_DECREASE_LATENCY	(JIFFIES_PER_SECOND * 5)

/* Used as a divisor in VM_REFRESH_WATERMARKS: maximum targets are (pages / FREE_MEMORY_PCT) + 1. */
#define FREE_MEMORY_PCT		16

/* Delay used when arming vm_timer. */
#define VM_JIFFIES		(JIFFIES_PER_SECOND / 40)
/* Delay used when freemem_timer_fn re-arms freemem_timer. */
#define FREEMEM_JIFFIES		(JIFFIES_PER_SECOND * 5)
/* Number of freemem_timer_fn runs between calls to VM_REFRESH_WATERMARKS. */
#define WATERMARK_REFRESH_N	12

#if __DEBUG_MALLOC_SHORTAGE > 0
#undef FREEMEM_JIFFIES
#define FREEMEM_JIFFIES		1
#endif

#if __DEBUG >= 2
/*
 * With TEST_MEMORY, KERNEL$FREE_USER_PAGE stamps three magic words (and,
 * under GCC, the caller's return address) into each freed page and
 * ZONE_ALLOC verifies the magic words when the page is handed out again.
 * The offsets below are indices into the page viewed as an array of __u32.
 */
#define TEST_MEMORY
#define TEST_OFFSET_1		0
#define TEST_OFFSET_2		(__offsetof(struct __slpage, __n_alloc) / 4)
#define TEST_OFFSET_3		((PAGE_CLUSTER_SIZE - 4) / 4)
#define CALLER_OFFSET		((PAGE_CLUSTER_SIZE - 16) / 4)
#define MAGIC_1			0x055D4929U
#define MAGIC_2			0xEF8E8D2CU
#define MAGIC_3			0xE6D581BBU
#endif

/* One allocation zone: a list of free pages plus a count of them. */
struct pg_zone {
	/* Free pages, linked through PAGE.hash_entry. */
	LIST_HEAD freelist;
	/* Number of pages on freelist; ZONE_ALLOC cross-checks it against list emptiness in debug builds. */
	unsigned long n_free;
};

/*
 * Release handlers defined further down in this file. Code in this file
 * also compares PAGE.release against them to tell what state a page is in.
 */
extern IO_STUB NO_PAGE_RELEASE;
extern IO_STUB KERNEL_PAGE_RELEASE;
extern IO_STUB UNALLOCATED_PAGE_RELEASE;
extern IO_STUB RELEASE_FREE_PAGE;

/*
 * Page counters indexed by VM type. The extra slot at index N_VM_TYPES is
 * used by callers that pass N_VM_TYPES as the type; KERNEL$FREE_USER_PAGE
 * exempts that index from its underflow check.
 */
static unsigned long VM_STATS[N_VM_TYPES + 1];

/*
 * Per-zone state: the zone itself, its current free target, the upper
 * bound for that target (set by VM_REFRESH_WATERMARKS) and the jiffies
 * value recorded when the target was last raised.
 */
static struct pg_zone KERNEL_ZONE;
static unsigned long kernel_free_target;
static unsigned long max_kernel_free_target;
static u_jiffies_lo_t last_kernel_target_incr = 0;
static struct pg_zone IO_ZONE;
static unsigned long io_free_target;
static unsigned long max_io_free_target;
static u_jiffies_lo_t last_io_target_incr = 0;
#if KERNEL_HIGH_ZONE
static struct pg_zone HIGH_ZONE;
static unsigned long high_free_target;
static unsigned long max_high_free_target;
static u_jiffies_lo_t last_high_target_incr = 0;
#endif
/* Extra free-page target raised by KERNEL$MAY_ALLOC and decayed by freemem_timer_fn. */
static unsigned long user_free_target;
static u_jiffies_lo_t last_user_target_decr = 0;
/* If nonzero, VM_REFRESH_WATERMARKS uses this value for all maximum targets. */
unsigned long VM_FREEMEM_WATERMARK;
/* Countdown of freemem_timer_fn runs until the next VM_REFRESH_WATERMARKS. */
static int VM_FREEMEM_WATERMARK_REFRESH;

/* Wait queues woken from KERNEL$FREE_USER_PAGE and/or freemem_timer_fn. */
WQ_DECL(KERNEL$FREEMEM_WAIT, "KERNEL$FREEMEM_WAIT");
WQ_DECL(WRITEBACK_WAIT, "KERNEL$WRITEBACK_WAIT");
WQ_DECL(FREEMEM_ABOVE, "KERNEL$FREEMEM_ABOVE");
WQ_DECL(FREE_ENTITIES, "KERNEL$FREE_ENTITIES");

/*
 * Rate limiters for target adjustment. x is the jiffies value of the last
 * increment; the subtraction is cast to u_jiffies_lo_t before comparing.
 */
#define can_increase(x)		((u_jiffies_lo_t)(KERNEL$GET_JIFFIES_LO() - (x)) >= TARGET_INCREASE_LATENCY)
#define can_decrease(j, x)	((u_jiffies_lo_t)((j) - (x)) >= TARGET_DECREASE_LATENCY)

/*
 * Free-memory predicates over the sum of all zones' n_free:
 *   FREE_UNDER_MIN        - at or below (top-level target + user_free_target),
 *                           or VM_ENTITIES exceeds VM_ENTITIES_SOFT_LIMIT
 *   FREE_UNDER_KERNEL_MIN - at or below half of the top-level target
 *   FREE_ZERO             - no free page in any zone
 * The top-level target is io_free_target without a high zone and
 * high_free_target with one.
 */
#if !KERNEL_HIGH_ZONE
#define FREE_UNDER_MIN()	(IO_ZONE.n_free + KERNEL_ZONE.n_free <= io_free_target + user_free_target || __unlikely(VM_ENTITIES > VM_ENTITIES_SOFT_LIMIT))
#define FREE_UNDER_KERNEL_MIN()	(IO_ZONE.n_free + KERNEL_ZONE.n_free <= (io_free_target >> 1))
#define FREE_ZERO()		(!(IO_ZONE.n_free + KERNEL_ZONE.n_free))
#else
#define FREE_UNDER_MIN()	(HIGH_ZONE.n_free + IO_ZONE.n_free + KERNEL_ZONE.n_free <= high_free_target + user_free_target || __unlikely(VM_ENTITIES > VM_ENTITIES_SOFT_LIMIT))
#define FREE_UNDER_KERNEL_MIN()	(HIGH_ZONE.n_free + IO_ZONE.n_free + KERNEL_ZONE.n_free <= (high_free_target >> 1))
#define FREE_ZERO()		(!(HIGH_ZONE.n_free + IO_ZONE.n_free + KERNEL_ZONE.n_free))
#endif

/* Timer armed by START_OPTIONAL_VM_TIMER; vm_timer_pending is set while it is armed. */
static DECL_TIMER(vm_timer);
static int vm_timer_pending = 0;

/* Timer re-armed by freemem_timer_fn and by DO_MEMORY_BALANCE. */
static DECL_TIMER(freemem_timer);

/*
 * Last-resort reclaim hook: asks the slab allocator to reap and passes its
 * result straight back to the caller.
 */
static int OOM(void)
{
	int reaped = KERNEL$SLAB_REAP();
	return reaped;
}

/*
 * Request object and "already queued" flag for DO_MEMORY_BALANCE. The flag
 * is set by MEMORY_BALANCE and cleared at the end of DO_MEMORY_BALANCE.
 */
extern AST_STUB DO_MEMORY_BALANCE;
static volatile int memory_balance_in_progress = 0;
static IORQ memory_balance = { DO_MEMORY_BALANCE };

/*
 * Same arrangement for DO_CACHE_FREE: set by MEMORY_BALANCE, cleared at the
 * end of DO_CACHE_FREE.
 */
extern AST_STUB DO_CACHE_FREE;
static volatile int cache_free_in_progress = 0;
static AST cache_free = { DO_CACHE_FREE };

static void START_OPTIONAL_VM_TIMER(void);

/*
 * Queue the background reclaim work if memory is low.
 *
 * Must be entered at SPL_TOP (enforced in debug builds). When the lockup
 * level is at least LOCKUP_LEVEL_ALL_IORQS nothing is queued; a retry timer
 * may be started instead. Otherwise:
 *   - DO_MEMORY_BALANCE is queued when the kernel zone (or, with a high
 *     zone, kernel + I/O zones) is at or below its target and is not
 *     already queued;
 *   - DO_CACHE_FREE is queued when FREE_UNDER_MIN() holds and it is not
 *     already queued.
 */
void MEMORY_BALANCE(void)
{
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_TOP))) KERNEL$SUICIDE("MEMORY_BALANCE AT SPL %08X", KERNEL$SPL);
#endif
	if (__unlikely(KERNEL$LOCKUP_LEVEL >= LOCKUP_LEVEL_ALL_IORQS)) {
		START_OPTIONAL_VM_TIMER();
		return;
	}

	/* Zone rebalancing. */
	if (__likely(!memory_balance_in_progress) && (
		__unlikely(KERNEL_ZONE.n_free <= kernel_free_target)
#if KERNEL_HIGH_ZONE
		|| __unlikely(IO_ZONE.n_free + KERNEL_ZONE.n_free <= io_free_target)
#endif
	)) {
		memory_balance_in_progress = 1;
		CALL_AST(&memory_balance);
	}

	/* Cache shrinking. */
	if (!cache_free_in_progress && __likely(FREE_UNDER_MIN())) {
		cache_free_in_progress = 1;
		CALL_AST(&cache_free);
	}
}

#ifdef TEST_MEMORY
/*
 * Print one 16-byte row of a hex dump: the offset, the bytes in hex, then
 * the same bytes as characters with values outside 32..126 shown as '.'.
 */
static void DISPLAY_ROW(__u8 *addr, unsigned offset)
{
	const __u8 *row = addr + offset;
	unsigned col;

	__critical_printf("%06X: ", offset);
	for (col = 0; col < 16; col++)
		__critical_printf("%02X ", (unsigned char)row[col]);
	__critical_printf(" ");
	for (col = 0; col < 16; col++) {
		unsigned char ch = row[col];
		if (ch < 32 || ch >= 127)
			ch = '.';
		__critical_printf("%c", ch);
	}
	__critical_printf("\n");
}

/*
 * Report a corrupted free page and stop the kernel.
 *
 * Dumps the first 64 and last 32 bytes of the page, resolves the address
 * stored at CALLER_OFFSET and the release handler of the preceding PAGE
 * entry (skipped for the page at physical address 0) to symbol names, and
 * calls KERNEL$SUICIDE with the details.
 */
static __NORET_ATTR__ void TEST_PAGE_FAILED(PAGE *p, __u32 *v)
{
	__u8 *bytes = (__u8 *)v;
	void *freed_by;
	void *prev_release = NULL;
	const char *freed_sym;
	const char *prev_sym;
	unsigned long freed_off;
	unsigned long prev_off;
	int pos;

	for (pos = 0; pos < 64; pos += 16)
		DISPLAY_ROW(bytes, pos);
	for (pos = 32; pos > 0; pos -= 16)
		DISPLAY_ROW(bytes, PAGE_CLUSTER_SIZE - pos);

	freed_by = *(void **)&v[CALLER_OFFSET];
	freed_sym = KERNEL$DL_GET_SYMBOL_NAME(freed_by, &freed_off, 0);

	if (PAGE_2_PHYS(p) != 0)
		prev_release = p[-1].release;
	prev_sym = KERNEL$DL_GET_SYMBOL_NAME(prev_release, &prev_off, 0);

	KERNEL$SUICIDE("TEST_PAGE_FAILED: PAGE %"__64_format"X ON FREELIST CORRUPTED (MAGIC_1 %08X, MAGIC_2 %08X, MAGIC_3 %08X), FREED AT %s+%lX. PREVIOUS PAGE RELEASE %s+%lX",
		(__u64)PAGE_2_PHYS(p),
		v[TEST_OFFSET_1], v[TEST_OFFSET_2], v[TEST_OFFSET_3],
		freed_sym ? freed_sym : "?", freed_off,
		prev_sym ? prev_sym : "?", prev_off);
}
#endif

/*
 * Take the first page off a zone's free list.
 *
 * Returns NULL when the zone has no free pages. Debug builds first verify
 * that n_free and list emptiness agree; TEST_MEMORY builds additionally
 * check the three magic words in the page and report corruption through
 * TEST_PAGE_FAILED.
 */
static __finline__ PAGE *ZONE_ALLOC(struct pg_zone *z)
{
	PAGE *pg;
#if __DEBUG >= 1
	if (__unlikely(!z->n_free != LIST_EMPTY(&z->freelist)))
		KERNEL$SUICIDE("ZONE_ALLOC: FREELIST SKEW, N_FREE == %ld", z->n_free);
#endif
	if (__unlikely(z->n_free == 0))
		return NULL;

	pg = LIST_STRUCT(z->freelist.next, PAGE, hash_entry);
	DEL_FROM_LIST(&pg->hash_entry);
	z->n_free--;
#ifdef TEST_MEMORY
	{
		__u32 *words = KERNEL$MAP_PHYSICAL_PAGE(pg);
		int intact = words[TEST_OFFSET_1] == MAGIC_1 &&
			     words[TEST_OFFSET_2] == MAGIC_2 &&
			     words[TEST_OFFSET_3] == MAGIC_3;
		if (__unlikely(!intact))
			TEST_PAGE_FAILED(pg, words);
		KERNEL$UNMAP_PHYSICAL_BANK(words);
	}
#endif
	return pg;
}

/*
 * Allocate one page, preferring IO_ZONE and falling back to KERNEL_ZONE.
 *
 * type indexes VM_STATS; the counter is incremented on success.
 * On success the page's release handler is set to NO_PAGE_RELEASE and the
 * page is returned; MEMORY_BALANCE() is called first if the free counts
 * are at or below the relevant targets.
 * On failure MEMORY_BALANCE() is called, io_free_target may be raised by
 * one (bounded by max_io_free_target and rate-limited by can_increase),
 * and NULL is returned.
 * The caller's SPL is restored before returning.
 */
PAGE *KERNEL$ALLOC_IO_PAGE(int type)
{
	PAGE *p;
	int spl = KERNEL$SPL;
	RAISE_SPL(SPL_TOP);
	if (__likely((p = ZONE_ALLOC(&IO_ZONE)) != NULL)) {
#if !KERNEL_HIGH_ZONE
		if (__unlikely(IO_ZONE.n_free + KERNEL_ZONE.n_free <= io_free_target + user_free_target))
#else
		if (__unlikely(IO_ZONE.n_free + KERNEL_ZONE.n_free <= io_free_target) || __unlikely(IO_ZONE.n_free + KERNEL_ZONE.n_free + HIGH_ZONE.n_free <= high_free_target + user_free_target))
#endif
			/* "mb" is also the jump target of the KERNEL_ZONE fallback below */
			mb: MEMORY_BALANCE();
		/* "is": common success tail shared with the fallback path */
		is:
		VM_STATS[type]++;
		p->release = NO_PAGE_RELEASE;
		LOWER_SPLX(spl);
		return p;
	}
	TEST_SPLX(spl, SPL_X(SPL_TOP));
	if (__likely((p = ZONE_ALLOC(&KERNEL_ZONE)) != NULL)) {
		if (__unlikely(KERNEL_ZONE.n_free <= kernel_free_target)) goto mb;
		goto is;
	}
	TEST_SPLX(spl, SPL_X(SPL_TOP));
	/* both zones empty: request reclaim and consider raising the target */
	MEMORY_BALANCE();
	if (__likely(io_free_target < max_io_free_target)) {
		if (can_increase(last_io_target_incr)) {
			io_free_target++;
			last_io_target_incr = KERNEL$GET_JIFFIES_LO();
#if KERNEL_HIGH_ZONE
			/* keep high_free_target >= io_free_target */
			if (io_free_target > high_free_target) high_free_target = io_free_target, last_high_target_incr = last_io_target_incr;
#endif
		}
	}
	LOWER_SPLX(spl);
	return NULL;
}

/*
 * Allocate one page for user use.
 *
 * With KERNEL_HIGH_ZONE the zones are tried in the order HIGH_ZONE,
 * IO_ZONE, KERNEL_ZONE; success and failure handling mirrors
 * KERNEL$ALLOC_IO_PAGE, except that the target raised on failure is
 * high_free_target. Without KERNEL_HIGH_ZONE this is just
 * KERNEL$ALLOC_IO_PAGE(type).
 */
PAGE *KERNEL$ALLOC_USER_PAGE(int type)
{
#if KERNEL_HIGH_ZONE
	PAGE *p;
	int spl = KERNEL$SPL;
	RAISE_SPL(SPL_TOP);
	if (__likely((p = ZONE_ALLOC(&HIGH_ZONE)) != NULL)) {
		if (__unlikely(HIGH_ZONE.n_free + IO_ZONE.n_free + KERNEL_ZONE.n_free <= high_free_target + user_free_target))
			/* "mb" is also reached by goto from the fallback zones below */
			mb: MEMORY_BALANCE();
		/* "is": common success tail */
		is:
		VM_STATS[type]++;
		p->release = NO_PAGE_RELEASE;
		LOWER_SPLX(spl);
		return p;
	}
	TEST_SPLX(spl, SPL_X(SPL_TOP));
	if (__likely((p = ZONE_ALLOC(&IO_ZONE)) != NULL)) {
		if (__unlikely(IO_ZONE.n_free + KERNEL_ZONE.n_free <= io_free_target)) goto mb;
		goto is;
	}
	TEST_SPLX(spl, SPL_X(SPL_TOP));
	if (__likely((p = ZONE_ALLOC(&KERNEL_ZONE)) != NULL)) {
		if (__unlikely(KERNEL_ZONE.n_free <= kernel_free_target)) goto mb;
		goto is;
	}
	TEST_SPLX(spl, SPL_X(SPL_TOP));
	/* every zone is empty */
	MEMORY_BALANCE();
	if (__likely(high_free_target < max_high_free_target)) {
		if (can_increase(last_high_target_incr)) {
			high_free_target++;
			last_high_target_incr = KERNEL$GET_JIFFIES_LO();
		}
	}
	LOWER_SPLX(spl);
	return NULL;
#else
	return KERNEL$ALLOC_IO_PAGE(type);
#endif
}

/*
 * Allocate one page from KERNEL_ZONE and return PAGE_2_VIRT() of it.
 *
 * On success VM_STATS[type] is incremented and the page's release handler
 * becomes KERNEL_PAGE_RELEASE; MEMORY_BALANCE() is called first if any of
 * the checked free counts is at or below its target.
 * On failure MEMORY_BALANCE() is called, kernel_free_target may be raised
 * by one (bounded and rate-limited), the io/high targets are pulled up so
 * they are not below it, and NULL is returned.
 */
void *KERNEL$ALLOC_KERNEL_PAGE(int type)
{
	PAGE *p;
	int spl = KERNEL$SPL;
	RAISE_SPL(SPL_TOP);
	if (__likely((p = ZONE_ALLOC(&KERNEL_ZONE)) != NULL)) {
#if !KERNEL_HIGH_ZONE
		if (__unlikely(KERNEL_ZONE.n_free <= kernel_free_target) ||
		    __unlikely(IO_ZONE.n_free + KERNEL_ZONE.n_free <= io_free_target + user_free_target))
#else
		if (__unlikely(KERNEL_ZONE.n_free <= kernel_free_target) ||
		    __unlikely(IO_ZONE.n_free + KERNEL_ZONE.n_free <= io_free_target) ||
		    __unlikely(IO_ZONE.n_free + KERNEL_ZONE.n_free + HIGH_ZONE.n_free <= high_free_target + user_free_target))
#endif
			MEMORY_BALANCE();
		VM_STATS[type]++;
		p->release = KERNEL_PAGE_RELEASE;
		LOWER_SPLX(spl);
		return PAGE_2_VIRT(p);
	}
	TEST_SPLX(spl, SPL_X(SPL_TOP));
	MEMORY_BALANCE();
	if (__likely(kernel_free_target < max_kernel_free_target)) {
		if (can_increase(last_kernel_target_incr)) {
			kernel_free_target++;
			last_kernel_target_incr = KERNEL$GET_JIFFIES_LO();
			/* propagate upward: io target >= kernel target, high target >= io target */
			if (kernel_free_target > io_free_target) io_free_target = kernel_free_target, last_io_target_incr = last_kernel_target_incr;
#if KERNEL_HIGH_ZONE
			if (io_free_target > high_free_target) high_free_target = io_free_target, last_high_target_incr = last_kernel_target_incr;
#endif
		}
	}
	LOWER_SPLX(spl);
	return NULL;
}

/*
 * Return a page to the free list of its zone (p->zone).
 *
 * p    - page to free; debug builds stop the kernel if it is already free
 * type - VM_STATS index to decrement; debug builds stop the kernel on
 *        underflow unless type is N_VM_TYPES
 *
 * After the page is queued, KERNEL$FREEMEM_WAIT is woken, and
 * FREEMEM_ABOVE is woken too when total free memory has reached the
 * combined target.
 */
void KERNEL$FREE_USER_PAGE(PAGE *p, int type)
{
	int spl;
	struct pg_zone *z;
#ifdef TEST_MEMORY
	{
		/* stamp the page so ZONE_ALLOC can detect writes to it while free */
		__u32 *v = KERNEL$MAP_PHYSICAL_PAGE(p);
		__MOVNTI32(&v[TEST_OFFSET_1], MAGIC_1);
		__MOVNTI32(&v[TEST_OFFSET_2], MAGIC_2);
		__MOVNTI32(&v[TEST_OFFSET_3], MAGIC_3);
#ifdef __GNUC__
		/* remember who freed the page; printed by TEST_PAGE_FAILED */
		__MOVNTIPTR((void **)&v[CALLER_OFFSET], __builtin_return_address(0));
#endif
		__MOVNTI_FLUSH();
		KERNEL$UNMAP_PHYSICAL_BANK(v);
	}
#endif
	spl = KERNEL$SPL;
	z = p->zone;
	RAISE_SPL(SPL_TOP);
#if __DEBUG >= 1
	if (__unlikely(p->release == RELEASE_FREE_PAGE))
		KERNEL$SUICIDE("KERNEL$FREE_USER_PAGE: FREEING FREE PAGE %"__64_format"X", (__u64)PAGE_2_PHYS(p));
	p->lockdown = NULL;
#endif
	p->release = RELEASE_FREE_PAGE;
	ADD_TO_LIST(&z->freelist, &p->hash_entry);
	z->n_free++;
#if __DEBUG >= 1
	if (__unlikely(!VM_STATS[type]) && __likely(type != N_VM_TYPES))
		KERNEL$SUICIDE("KERNEL$FREE_USER_PAGE: VM_STATS[%d] UNDERFLOW", type);
#endif
	VM_STATS[type]--;
	LOWER_SPLX(spl);
	WQ_WAKE_ALL(&KERNEL$FREEMEM_WAIT);
#if !KERNEL_HIGH_ZONE
	if (__likely(IO_ZONE.n_free + KERNEL_ZONE.n_free >= io_free_target + user_free_target))
#else
	if (__likely(HIGH_ZONE.n_free + IO_ZONE.n_free + KERNEL_ZONE.n_free >= high_free_target + user_free_target))
#endif
		WQ_WAKE_ALL(&FREEMEM_ABOVE);
}

void KERNEL$FREE_KERNEL_PAGE(void *ptr, int type)
{
	KERNEL$FREE_USER_PAGE(VIRT_2_PAGE_ALIGNED(ptr), type);
}

/*
 * Move one page's worth of accounting from VM_STATS[type_from] to
 * VM_STATS[type_to]. Debug builds stop the kernel if the source counter is
 * already zero.
 */
void KERNEL_TRANSFER_VM_STATE(int type_from, int type_to)
{
	unsigned long *src = &VM_STATS[type_from];
	unsigned long *dst = &VM_STATS[type_to];
#if __DEBUG >= 1
	if (__unlikely(*src == 0))
		KERNEL$SUICIDE("KERNEL_TRANSFER_VM_STATE: VM_STATS[%d] UNDERFLOW", type_from);
#endif
	--*src;
	++*dst;
}

/*
 * vm_timer callback: mark the timer as no longer armed and re-run
 * MEMORY_BALANCE. The timer argument is not used.
 */
static void VM_TIMER(TIMER *t)
{
	(void)t;
	vm_timer_pending = 0;
	MEMORY_BALANCE();
}

/*
 * Arm vm_timer (VM_JIFFIES from now, callback VM_TIMER) if memory is still
 * low and the timer is not already pending. Does nothing when none of the
 * low-memory conditions hold.
 */
static void START_OPTIONAL_VM_TIMER(void)
{
	int saved_spl;

	if (!(__unlikely(KERNEL_ZONE.n_free <= kernel_free_target)
#if KERNEL_HIGH_ZONE
	 || __unlikely(IO_ZONE.n_free + KERNEL_ZONE.n_free <= io_free_target)
#endif
	 || FREE_UNDER_MIN()))
		return;

	saved_spl = KERNEL$SPL;
	RAISE_SPL(SPL_TOP);
	if (!vm_timer_pending) {
		vm_timer_pending = 1;
		vm_timer.fn = VM_TIMER;
		KERNEL$SET_TIMER(VM_JIFFIES, &vm_timer);
	}
	LOWER_SPLX(saved_spl);
}

/*
 * Shared page-release request used by DO_MEMORY_BALANCE and by the
 * contiguous-area code (gca_cont_scan / gca_try_to_free).
 * Note: VM_RELEASE_PAGES declares a local variable of the same name.
 */
static PAGE_RELEASE_REQUEST prr;

/* Cursor of the DO_MEMORY_BALANCE scan: current bank and page within it. */
static int release_bank = 0;
static int release_page = 0;
/* 1 if the bank currently being scanned has PBANK_DATA set, else 0. */
static int krnl_page;
/* Progress counter for the current scan; when it stays 0 DO_MEMORY_BALANCE tries to create a new data bank. */
static int n_released = 0;

/*
 * Completion of the page-release request issued by DO_MEMORY_BALANCE.
 * If the release succeeded (status 0) the page is put on its zone's free
 * list and n_released is advanced by krnl_page. VM_MUTEX is then dropped
 * and control returns to the memory_balance request.
 */
DECL_AST(PAGE_IN_ZONE_FREED, SPL_DEV, PAGE_RELEASE_REQUEST)
{
	if (__likely(!RQ->status)) {
		KERNEL$FREE_USER_PAGE(prr.pg, N_VM_TYPES);
		n_released += krnl_page;
	}
	MTX_UNLOCK(&VM_MUTEX);
	RETURN_AST(&memory_balance);
}

/*
 * Callback passed (via pbank_ast) to VM_ARCH_NEW_PBANK from
 * DO_MEMORY_BALANCE: drops VM_MUTEX and resumes the memory_balance request.
 */
DECL_AST(PBANK_CREATED, SPL_DEV, AST)
{
	MTX_UNLOCK(&VM_MUTEX);
	RETURN_AST(&memory_balance);
}

static AST pbank_ast = { &PBANK_CREATED };

/*
 * Background zone balancer, queued by MEMORY_BALANCE.
 *
 * Walks banks/pages using the persistent cursor (release_bank,
 * release_page). For a bank that qualifies, each page whose release
 * handler is not one of the four built-in stubs is asked to release itself
 * (one request at a time, under VM_MUTEX); the function is re-entered from
 * PAGE_IN_ZONE_FREED after each request. When all banks have been visited
 * and n_released is still 0 while the kernel zone is at or below target,
 * it tries to create a new PBANK_DATA bank. Finally the cursor is reset,
 * the timers are re-armed and memory_balance_in_progress is cleared.
 */
DECL_AST(DO_MEMORY_BALANCE, SPL_DEV, IORQ)
{
	memory_balance.status = RQS_PROCESSING;
	new_page:
	/* cursor at (0, 0): a scan is starting, try slab reaping first */
	if (__unlikely(!(release_bank | release_page))) OOM();
	if (release_bank < N_PBANKS) {
		PAGE *p;
		if (PBANKS[release_bank] & PBANK_DATA) krnl_page = 1;
		else krnl_page = 0;
		/* kernel zone is low and this is a data bank: scan it only if another zone has free pages */
		if (KERNEL_ZONE.n_free <= kernel_free_target && krnl_page)
			if (IO_ZONE.n_free
#if KERNEL_HIGH_ZONE
				+ HIGH_ZONE.n_free
#endif
			) goto do_rel;
#if KERNEL_HIGH_ZONE
		/* kernel + I/O zones are low, this is not a highmem bank, and the high zone has free pages */
		if (IO_ZONE.n_free + KERNEL_ZONE.n_free <= io_free_target && !(PBANKS[release_bank] & PBANK_HIGHMEM) && HIGH_ZONE.n_free) goto do_rel;
#endif
		/* bank does not qualify: skip it entirely */
		if (krnl_page) n_released++;
		release_bank++;
		release_page = 0;
		goto new_page;
		do_rel:
		p = &KERNEL_PAGE_MAP[release_bank * (PG_BANK / PG_CLUSTER) + release_page];
		/* pages owned by one of the built-in stubs are skipped without issuing a request */
		if (p->release == &UNALLOCATED_PAGE_RELEASE || p->release == &NO_PAGE_RELEASE || p->release == &KERNEL_PAGE_RELEASE || p->release == &RELEASE_FREE_PAGE) {
			if (__unlikely(++release_page == PG_BANK / PG_CLUSTER)) {
				release_page = 0;
				release_bank++;
			}
			goto new_page;
		}
		MTX_LOCK(&VM_MUTEX, &memory_balance, KERNEL$SUCCESS, RETURN);
		/* advance the cursor before issuing the request so the next entry continues after this page */
		if (__unlikely(++release_page == PG_BANK / PG_CLUSTER)) {
			release_page = 0;
			release_bank++;
		}
		prr.pg = p;
		prr.fn = PAGE_IN_ZONE_FREED;
		RETURN_IORQ(&prr, KERNEL$WAKE_PAGE_RELEASE);
	}
	/* all banks visited */
	if (!n_released && KERNEL_ZONE.n_free <= kernel_free_target) {
		/* set first so that re-entry via PBANK_CREATED does not take this branch again */
		n_released = 1;
		MTX_LOCK(&VM_MUTEX, &memory_balance, KERNEL$SUCCESS, RETURN);
		if (VM_ARCH_NEW_PBANK(PBANK_DATA, &pbank_ast)) RETURN;
		MTX_UNLOCK(&VM_MUTEX);
		CACHE_UPCALL();
	}
	release_bank = 0;
	release_page = 0;
	n_released = 0;
	START_OPTIONAL_VM_TIMER();
	RAISE_SPL(SPL_TIMER);
	KERNEL$DEL_TIMER(&freemem_timer);
	KERNEL$SET_TIMER(2, &freemem_timer);
	memory_balance_in_progress = 0;
	RETURN;
}

/*
 * Background cache shrinker, queued by MEMORY_BALANCE.
 *
 * Repeats CACHE_UPCALL() + OOM() while FREE_UNDER_MIN() holds, bumping
 * VM_PRESSURE each round, up to MAX_UPCALL_TRIES. If the number of I/Os in
 * progress has reached MAX_IO_IN_PROGRESS it stops retrying; when free
 * memory is also under the kernel minimum it first enlarges
 * MAX_IO_IN_PROGRESS (doubling when nothing is free, otherwise x1.5,
 * capped at VM_MAXIO_LIMIT) and issues one more upcall.
 */
DECL_AST(DO_CACHE_FREE, SPL_DEV, AST)
{
	retry:
	/*__debug_printf("cnt: %d, free: %ld, %ld, %ld\n", cnt, IO_ZONE.n_free, KERNEL_ZONE.n_free, io_free_target);*/
	CACHE_UPCALL();
	OOM();
	/*__debug_printf("cnt after: %d, free: %ld, %ld, %ld\n", VM_PRESSURE, IO_ZONE.n_free, KERNEL_ZONE.n_free, io_free_target);*/
	if (__unlikely(FREE_UNDER_MIN())) {
		if (__unlikely(IO_IN_PROGRESS >= MAX_IO_IN_PROGRESS)) {
			if (__unlikely(FREE_UNDER_KERNEL_MIN())) {
				if (__unlikely(FREE_ZERO())) {
					MAX_IO_IN_PROGRESS <<= 1;
				} else {
					MAX_IO_IN_PROGRESS += MAX_IO_IN_PROGRESS >> 1;
				}
				if (__unlikely(MAX_IO_IN_PROGRESS >= VM_MAXIO_LIMIT)) MAX_IO_IN_PROGRESS = VM_MAXIO_LIMIT;
				VM_PRESSURE++;
				CACHE_UPCALL();
			}
			goto cfp;
		}
		if (__likely(++VM_PRESSURE < MAX_UPCALL_TRIES)) goto retry;
		/* out of retries: fall back to the periodic timer */
		START_OPTIONAL_VM_TIMER();
	}
	cfp:
	/* warning: this must be at the end, because CACHE_UPCALL may retrigger
	   DO_CACHE_FREE too and it would lead to lockup --- test in
	   MEMORY_BALANCE prevents the lockup anyway, but performance will be
	   degraded if we let it loop until timeout */
	cache_free_in_progress = 0;
	VM_PRESSURE = 0;
	RETURN;
}

/*
 * Release handler installed on reserved pages (see VM_ADD_PAGE): the
 * request always fails with -ENOENT.
 */
DECL_IOCALL(UNALLOCATED_PAGE_RELEASE, SPL_TOP, PAGE_RELEASE_REQUEST)
{
	RQ->status = -ENOENT;
	RETURN_AST(RQ);
}

/*
 * Release handler installed on freshly allocated pages by the
 * KERNEL$ALLOC_*_PAGE functions and by KERNEL$PAGE_RELEASE.
 * If the page's handler has changed in the meantime the request is
 * re-dispatched through KERNEL$WAKE_PAGE_RELEASE; otherwise it fails with
 * -EIO.
 */
DECL_IOCALL(NO_PAGE_RELEASE, SPL_BOTTOM, PAGE_RELEASE_REQUEST)
{
	PAGE *pg = RQ->pg;
	if (pg->release != &NO_PAGE_RELEASE) RETURN_IORQ_LSTAT(RQ, KERNEL$WAKE_PAGE_RELEASE);
	RQ->status = -EIO;
	RETURN_AST(RQ);
}

/*
 * Release handler installed on pages handed out by
 * KERNEL$ALLOC_KERNEL_PAGE (and VM_ADD_PAGE with res == 2): the request
 * always fails with -EEXIST.
 */
DECL_IOCALL(KERNEL_PAGE_RELEASE, SPL_TOP, PAGE_RELEASE_REQUEST)
{
	RQ->status = -EEXIST;
	RETURN_AST(RQ);
}

/*
 * Mark a page as having no owner-specific release handler and remove it
 * from the VM_STATS[type] count. The counter update is done at SPL_TOP and
 * the caller's SPL is restored afterwards. Debug builds also clear the
 * page's lockdown hook.
 */
__finline__ void KERNEL$PAGE_RELEASE(PAGE *pg, int type)
{
	int saved_spl = KERNEL$SPL;

	pg->release = &NO_PAGE_RELEASE;
#if __DEBUG >= 1
	pg->lockdown = NULL;
#endif
	RAISE_SPL(SPL_TOP);
	VM_STATS[type]--;
	LOWER_SPLX(saved_spl);
}

/*
 * Entry point for page-release requests: forwards the request to the
 * page's current release handler. While the lockup level is at least
 * LOCKUP_LEVEL_ONE_PASS the request is instead parked on
 * KERNEL$LOCKUP_EVENTS and re-submitted here when that queue is woken.
 */
DECL_IOCALL(KERNEL$WAKE_PAGE_RELEASE, SPL_TOP, PAGE_RELEASE_REQUEST)
{
	if (__unlikely(KERNEL$LOCKUP_LEVEL >= LOCKUP_LEVEL_ONE_PASS) /* && __likely((RQ->status & RQS_PRIORITY_MASK) < RQS_ADD_PRIORITY * KERNEL_NO_RESTART_PRIORITY)*/) {
		WQ_WAIT(&KERNEL$LOCKUP_EVENTS, RQ, KERNEL$WAKE_PAGE_RELEASE);
		RETURN;
	}
	RETURN_IORQ_LSTAT(RQ, RQ->pg->release);
}

/*
 * Release handler installed on pages sitting on a zone free list.
 * If the handler has changed in the meantime the request is re-dispatched;
 * otherwise the page is unlinked from its zone's free list, passed through
 * KERNEL$PAGE_RELEASE (which sets NO_PAGE_RELEASE and decrements the
 * N_VM_TYPES slot of VM_STATS) and the request completes with status 0.
 */
DECL_IOCALL(RELEASE_FREE_PAGE, SPL_TOP, PAGE_RELEASE_REQUEST)
{
	PAGE *pg = RQ->pg;
	if (pg->release != &RELEASE_FREE_PAGE) RETURN_IORQ_LSTAT(RQ, KERNEL$WAKE_PAGE_RELEASE);
	pg->zone->n_free--;
	DEL_FROM_LIST(&pg->hash_entry);
	KERNEL$PAGE_RELEASE(pg, N_VM_TYPES);
	RQ->status = 0;
	RETURN_AST(RQ);
}

/* State of the contiguous-area grabber (KERNEL$VM_GRAB_CONTIG_AREA and helpers). */
extern AST_STUB gca_try_to_free;
extern AST_STUB gca_cont_scan;

static AST gca_cont_scan_ast = { &gca_cont_scan };

/* Held for the whole duration of a contiguous-area request and around page releases in DO_MEMORY_BALANCE. */
MTX_DECL(VM_MUTEX, "KERNEL$VM_MUTEX");

/* The request currently being served. */
static CONTIG_AREA_REQUEST *car;

/* Scan cursor: bank index and page index within the bank. */
static long gca_scan_bank;
static long gca_scan_page;
/* Candidate run: index of its first page in KERNEL_PAGE_MAP and number of pages already submitted for release. */
static long gca_free_base;
static long gca_free_offset;

/* Relaxation level (0..2) widening the set of bank types gca_cont_scan accepts. */
static int gca_get_all;

/*
 * Start a request for RQ->nclusters physically contiguous page clusters.
 *
 * nclusters == 0 completes immediately with ptr 0 and status 0; a request
 * larger than one bank (PG_BANK / PG_CLUSTER clusters) fails with -ERANGE.
 * Otherwise VM_MUTEX is taken, the scan state is reset and the work
 * continues in gca_cont_scan.
 */
DECL_IOCALL(KERNEL$VM_GRAB_CONTIG_AREA, SPL_DEV, CONTIG_AREA_REQUEST)
{
	if (__unlikely(!RQ->nclusters)) {
		RQ->ptr = 0;
		RQ->status = 0;
		RETURN_AST(RQ);
	}
	if (__unlikely(RQ->nclusters > PG_BANK / PG_CLUSTER)) {
		RQ->status = -ERANGE;
		RETURN_AST(RQ);
	}
	MTX_LOCK(&VM_MUTEX, RQ, &KERNEL$VM_GRAB_CONTIG_AREA, RETURN);
	car = RQ;
	gca_get_all = 0;
	gca_scan_bank = 0;
	gca_scan_page = 0;
	RETURN_AST(&gca_cont_scan_ast);
}

/*
 * Find the next candidate run of car->nclusters pages for the current
 * contiguous-area request.
 *
 * Banks are filtered by the request's CARF_* flags against the bank's
 * PBANK_* flags; gca_get_all widens the accepted bank types on later
 * passes. Within a bank, runs must satisfy car->align and must not contain
 * a page owned by UNALLOCATED_PAGE_RELEASE, NO_PAGE_RELEASE or
 * KERNEL_PAGE_RELEASE. A suitable run is handed to gca_try_to_free.
 * When all banks are exhausted a new bank is requested if the flags map to
 * a bank type; after that up to two more relaxed passes are made before
 * the request fails with -ENOMEM.
 */
DECL_AST(gca_cont_scan, SPL_DEV, AST)
{
	int pbankf;
	int top;
	int i;
	long a;
	again:
	if (__unlikely(gca_scan_bank >= N_PBANKS)) {
		/* bank type to create for this kind of request, 0 if none */
		int pb = car->flags & CARF_CODE ? PBANK_CODE : car->flags & CARF_DATA ? PBANK_OTHER : car->flags & CARF_ISADMA ? PBANK_ISADMA : 0;
		if (pb) {
			if (VM_ARCH_NEW_PBANK(pb, &gca_cont_scan_ast)) {
				/* rescan from the start when re-entered through gca_cont_scan_ast */
				gca_scan_bank = 0;
				gca_scan_page = 0;
				RETURN;
			}
			if (++gca_get_all <= 2) {
				gca_scan_bank = 0;
				gca_scan_page = 0;
				goto again;
			}
		}
		car->status = -ENOMEM;
		MTX_UNLOCK(&VM_MUTEX);
		RETURN_AST(car);
	}
	pbankf = PBANKS[gca_scan_bank];
	top = PG_BANK / PG_CLUSTER;
	if (car->flags & CARF_PCIDMA && __unlikely(pbankf & PBANK_NOPCIDMA)) goto fail;
	if (car->flags & CARF_CODE) {
		if (!(pbankf & (!gca_get_all ? PBANK_CODE : gca_get_all == 1 ? PBANK_CODE | PBANK_DATA | PBANK_OTHER : PBANK_CODE | PBANK_DATA | PBANK_ISADMA | PBANK_OTHER))) {
			/* "fail": give up on this bank and move to the next one */
			fail:
			gca_scan_bank++;
			gca_scan_page = 0;
			goto again;
		}
	}
	if (car->flags & CARF_DATA) {
		if (!(pbankf & (!gca_get_all ? (!FOLD_DL ? PBANK_DATA : PBANK_CODE) | PBANK_OTHER : gca_get_all == 1 ? PBANK_CODE | PBANK_DATA | PBANK_OTHER : PBANK_CODE | PBANK_DATA | PBANK_ISADMA | PBANK_OTHER))) goto fail;
	}
	if (car->flags & CARF_ISADMA) {
		if (!(pbankf & PBANK_ISADMA)) goto fail;
	}
	next_page:
	/* the run must fit inside the bank */
	if (gca_scan_page + car->nclusters > top) goto fail;
	/* car->align is used as a bit mask on the byte offset within the bank */
	if ((int)(gca_scan_page * PAGE_CLUSTER_SIZE) & car->align) {
		gca_scan_page++;
		goto next_page;
	}
	a = (long)gca_scan_bank * (PG_BANK / PG_CLUSTER) + gca_scan_page;
	for (i = 0; i < car->nclusters; i++) {
		if (KERNEL_PAGE_MAP[a + i].release == &UNALLOCATED_PAGE_RELEASE ||
		    KERNEL_PAGE_MAP[a + i].release == &NO_PAGE_RELEASE ||
		    KERNEL_PAGE_MAP[a + i].release == &KERNEL_PAGE_RELEASE) {
			/* restart just past the page that blocks this run */
			gca_scan_page += i + 1;
			goto next_page;
		}
	}
	gca_free_base = a;
	gca_free_offset = 0;
	prr.fn = &gca_try_to_free;
	RETURN_AST(&prr);
}

/*
 * Release the pages of the candidate run one at a time.
 *
 * Entered first from gca_cont_scan with gca_free_offset == 0 and then as
 * the completion of each release request. If a release fails, the pages
 * released so far (all but the last one submitted) are returned to the
 * free lists and scanning resumes past the run. When every page of the run
 * has been released the request completes: for CARF_CODE / CARF_DATA /
 * CARF_ISADMA requests car->ptr is KMAP_PAGE_2_VIRT() of the first page
 * and the pages are accounted as VM_TYPE_WIRED_MAPPED; otherwise car->ptr
 * is the first PAGE descriptor and they are accounted as
 * VM_TYPE_WIRED_UNMAPPED.
 */
DECL_AST(gca_try_to_free, SPL_DEV, AST)
{
	if (gca_free_offset) {
		IO_DISABLE_CHAIN_CANCEL(SPL_DEV, car);
		if (prr.status) {
			int i;
			/* gca_free_offset was already advanced past the failed page, hence "- 1" */
			for (i = 0; i < gca_free_offset - 1; i++) KERNEL$FREE_USER_PAGE(&KERNEL_PAGE_MAP[gca_free_base + i], N_VM_TYPES);
			gca_scan_page += gca_free_offset;
			RETURN_AST(&gca_cont_scan_ast);
		}
		if (prr.pg->release != &NO_PAGE_RELEASE)
			KERNEL$SUICIDE("gca_try_to_free: PAGE NOT RELEASED, PAGE %p, FUNCTION %p", prr.pg, prr.pg->release);
	}
	if (gca_free_offset == car->nclusters) {
		if (car->flags & (CARF_CODE | CARF_DATA | CARF_ISADMA)) {
			car->ptr = KMAP_PAGE_2_VIRT(KERNEL_PAGE_MAP + gca_free_base);
			/*__critical_printf("%08x %08x %08x", KERNEL_PAGE_MAP + gca_free_base, PAGE_2_PHYS(KERNEL_PAGE_MAP + gca_free_base), car->ptr);*/
			RAISE_SPL(SPL_TOP);
			VM_STATS[VM_TYPE_WIRED_MAPPED] += car->nclusters;
			LOWER_SPL(SPL_DEV);
		} else {
			car->ptr = KERNEL_PAGE_MAP + gca_free_base;
			RAISE_SPL(SPL_TOP);
			VM_STATS[VM_TYPE_WIRED_UNMAPPED] += car->nclusters;
			LOWER_SPL(SPL_DEV);
		}
		car->status = 0;
		MTX_UNLOCK(&VM_MUTEX);
		RETURN_AST(car);
	}
	/* submit the next page of the run for release */
	prr.pg = &KERNEL_PAGE_MAP[gca_free_base + gca_free_offset++];
	RETURN_IORQ_CANCELABLE(&prr, KERNEL$WAKE_PAGE_RELEASE, car);
}

/*
 * Free npages consecutive page clusters starting at virtual address vmap.
 * Each cluster is freed with type VM_TYPE_WIRED_MAPPED; the address is
 * advanced by PG_SIZE * PG_CLUSTER bytes per cluster.
 */
void KERNEL$VM_RELEASE_CONTIG_AREA(void *vmap, int npages)
{
	char *cursor = (char *)vmap;

	while (npages--) {
		KERNEL$FREE_KERNEL_PAGE(cursor, VM_TYPE_WIRED_MAPPED);
		cursor += PG_SIZE * PG_CLUSTER;
	}
}

/*
 * Map a page to the zone it belongs to, based on the flags of its bank.
 *
 * The bank index is the page's index in KERNEL_PAGE_MAP shifted right by
 * (PG_BANK_BITS - PG_CLUSTER_BITS).
 *   - PBANK_DATA banks            -> KERNEL_ZONE
 *   - PBANK_HIGHMEM banks         -> HIGH_ZONE (only with KERNEL_HIGH_ZONE)
 *   - everything else             -> IO_ZONE
 *
 * The highmem test used to read "!PBANKS[b] & PBANK_HIGHMEM"; because "!"
 * binds tighter than "&" that tested the logical negation of the whole
 * flag word, so high-memory banks were never assigned to HIGH_ZONE. This
 * also matches DO_MEMORY_BALANCE, which treats banks with PBANK_HIGHMEM
 * set as the ones that do not feed the kernel/I/O zones.
 */
static struct pg_zone *VM_GET_PAGE_ZONE(PAGE *p)
{
	int b = (p - KERNEL_PAGE_MAP) >> (PG_BANK_BITS - PG_CLUSTER_BITS);
	if (PBANKS[b] & PBANK_DATA) return &KERNEL_ZONE;
#if KERNEL_HIGH_ZONE
	if (PBANKS[b] & PBANK_HIGHMEM) return &HIGH_ZONE;
#endif
	return &IO_ZONE;
}

/*
 * Lockdown hook installed on pages by VM_ADD_PAGE and
 * KERNEL$VM_PREPARE_PAGE_FOR_MMAP. It maintains the PAGE_DMALOCKCOUNT
 * field of p->flags.
 *
 * lock == 0: take one count. If the counter is saturated nothing is
 *            changed and &p->wait is returned; otherwise returns NULL.
 * lock != 0: drop one count and return NULL. Waiters on p->wait are woken
 *            if the counter was saturated before the drop and again if it
 *            reaches zero.
 *
 * Debug builds require SPL_VSPACE and stop the kernel when a count is
 * dropped while the counter is already zero.
 */
static WQ *PAGE_LOCKDOWN(PAGE *p, int lock)
{
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_VSPACE)))
		KERNEL$SUICIDE("PAGE_LOCKDOWN AT SPL %08X", KERNEL$SPL);
#endif
	if (lock) {
#if __DEBUG >= 1
		if (__unlikely(!(p->flags & PAGE_DMALOCKCOUNT)))
			KERNEL$SUICIDE("PAGE_LOCKDOWN: UNLOCKING UNLOCKED PAGE");
#endif
		/* counter was full: somebody may be waiting for room */
		if (__unlikely((p->flags & PAGE_DMALOCKCOUNT) == PAGE_DMALOCKCOUNT))
			WQ_WAKE_ALL(&p->wait);
		p->flags -= PAGE_DMALOCKCOUNT_1;
		/* counter dropped to zero */
		if (__likely(!(p->flags & PAGE_DMALOCKCOUNT)))
			WQ_WAKE_ALL(&p->wait);
		return NULL;
	}

	if (__unlikely((p->flags & PAGE_DMALOCKCOUNT) == PAGE_DMALOCKCOUNT))
		return &p->wait;
	p->flags += PAGE_DMALOCKCOUNT_1;
	return NULL;
}

/*
 * Register a page with the allocator.
 *
 * res selects how the page is classified:
 *   0 - usable memory: put on the free list of its zone, unless
 *       VM_ARCH_CHECK_HW_MEM() returns nonzero for it, in which case it is
 *       treated like res == 1
 *   1 - reserved: gets UNALLOCATED_PAGE_RELEASE and counts as
 *       VM_TYPE_RESERVED
 *   2 - kernel page: gets KERNEL_PAGE_RELEASE and counts as
 *       VM_TYPE_WIRED_MAPPED
 *   any other value stops the kernel.
 *
 * If the page already has a release handler, the only thing done is
 * converting a reserved page into a free one (res == 0, handler is
 * UNALLOCATED_PAGE_RELEASE, hardware check passes).
 */
void VM_ADD_PAGE(PAGE *p, int res)
{
	int spl;
	if (__unlikely(p->release != NULL)) {
		if (!res && p->release == UNALLOCATED_PAGE_RELEASE && __likely(!VM_ARCH_CHECK_HW_MEM(p))) {
			p->zone = VM_GET_PAGE_ZONE(p);
			/* the page was counted as reserved; FREE_USER_PAGE removes it from that counter */
			KERNEL$FREE_USER_PAGE(p, VM_TYPE_RESERVED);
		}
		return;
	}
	p->zone = VM_GET_PAGE_ZONE(p);
	if (__likely(!res)) {
		if (__unlikely(VM_ARCH_CHECK_HW_MEM(p))) goto reserved;
#if __DEBUG >= 2
		KERNEL$UNMAP_PHYSICAL_BANK(memset(KERNEL$MAP_PHYSICAL_PAGE(p), 0xAA, PAGE_CLUSTER_SIZE));
#endif
		KERNEL$FREE_USER_PAGE(p, N_VM_TYPES);
	} else if (res == 1) {
		/* also the jump target for res == 0 pages that fail the hardware check */
		reserved:
		p->flags = PAGE_WRITEABLE;
		p->release = UNALLOCATED_PAGE_RELEASE;
		p->lockdown = PAGE_LOCKDOWN;
		WQ_INIT(&p->wait, "KERNEL$RESERVED_PAGE_WAIT");
		INIT_XLIST(&p->mapping);
		spl = KERNEL$SPL;
		RAISE_SPL(SPL_TOP);
		VM_STATS[VM_TYPE_RESERVED]++;
		LOWER_SPLX(spl);
	} else if (res == 2) {
		p->release = KERNEL_PAGE_RELEASE;
		spl = KERNEL$SPL;
		RAISE_SPL(SPL_TOP);
		VM_STATS[VM_TYPE_WIRED_MAPPED]++;
		LOWER_SPLX(spl);
	} else KERNEL$SUICIDE("VM_ADD_PAGE: RES %d, PAGE %p", res, p);
}

/*
 * Take the pages with KERNEL_PAGE_MAP indices [from, to) away from the
 * allocator.
 *
 * Requires from <= to_reserve <= to (otherwise the kernel is stopped).
 * Every page in the range is released synchronously; reserved pages are
 * only removed from the VM_TYPE_RESERVED count. If a release fails, the
 * pages handled so far are rolled back (freed again or re-counted as
 * reserved) and the failing status is returned. On success the pages in
 * [from, to_reserve) are re-added as reserved via VM_ADD_PAGE(..., 1), the
 * PAGE descriptors in [to_reserve, to) are zeroed, and 0 is returned.
 *
 * Note: the local prr below shadows the file-scope prr.
 */
int VM_RELEASE_PAGES(unsigned long from, unsigned long to_reserve, unsigned long to)
{
	PAGE *p;
	PAGE_RELEASE_REQUEST prr;
	unsigned long i;
	int spl = KERNEL$SPL;
	if (__unlikely(from > to_reserve) || __unlikely(to_reserve > to)) KERNEL$SUICIDE("VM_RELEASE_PAGES: BAD PARAMS %lX, %lX, %lX", from, to_reserve, to);
	for (i = from; i < to; i++) {
		TEST_LOCKUP_SYNC;
		p = &KERNEL_PAGE_MAP[i];
		if (p->release == UNALLOCATED_PAGE_RELEASE) {
			RAISE_SPL(SPL_TOP);
			VM_STATS[VM_TYPE_RESERVED]--;
			LOWER_SPLX(spl);
			continue;
		}
		prr.pg = p;
		SYNC_IO_CANCELABLE(&prr, KERNEL$WAKE_PAGE_RELEASE);
		if (__unlikely(prr.status != 0)) {
			/* roll back pages i-1 down to from; the unsigned wrap of "from - 1" is intentional */
			for (i--; i != from - 1; i--) {
				if (KERNEL_PAGE_MAP[i].release != UNALLOCATED_PAGE_RELEASE) {
					KERNEL$FREE_USER_PAGE(&KERNEL_PAGE_MAP[i], N_VM_TYPES);
				} else {
					RAISE_SPL(SPL_TOP);
					VM_STATS[VM_TYPE_RESERVED]++;
					LOWER_SPLX(spl);
				}
			}
			return prr.status;
		}
	}
	for (i = from; i < to_reserve; i++) {
		RAISE_SPL(SPL_TOP);
		/* clear the handler so VM_ADD_PAGE takes its "new page" path */
		KERNEL_PAGE_MAP[i].release = NULL;
		VM_ADD_PAGE(&KERNEL_PAGE_MAP[i], 1);
		LOWER_SPLX(spl);
	}
	memset(&KERNEL_PAGE_MAP[to_reserve], 0, sizeof(PAGE) * (to - to_reserve));
	return 0;
}

/*
 * Recompute the zone of every page in the given bank.
 *
 * Must be called at SPL_DEV (otherwise the kernel is stopped). For each
 * page whose zone differs from what VM_GET_PAGE_ZONE now reports, the
 * page's zone pointer is updated; if the page is currently free it is also
 * moved from the old zone's free list to the new one. Waiters on
 * KERNEL$FREEMEM_WAIT are woken at the end.
 */
void VM_RESET_ZONES(int bank)
{
	unsigned long idx = (unsigned long)bank * (PG_BANK / PG_CLUSTER);
	const unsigned long end = (unsigned long)(bank + 1) * (PG_BANK / PG_CLUSTER);

	if (__unlikely(KERNEL$SPL != SPL_X(SPL_DEV))) KERNEL$SUICIDE("VM_RESET_ZONES AT SPL %08X", KERNEL$SPL);

	while (idx < end) {
		PAGE *pg = &KERNEL_PAGE_MAP[idx++];
		struct pg_zone *wanted;

		RAISE_SPL(SPL_TOP);
		wanted = VM_GET_PAGE_ZONE(pg);
		if (wanted != pg->zone) {
			if (pg->release == RELEASE_FREE_PAGE) {
				/* migrate the free page between free lists */
				pg->zone->n_free--;
				DEL_FROM_LIST(&pg->hash_entry);
				ADD_TO_LIST(&wanted->freelist, &pg->hash_entry);
				wanted->n_free++;
			}
			pg->zone = wanted;
		}
		LOWER_SPL(SPL_DEV);
	}
	WQ_WAKE_ALL(&KERNEL$FREEMEM_WAIT);
}

/*
 * Decide whether an allocation of the given VM type is hopeless.
 *
 * Returns 0 ("not out of memory") or 1 ("out of memory"); an unknown type
 * stops the kernel.
 *   - Any cached mapped pages at all: 0.
 *   - Mapped types: if unmapped cached/user pages or free pages in the
 *     I/O (or high) zone exist, 0 is returned when a new PBANK_DATA bank
 *     can be created or when some page in an existing PBANK_DATA bank is
 *     not owned by UNALLOCATED_PAGE_RELEASE, NO_PAGE_RELEASE or
 *     KERNEL_PAGE_RELEASE.
 *   - Unmapped types: 0 if unmapped cached/user pages exist (or, with a
 *     high zone, the high zone has free pages).
 */
int KERNEL$OOM(int type)
{
	if (__likely(VM_STATS[VM_TYPE_CACHED_MAPPED] != 0)) return 0;
	if (type == VM_TYPE_WIRED_MAPPED || type == VM_TYPE_CACHED_MAPPED || type == VM_TYPE_USER_MAPPED) {
		if (VM_STATS[VM_TYPE_CACHED_UNMAPPED] | VM_STATS[VM_TYPE_USER_UNMAPPED] | IO_ZONE.n_free
#if KERNEL_HIGH_ZONE
		| HIGH_ZONE.n_free
#endif
		) {
			unsigned b;
			if (VM_ARCH_NEW_PBANK(PBANK_DATA, NULL)) return 0;
			for (b = 0; b < N_PBANKS; b++) if (PBANKS[b] & PBANK_DATA) {
				int c;
				for (c = 0; c < PG_BANK / PG_CLUSTER; c++) {
					PAGE *p = &KERNEL_PAGE_MAP[b * (PG_BANK / PG_CLUSTER) + c];
					if (p->release == &UNALLOCATED_PAGE_RELEASE || p->release == &NO_PAGE_RELEASE || p->release == &KERNEL_PAGE_RELEASE) continue;
					/* found a page in a data bank that may still be released */
					return 0;
				}
			}
		}
	} else if (type == VM_TYPE_WIRED_UNMAPPED || type == VM_TYPE_CACHED_UNMAPPED || type == VM_TYPE_USER_UNMAPPED) {
		if ((VM_STATS[VM_TYPE_CACHED_UNMAPPED] | VM_STATS[VM_TYPE_USER_UNMAPPED])
#if KERNEL_HIGH_ZONE
		| HIGH_ZONE.n_free
#endif
		) return 0;
	} else KERNEL$SUICIDE("KERNEL$OOM(%d)", type);
	return 1;
}

/*
 * Translate an allocation failure of sz bytes into an error code:
 * -ERANGE if sz exceeds one page cluster, -ENOMEM if KERNEL$OOM reports
 * that wired mapped memory is exhausted, 0 if waiting for memory is
 * worthwhile.
 */
int KERNEL$OOM_ERR(size_t sz)
{
	int err = 0;

	if (__unlikely(sz > PAGE_CLUSTER_SIZE))
		err = -ERANGE;
	else if (__unlikely(KERNEL$OOM(VM_TYPE_WIRED_MAPPED)))
		err = -ENOMEM;
	return err;
}

/*
 * Asynchronously wait for memory after a failed allocation of sz bytes.
 * If KERNEL$OOM_ERR reports an error, rq is completed immediately with that
 * status; otherwise rq is queued on KERNEL$FREEMEM_WAIT to be restarted at
 * fn.
 */
void KERNEL$MEMWAIT(IORQ *rq, IO_STUB *fn, size_t sz)
{
	int err = KERNEL$OOM_ERR(sz);

	if (__likely(!err)) {
		WQ_WAIT(&KERNEL$FREEMEM_WAIT, rq, fn);
		return;
	}
	rq->status = err;
	CALL_AST(rq);
}

/*
 * Synchronous counterpart of KERNEL$MEMWAIT.
 * Returns 0 after a successful wait on KERNEL$FREEMEM_WAIT. On failure
 * (from KERNEL$OOM_ERR or from the wait itself) returns the nonzero status
 * and sets errno to its negation.
 */
int KERNEL$MEMWAIT_SYNC(size_t sz)
{
	int status = KERNEL$OOM_ERR(sz);

	if (__likely(!status))
		status = WQ_WAIT_SYNC_CANCELABLE(&KERNEL$FREEMEM_WAIT);
	if (__unlikely(status))
		errno = -status;
	return status;
}


/*
 * Asynchronous malloc request.
 *
 * A zero-size request yields the sentinel KERNEL$__STDHEAP_TOP. Other
 * sizes are rounded up to at least sizeof(AST) (KERNEL$UNIVERSAL_FREE may
 * reuse the block as an AST) and passed to malloc(). If malloc fails the
 * request is handed to KERNEL$MEMWAIT, which either fails it or restarts
 * it here once memory is freed.
 */
DECL_IOCALL(KERNEL$UNIVERSAL_MALLOC, SPL_MALLOC, MALLOC_REQUEST)
{
	size_t sz = RQ->size;
	if (__unlikely(!sz)) {
		RQ->ptr = KERNEL$__STDHEAP_TOP;
		goto ok;
	}
	if (__unlikely(sz < sizeof(AST))) sz = sizeof(AST);
	if (__likely((RQ->ptr = malloc(sz)) != NULL)) {
		ok:
		RQ->status = 0;
		RETURN_AST(RQ);
	}
	KERNEL$MEMWAIT((IORQ *)RQ, KERNEL$UNIVERSAL_MALLOC, sz);
	RETURN;
}

/*
 * Deferred free used by KERNEL$UNIVERSAL_FREE: the block being freed is
 * itself the AST object, so freeing RQ releases it.
 */
DECL_AST(FREE_AT_SPL_MALLOC, SPL_MALLOC, AST)
{
	free(RQ);
	RETURN;
}

/*
 * Free a block obtained from KERNEL$UNIVERSAL_MALLOC.
 *
 * NULL and the zero-size sentinel KERNEL$__STDHEAP_TOP are ignored. When
 * SPLX_BELOW(SPL_X(SPL_MALLOC), KERNEL$SPL) is false the block is freed
 * directly; otherwise the block is reused as an AST that calls
 * FREE_AT_SPL_MALLOC. Debug builds stop the kernel if the block is smaller
 * than an AST.
 */
void KERNEL$UNIVERSAL_FREE(void *ptr)
{
	if (__unlikely(ptr == NULL) || __unlikely(ptr == KERNEL$__STDHEAP_TOP))
		return;
#if __DEBUG >= 1
	if (__unlikely(__alloc_size(ptr) < sizeof(AST)))
		KERNEL$SUICIDE("KERNEL$UNIVERSAL_FREE: BLOCK NOT OBTAINED WITH KERNEL$UNIVERSAL_MALLOC (SIZE %ld)", (long)__alloc_size(ptr));
#endif
	if (__unlikely(SPLX_BELOW(SPL_X(SPL_MALLOC), KERNEL$SPL))) {
		/* cannot call free() from here: defer it */
		AST *deferred = ptr;
		deferred->fn = &FREE_AT_SPL_MALLOC;
		CALL_AST(deferred);
		return;
	}
	free(ptr);
}

/* shadowed in LIB/KERNEL/UVM.C */

/*
 * Try to remove all mappings of a page.
 * Returns the wait queue reported by VM_ARCH_UNMAP_MAPPING if it returns
 * one; otherwise &p->wait if the page is busy, DMA-locked or has writers;
 * otherwise NULL.
 */
WQ *KERNEL$VM_UNMAP_PAGE(PAGE *p)
{
	/* the call below raises SPL to SPL_VSPACE */
	WQ *blocker = VM_ARCH_UNMAP_MAPPING(&p->mapping);

	if (__unlikely(blocker != NULL))
		return blocker;
	if (__likely(!(p->flags & (PAGE_BUSY | PAGE_DMALOCKCOUNT | PAGE_WRITECOUNT))))
		return NULL;
	return &p->wait;
}

/*
 * Calls VM_ARCH_CHECK_MAPPING(&p->mapping, 1).
 * A result greater than (void *)1 is treated as a wait queue and returned;
 * otherwise returns &p->wait if the page is busy or DMA-locked, else NULL.
 */
WQ *KERNEL$VM_UNSET_WRITEABLE(PAGE *p)
{
	/* the call below raises SPL to SPL_VSPACE */
	WQ *blocker = VM_ARCH_CHECK_MAPPING(&p->mapping, 1);

	if (__unlikely((void *)blocker > (void *)1))
		return blocker;
	if (__likely(!(p->flags & (PAGE_BUSY | PAGE_DMALOCKCOUNT))))
		return NULL;
	return &p->wait;
}

/*
 * Calls VM_ARCH_CHECK_MAPPING(&p->mapping, 0) and collapses its result to
 * an int: any value above (void *)1 becomes 1, smaller values are returned
 * as-is. The caller's SPL is restored before returning.
 */
int KERNEL$VM_SCAN_PAGE(PAGE *p)
{
	int saved_spl = KERNEL$SPL;
	/* the call below raises SPL to SPL_VSPACE */
	WQ *raw = VM_ARCH_CHECK_MAPPING(&p->mapping, 0);
	int result;

	if (__unlikely((void *)raw > (void *)1))
		result = 1;
	else
		result = (int)(unsigned long)raw;
	LOWER_SPLX(saved_spl);
	return result;
}

/*
 * SPAGE counterpart of KERNEL$VM_UNMAP_PAGE: returns the wait queue from
 * VM_ARCH_UNMAP_MAPPING if any; otherwise &sp->wait if the spage is busy,
 * DMA-locked or has writers; otherwise NULL.
 */
WQ *KERNEL$VM_UNMAP_SPAGE(SPAGE *sp)
{
	/* the call below raises SPL to SPL_VSPACE */
	WQ *blocker = VM_ARCH_UNMAP_MAPPING(&sp->mapping);

	if (__unlikely(blocker != NULL))
		return blocker;
	if (__likely(!(sp->flags & (PAGE_BUSY | PAGE_DMALOCKCOUNT | PAGE_WRITECOUNT))))
		return NULL;
	return &sp->wait;
}

/*
 * SPAGE counterpart of KERNEL$VM_SCAN_PAGE: the result of
 * VM_ARCH_CHECK_MAPPING(&sp->mapping, 0) is collapsed to an int, with any
 * value above (void *)1 becoming 1. The caller's SPL is restored before
 * returning.
 */
int KERNEL$VM_SCAN_SPAGE(SPAGE *sp)
{
	int saved_spl = KERNEL$SPL;
	/* the call below raises SPL to SPL_VSPACE */
	WQ *raw = VM_ARCH_CHECK_MAPPING(&sp->mapping, 0);
	int result;

	if (__unlikely((void *)raw > (void *)1))
		result = 1;
	else
		result = (int)(unsigned long)raw;
	LOWER_SPLX(saved_spl);
	return result;
}

/*
 * Initialise the per-page fields needed before a page can be mapped:
 * empty mapping list, wait queue, lockdown hook, and flags set to
 * PAGE_WRITEABLE.
 */
void KERNEL$VM_PREPARE_PAGE_FOR_MMAP(PAGE *p)
{
	INIT_XLIST(&p->mapping);
	WQ_INIT(&p->wait, "KERNEL$PAGE_WAIT");
	p->lockdown = PAGE_LOCKDOWN;
	p->flags = PAGE_WRITEABLE;
}

/*
 * Initialise a struct pg_zone lvalue: empty free list, zero free count.
 * The argument is parenthesised so that expressions such as *ptr or
 * arr[i] expand correctly ("." binds tighter than unary "*").
 */
#define zone_init(z)		\
do {				\
	INIT_LIST(&(z).freelist);	\
	(z).n_free = 0;		\
} while (0)

/*
 * Count how many pages belong to each zone.
 *
 * Only the first page of every bank is inspected; the whole bank
 * (PG_BANK / PG_CLUSTER pages) is attributed to that page's zone. A page
 * whose zone pointer matches none of the known zones stops the kernel.
 * Without KERNEL_HIGH_ZONE, *high is always set to 0.
 */
void VM_COUNT_ZONES(unsigned long *kernel, unsigned long *io, unsigned long *high)
{
	unsigned long n_kernel = 0;
	unsigned long n_io = 0;
	unsigned long n_high = 0;
	unsigned long idx;

	for (idx = 0; idx < N_PBANKS * (PG_BANK / PG_CLUSTER); idx += (PG_BANK / PG_CLUSTER)) {
		if (KERNEL_PAGE_MAP[idx].zone == &KERNEL_ZONE) {
			n_kernel += (PG_BANK / PG_CLUSTER);
			continue;
		}
		if (KERNEL_PAGE_MAP[idx].zone == &IO_ZONE) {
			n_io += (PG_BANK / PG_CLUSTER);
			continue;
		}
#if KERNEL_HIGH_ZONE
		if (KERNEL_PAGE_MAP[idx].zone == &HIGH_ZONE) {
			n_high += (PG_BANK / PG_CLUSTER);
			continue;
		}
#endif
		KERNEL$SUICIDE("VM_SET_TARGETS: PAGE %lu HAS UNKNOWN ZONE %p", idx, KERNEL_PAGE_MAP[idx].zone);
	}
	*kernel = n_kernel;
	*io = n_io;
	*high = n_high;
}

/*
 * Upper bound for a free target: the pages left after subtracting the
 * wired ones (clamped at 0), divided by FREE_MEMORY_PCT, plus one.
 */
static unsigned long VM_TARGET_CAP(unsigned long total, unsigned long wired)
{
	unsigned long usable = __likely(total >= wired) ? total - wired : 0;
	return usable / FREE_MEMORY_PCT + 1;
}

/*
 * Recompute the maximum free targets and restart the refresh countdown.
 *
 * If VM_FREEMEM_WATERMARK is nonzero it is used for every maximum.
 * Otherwise the maxima are derived from the zone sizes reported by
 * VM_COUNT_ZONES and the wired page counters:
 *   - I/O:    from kernel + I/O pages minus all wired pages
 *   - kernel: from kernel pages minus wired mapped pages, not above the
 *             I/O maximum
 *   - high:   from all pages minus all wired pages (KERNEL_HIGH_ZONE only)
 */
void VM_REFRESH_WATERMARKS(void)
{
	unsigned wm;

	VM_FREEMEM_WATERMARK_REFRESH = WATERMARK_REFRESH_N;
	wm = VM_FREEMEM_WATERMARK;
	if (__unlikely(wm != 0)) {
		max_kernel_free_target = wm;
		max_io_free_target = wm;
#if KERNEL_HIGH_ZONE
		max_high_free_target = wm;
#endif
	} else {
		unsigned long kz, iz, hz;
		unsigned long cap;

		VM_COUNT_ZONES(&kz, &iz, &hz);

		max_io_free_target = VM_TARGET_CAP(kz + iz, VM_STATS[VM_TYPE_WIRED_MAPPED] + VM_STATS[VM_TYPE_WIRED_UNMAPPED]);

		cap = VM_TARGET_CAP(kz, VM_STATS[VM_TYPE_WIRED_MAPPED]);
		if (__unlikely(cap > max_io_free_target))
			cap = max_io_free_target;
		max_kernel_free_target = cap;

#if KERNEL_HIGH_ZONE
		max_high_free_target = VM_TARGET_CAP(kz + iz + hz, VM_STATS[VM_TYPE_WIRED_MAPPED] + VM_STATS[VM_TYPE_WIRED_UNMAPPED]);
#endif
	}
}

/*
 * Periodic housekeeping, driven by freemem_timer.
 *
 *  - Each nonzero zone free target is lowered by one once
 *    TARGET_DECREASE_LATENCY has passed since its last increase; the
 *    timestamp is then set so that a further decrease needs another 120
 *    seconds' worth of jiffies (TARGET_DECREASE_LATENCY minus
 *    TARGET_INCREASE_LATENCY).
 *  - user_free_target is reduced by a quarter (rounded up) every
 *    USER_TARGET_DECREASE_LATENCY.
 *  - MAX_IO_IN_PROGRESS decays towards VM_MAXIO unless free memory is
 *    under the kernel minimum.
 *  - All memory-related wait queues are woken.
 *  - Every WATERMARK_REFRESH_N runs the maximum targets are recomputed.
 *  - The timer re-arms itself with FREEMEM_JIFFIES.
 */
static void freemem_timer_fn(TIMER *t)
{
	/* SPL_TOP active here */
	u_jiffies_lo_t j = KERNEL$GET_JIFFIES_LO();
	if (__likely(kernel_free_target != 0) && __unlikely(can_decrease(j, last_kernel_target_incr))) {
		kernel_free_target--;
		last_kernel_target_incr = j - TARGET_INCREASE_LATENCY;
	}
	if (__likely(io_free_target != 0) && __unlikely(can_decrease(j, last_io_target_incr))) {
		io_free_target--;
		last_io_target_incr = j - TARGET_INCREASE_LATENCY;
	}
#if KERNEL_HIGH_ZONE
	if (__likely(high_free_target != 0) && __unlikely(can_decrease(j, last_high_target_incr))) {
		high_free_target--;
		last_high_target_incr = j - TARGET_INCREASE_LATENCY;
	}
#endif
	/* NOTE(review): unlike can_increase/can_decrease, this difference is not cast to u_jiffies_lo_t; harmless only if that type is at least as wide as int -- confirm */
	if (j - last_user_target_decr > USER_TARGET_DECREASE_LATENCY) {
		user_free_target -= ((user_free_target + 3) >> 2);
		last_user_target_decr = j;
	}
	LOWER_SPL(SPL_TIMER);
	if (__likely(!FREE_UNDER_KERNEL_MIN())) {
		/* move a quarter (rounded up) of the way back towards VM_MAXIO */
		MAX_IO_IN_PROGRESS -= (MAX_IO_IN_PROGRESS - VM_MAXIO + 3) >> 2;
	}
	WQ_WAKE_ALL(&KERNEL$FREEMEM_WAIT);
	WQ_WAKE_ALL(&WRITEBACK_WAIT);
	WQ_WAKE_ALL(&FREEMEM_ABOVE);
	WQ_WAKE_ALL(&FREE_ENTITIES);
	if (__unlikely(!--VM_FREEMEM_WATERMARK_REFRESH)) {
		VM_REFRESH_WATERMARKS();
	}
	KERNEL$SET_TIMER(FREEMEM_JIFFIES, &freemem_timer);
}

/*
 * Decide whether process p may allocate `size` more bytes right now.
 *
 * Returns NULL when the allocation is admitted; in that case `size` has been
 * added to p->alloc_rate (saturating at MAXLONG). The kernel process is
 * always admitted and is not accounted. Otherwise MEMORY_BALANCE() is run at
 * SPL_TOP and a wait queue is returned (presumably for the caller to wait on
 * before retrying):
 *   &FREE_ENTITIES  - the reserve derived from the allocation rate plus
 *                     VM_ENTITIES would exceed VM_ENTITIES_HARD_LIMIT,
 *   &FREEMEM_ABOVE  - the free pages do not exceed that reserve plus the
 *                     zone free target.
 *
 * The reserve `freepg` is the process' allocation rate converted to page
 * clusters and limited relative to the zone's maximum free target; it also
 * raises user_free_target to at least twice its value.
 */
WQ *KERNEL$MAY_ALLOC(PROC *p, unsigned size)
{
	int spl;
	unsigned long freepg;
	if (__unlikely(p == &KERNEL$PROC_KERNEL)) return NULL;
	spl = KERNEL$SPL;
	RAISE_SPL(SPL_TOP);
	UNIFY_ALLOC_RATE(p);
	LOWER_SPLX(spl);
	/*if (__unlikely(!p->alloc_rate)) goto ok_to_alloc;*/
	freepg = p->alloc_rate >> (PG_SIZE_BITS + PG_CLUSTER_BITS);
#if !KERNEL_HIGH_ZONE
	if (__unlikely((freepg << 2) > max_io_free_target)) {
		/* Above a quarter of the maximum only a quarter of the excess counts, capped at one half. */
		freepg = (max_io_free_target >> 2) + ((freepg - (max_io_free_target >> 2)) >> 2);
		if (__unlikely((freepg << 1) > max_io_free_target))
			freepg = max_io_free_target >> 1;
	}
	if ((freepg << 1) > user_free_target) user_free_target = freepg << 1;
	if (__unlikely(freepg + VM_ENTITIES > VM_ENTITIES_HARD_LIMIT) && __likely(freepg != 0)) {
		spl = KERNEL$SPL;
		RAISE_SPL(SPL_TOP);
		MEMORY_BALANCE();
		LOWER_SPLX(spl);
		return &FREE_ENTITIES;
	}
	/*
	 * Free pages must strictly exceed the reserve, exactly as in the
	 * KERNEL_HIGH_ZONE variant below; otherwise an allocation would be
	 * admitted with free == reserve (including 0 == 0, i.e. no free page).
	 */
	if (__unlikely(IO_ZONE.n_free + KERNEL_ZONE.n_free <= freepg + io_free_target)) {
		spl = KERNEL$SPL;
		RAISE_SPL(SPL_TOP);
		MEMORY_BALANCE();
		LOWER_SPLX(spl);
		/*if (__unlikely(!KERNEL_ZONE.n_free)) return NULL;*/
		return &FREEMEM_ABOVE;
	}
#else
	if (__unlikely((freepg << 1) > max_high_free_target)) freepg = max_high_free_target >> 1;
	if ((freepg << 1) > user_free_target) user_free_target = freepg << 1;
	if (__unlikely(freepg + VM_ENTITIES > VM_ENTITIES_HARD_LIMIT) && __likely(freepg != 0)) {
		spl = KERNEL$SPL;
		RAISE_SPL(SPL_TOP);
		MEMORY_BALANCE();
		LOWER_SPLX(spl);
		return &FREE_ENTITIES;
	}
	if (__unlikely(HIGH_ZONE.n_free + IO_ZONE.n_free + KERNEL_ZONE.n_free <= freepg + high_free_target)) {
		spl = KERNEL$SPL;
		RAISE_SPL(SPL_TOP);
		MEMORY_BALANCE();
		LOWER_SPLX(spl);
		/*if (__unlikely(!KERNEL_ZONE.n_free)) return NULL;*/
		return &FREEMEM_ABOVE;
	}
#endif
	/*ok_to_alloc:*/
	/* Admitted: account the request, saturating at MAXLONG. */
	spl = KERNEL$SPL;
	RAISE_SPL(SPL_TOP);
	if (__unlikely((p->alloc_rate += size) > MAXLONG)) p->alloc_rate = MAXLONG;
	LOWER_SPLX(spl);
	return NULL;
}


/*
 * Subtract `size` from the allocation rate of process p, clamping the result
 * at zero. The update is done with SPL raised to SPL_TOP and the previous
 * SPL is restored afterwards.
 */
void KERNEL$NOTIFY_FREE(PROC *p, unsigned size)
{
	unsigned long remaining;
	unsigned long keep_mask;
	int saved_spl = KERNEL$SPL;

	RAISE_SPL(SPL_TOP);
	remaining = p->alloc_rate - size;
	/*
	 * Branch-free clamp: the quotient is 1 when the subtraction wrapped
	 * into the sign bit, so the mask becomes 0; otherwise it is all ones.
	 */
	keep_mask = (remaining / __LONG_SGN_BIT) - 1;
	remaining &= keep_mask;
	p->alloc_rate = remaining;
	LOWER_SPLX(saved_spl);
}

/*
 * Add `size` to the allocation rate of process p, saturating at MAXLONG.
 * The update is done with SPL raised to SPL_TOP and the previous SPL is
 * restored afterwards.
 */
void KERNEL$NOTIFY_ALLOC(PROC *p, unsigned size)
{
	int saved_spl = KERNEL$SPL;

	RAISE_SPL(SPL_TOP);
	if (__unlikely((p->alloc_rate += size) > MAXLONG)) {
		p->alloc_rate = MAXLONG;
	}
	LOWER_SPLX(saved_spl);
}

/*
 * Set while somebody asked (mode != 0) to wait until a process has no
 * writeback tags left; cleared by KERNEL$RELEASE_WRITEBACK_TAG.
 */
static int dirty_wait_until_idle = 0;

/*
 * Decide whether process p may dirty more data.
 *
 * mode == 0: allowed (NULL) while p->writeback does not exceed
 *            1 + VM_WIRED_STREAM_QUEUE + VM_WIRED_STREAM_QUEUE / 4.
 * mode != 0: allowed (NULL) only when p has no writeback tags at all;
 *            otherwise dirty_wait_until_idle is set.
 *
 * The kernel process is never told to wait. In every other case
 * &WRITEBACK_WAIT is returned.
 */
WQ *KERNEL$MAY_DIRTY(PROC *p, unsigned mode)
{
	if (__unlikely(mode != 0)) {
		if (!p->writeback) return NULL;
		dirty_wait_until_idle = 1;
	} else if (__likely(p->writeback <= 1 + VM_WIRED_STREAM_QUEUE + VM_WIRED_STREAM_QUEUE / 4)) {
		return NULL;
	}

	if (__unlikely(p == &KERNEL$PROC_KERNEL)) {
		return NULL;
	}
	return &WRITEBACK_WAIT;
}

/*
 * Take one writeback tag for `proc` by adding 1 to proc->writeback via
 * __ADDI. The counter is consulted by KERNEL$MAY_DIRTY and decremented again
 * by KERNEL$RELEASE_WRITEBACK_TAG.
 */
/* !!! TODO: acquire tag also on all dirty pages of to-be-written fnode */
void KERNEL$ACQUIRE_WRITEBACK_TAG(PROC *proc)
{
	__ADDI(&proc->writeback, 1);
}

/*
 * Give back one writeback tag of `proc`.
 *
 * After decrementing proc->writeback:
 *  - if a wait-until-idle request is pending and the counter reached zero,
 *    WRITEBACK_WAIT is woken and the request flag is cleared;
 *  - when the counter equals (VM_WIRED_STREAM_QUEUE >> 1) + 1,
 *    WRITEBACK_WAIT is woken;
 *  - with __DEBUG >= 1 a negative counter is fatal;
 *  - a process flagged PR_RUNDOWN with no tags left triggers
 *    DELAYED_SHUTDOWN().
 */
void KERNEL$RELEASE_WRITEBACK_TAG(PROC *proc)
{
	__SUBI(&proc->writeback, 1);

	if (__unlikely(dirty_wait_until_idle) && !proc->writeback) {
		WQ_WAKE_ALL_PL(&WRITEBACK_WAIT);
		dirty_wait_until_idle = 0;
	}

	if (proc->writeback == (VM_WIRED_STREAM_QUEUE >> 1) + 1) {
		WQ_WAKE_ALL(&WRITEBACK_WAIT);
	}

#if __DEBUG >= 1
	if (__unlikely(proc->writeback < 0)) {
		KERNEL$SUICIDE("KERNEL$RELEASE_WRITEBACK_TAG: WRITEBACK COUNTER UNDERFLOW: %d", proc->writeback);
	}
#endif

	if (__unlikely(proc->flags & PR_RUNDOWN) && !proc->writeback) {
		/* SMP warning: the process may acquire another tag immediately while >= SPL_DEV */
		DELAYED_SHUTDOWN();
	}
}

/*
 * First-stage VM initialisation.
 *
 * Verifies that struct PAGE has exactly the size the PG_SIZEOF_STRUCT_PAGE*
 * constants promise (and that this size is a power of two), clears the
 * statistics, initialises every zone, zeroes all free targets and arranges
 * for the watermarks to be refreshed when the refresh countdown is next
 * decremented.
 */
void KERNEL_VM_INIT(void)
{
	int bad_layout;

	bad_layout = sizeof(PAGE) != PG_SIZEOF_STRUCT_PAGE;
	bad_layout = bad_layout || PG_SIZEOF_STRUCT_PAGE != (1 << PG_SIZEOF_STRUCT_PAGE_BITS);
	bad_layout = bad_layout || (PG_SIZEOF_STRUCT_PAGE & (PG_SIZEOF_STRUCT_PAGE - 1));
	if (bad_layout) {
		KERNEL$SUICIDE("KERNEL_VM_INIT: BAD PAGE SIZE: %d, %d, %d", (int)sizeof(PAGE), PG_SIZEOF_STRUCT_PAGE, PG_SIZEOF_STRUCT_PAGE_BITS);
	}

	memset(&VM_STATS, 0, sizeof VM_STATS);

#if KERNEL_HIGH_ZONE
	zone_init(HIGH_ZONE);
	high_free_target = 0;
#endif

	zone_init(IO_ZONE);
	io_free_target = 0;

	zone_init(KERNEL_ZONE);
	kernel_free_target = 0;

	user_free_target = 0;

	/* A countdown of one expires at the very next decrement. */
	VM_FREEMEM_WATERMARK_REFRESH = 1;
}

/*
 * Second-stage VM initialisation: install freemem_timer_fn as the handler of
 * freemem_timer and arm the timer for the first time with FREEMEM_JIFFIES.
 * The handler rearms the timer itself on every expiry.
 */
void KERNEL_VM_INIT_2(void)
{
	freemem_timer.fn = freemem_timer_fn;
	KERNEL$SET_TIMER(FREEMEM_JIFFIES, &freemem_timer);
}

/*
 * Fill `mem` (an array of KQM_N_ENTRIES unsigned longs) with a snapshot of
 * the memory statistics: the first N_VM_TYPES entries are copied from
 * VM_STATS, the KQM_*_FREE entries hold the free page counts of the zones.
 * Entries that are not written stay zero. The snapshot is taken at SPL_TOP.
 *
 * Always returns 0.
 */
int KERNEL$QUERY_MEMORY(unsigned long *mem)
{
	int saved_spl;

	memset(mem, 0, KQM_N_ENTRIES * sizeof *mem);

	saved_spl = KERNEL$SPL;
	RAISE_SPL(SPL_TOP);

	mem[KQM_KERNEL_FREE] = KERNEL_ZONE.n_free;
	mem[KQM_IO_FREE] = IO_ZONE.n_free;
#if KERNEL_HIGH_ZONE
	mem[KQM_HIGH_FREE] = HIGH_ZONE.n_free;
#endif
	/* Per-type counters go last, exactly N_VM_TYPES of them. */
	memcpy(mem, VM_STATS, N_VM_TYPES * sizeof *mem);

	LOWER_SPLX(saved_spl);
	return 0;
}

/*
 * Return the number of free pages that exceed the reserve, i.e. the sum of
 * the free counts of all zones minus (zone free target + user free target),
 * or 0 when the reserve is not covered. Also returns 0 whenever VM_ENTITIES
 * is above VM_ENTITIES_SOFT_LIMIT.
 */
unsigned long KERNEL_FREE_MEM(void)
{
	unsigned long avail, reserve;

	if (__unlikely(VM_ENTITIES > VM_ENTITIES_SOFT_LIMIT)) {
		return 0;
	}

	avail = KERNEL_ZONE.n_free + IO_ZONE.n_free;
#if KERNEL_HIGH_ZONE
	avail += HIGH_ZONE.n_free;
	reserve = high_free_target + user_free_target;
#else
	reserve = io_free_target + user_free_target;
#endif

	return __likely(avail >= reserve) ? avail - reserve : 0;
}

/*
 * Compute how many pages are missing to satisfy the cache reserve.
 *
 * The reserve is taken from the environment variable
 * "@KERNEL$CACHE_RESERVED" (a byte count, converted to page clusters) when
 * it is set and parses; otherwise it is the VM_TYPE_USER_UNMAPPED memory
 * size divided by PAGE_CLUSTER_SIZE * CACHE_RESERVE_PCT. Twice the maximum
 * free target of the topmost zone plus one is added on top, and the result
 * is clamped to MAXULONG.
 *
 * Returns the shortfall of (cached + free pages) against that reserve, 0 if
 * there is none, and 1 if the memory statistics could not be queried.
 */
unsigned long KERNEL$VM_OOMKILL(void)
{
	unsigned long mem[KQM_N_ENTRIES];
	unsigned long available;
	unsigned long wanted;
	__u64 res;
	char *env;

	if (__unlikely(KERNEL$QUERY_MEMORY(mem))) return 1;

	env = getenv("@KERNEL$CACHE_RESERVED");
	if (__unlikely(env != NULL) && __likely(!__get_64_number(env, env + strlen(env), 0, &res))) {
		res /= PAGE_CLUSTER_SIZE;
	} else {
		res = KERNEL$GET_MEMORY_SIZE(VM_TYPE_USER_UNMAPPED) / (PAGE_CLUSTER_SIZE * CACHE_RESERVE_PCT);
	}

#if KERNEL_HIGH_ZONE
	res += (max_high_free_target << 1) + 1;
#else
	res += (max_io_free_target << 1) + 1;
#endif

	if (__unlikely(res > MAXULONG)) {
		wanted = MAXULONG;
	} else {
		wanted = res;
	}

	available = mem[VM_TYPE_CACHED_MAPPED] + mem[VM_TYPE_CACHED_UNMAPPED] + mem[KQM_KERNEL_FREE] + mem[KQM_IO_FREE] + mem[KQM_HIGH_FREE];

	if (available < wanted) {
		return wanted - available;
	}
	return 0;
}

/*
 * Print a summary of the memory state through __critical_printf():
 * per-type page counts (with the network memory, if the NET$MEMORY_LIMIT and
 * NET$MEMORY_AVAIL symbols can be resolved, shown separately from the wired
 * mapped pages), free pages per zone, the current free targets, VM entity
 * counts and the swap-I/O / balancing state.
 *
 * Does nothing if the memory statistics cannot be queried.
 */
void KERNEL$MEMSTAT_DUMP(void)
{
	unsigned long active;
	unsigned long mem[KQM_N_ENTRIES];
	long *net_limit, *net_avail;
	unsigned long net_mem;
	if (__unlikely(KERNEL$QUERY_MEMORY(mem))) return;
	/* The network stack is optional; look its counters up at run time. */
	net_limit = dlsym(RTLD_DEFAULT, "NET$MEMORY_LIMIT");
	net_avail = dlsym(RTLD_DEFAULT, "NET$MEMORY_AVAIL");
	if (__likely(net_limit != NULL) && __likely(net_avail != NULL)) net_mem = *net_limit - *net_avail;
	else net_mem = 0;
	__critical_printf("RESERVED: %lu, NET MAPPED: %lu, WIRED MAPPED: %lu, WIRED UNMAPPED: %lu\nCACHED MAPPED: %lu, CACHED UNMAPPED: %lu, USER MAPPED: %lu, USER UNMAPPED: %lu\nKERNEL FREE: %lu, IO FREE: %lu, HIGH FREE: %lu\n", mem[VM_TYPE_RESERVED], net_mem, mem[VM_TYPE_WIRED_MAPPED] - net_mem, mem[VM_TYPE_WIRED_UNMAPPED], mem[VM_TYPE_CACHED_MAPPED], mem[VM_TYPE_CACHED_UNMAPPED], mem[VM_TYPE_USER_MAPPED], mem[VM_TYPE_USER_UNMAPPED], mem[KQM_KERNEL_FREE], mem[KQM_IO_FREE], mem[KQM_HIGH_FREE]);
	__critical_printf("KERNEL TARGET: %lu, IO TARGET: %lu, HIGH TARGET: %lu, USER TARGET: %lu\n", kernel_free_target, io_free_target,
#if !KERNEL_HIGH_ZONE
	/* %lu expects an unsigned long argument. */
	0UL
#else
	high_free_target
#endif
	, user_free_target);
	active = CACHE_ACTIVE_ENTITIES();
	__critical_printf("VM ENTITIES: %lu, ACTIVE %lu, INACTIVE %lu\n", VM_ENTITIES, active, VM_ENTITIES - active);
	/* Argument order follows the labels: BALANCE, CACHE FREE, PRESSURE, TIMER. */
	__critical_printf("SWAPIO: %d (MAX %d), BALANCE: %d, CACHE FREE: %d, PRESSURE: %d, TIMER: %d\n", IO_IN_PROGRESS, MAX_IO_IN_PROGRESS, memory_balance_in_progress, cache_free_in_progress, VM_PRESSURE, vm_timer_pending);
}

/*
 * Debug consistency check of the free lists.
 *
 * Only active when TEST_MEMORY is defined (which the top of this file does
 * for __DEBUG >= 2); otherwise the function is empty. With SPL raised to
 * SPL_TOP it walks the free list of every zone, maps each free page and
 * verifies that the three magic words at TEST_OFFSET_1..3 are intact.
 * A mismatch is reported through TEST_PAGE_FAILED().
 */
void VM_CHECK_MAGICS(void)
{
#ifdef TEST_MEMORY
	int i;
	int spl = KERNEL$SPL;
	RAISE_SPL(SPL_TOP);
	/* The loop header differs by configuration; the body below is shared. */
#if !KERNEL_HIGH_ZONE
	for (i = 0; i < 2; i++) {
		struct pg_zone *z = !i ? &KERNEL_ZONE : &IO_ZONE;
#else
	for (i = 0; i < 3; i++) {
		struct pg_zone *z = !i ? &KERNEL_ZONE : i == 1 ? &IO_ZONE : &HIGH_ZONE;
#endif
		PAGE *p;
		/* Free pages are linked into the zone free list through hash_entry. */
		LIST_FOR_EACH(p, &z->freelist, PAGE, hash_entry) {
			__u32 *v = KERNEL$MAP_PHYSICAL_PAGE(p);
			if (__unlikely(v[TEST_OFFSET_1] != MAGIC_1) ||
			    __unlikely(v[TEST_OFFSET_2] != MAGIC_2) ||
			    __unlikely(v[TEST_OFFSET_3] != MAGIC_3)) {
				TEST_PAGE_FAILED(p, v);
			}
			/* Release the mapping obtained above. */
			KERNEL$UNMAP_PHYSICAL_BANK(v);
		}
	}
	LOWER_SPLX(spl);
#endif
}
