#include <SPAD/LIBC.H>
#include <SPAD/LIST.H>
#include <SYS/TYPES.H>
#include <ARCH/SETUP.H>
#include <KERNEL/ASM.H>
#include <KERNEL/VM.H>
#include <KERNEL/PROC.H>
#include <SPAD/SLAB.H>
#include <KERNEL/UDATA.H>
#include <ARCH/CPU.H>
#include <ARCH/BITOPS.H>
#include <ARCH/IO.H>
#include <SPAD/SYSLOG.H>
#include <KERNEL/PARAMS.H>
#include <VALUES.H>

#include <KERNEL/VM_ARCH.H>

/* Used by KERNEL$MAP_PHYSICAL_BANK: once a process has re-pointed this many
   virtual banks within one jiffy, the next request stops reusing the cached
   virtual bank and searches for a new one instead. */
#define MAX_MAP_SWAPS		32

unsigned long KERNEL$STACKPAGE = KUVMBASE;

/* Set by VM_BOOT_INIT: 0 when the CPU reports both CPU_HAS_4M_PAGES and
   CPU_INVLPG_BIG_PAGES, 1 otherwise. */
int FOLD_DL;

/* Type used for page table entries and physical addresses in this file:
   32-bit without PAE, 64-bit with PAE. */
#if !__KERNEL_USE_PAE
typedef __u32 paddr_t;
#else
typedef __u64 paddr_t;
#endif

/* Per-cluster back-reference embedded in struct pgtbl (one per PG_CLUSTER
   page table entries). PGTBL_CTOR fills in pgtbl and idx. */
struct rmap {
	LIST_ENTRY list;	/* list linkage; the list head is not visible in this part of the file */
	struct pgtbl *pgtbl;	/* page table that contains this rmap */
	unsigned idx;		/* index of the cluster's first entry in pgtbl->e */
};

/* A page table together with its bookkeeping. Instances come from the
   "pagetables" slab and are initialised by PGTBL_CTOR. */
struct pgtbl {
	paddr_t e[1024];	/* must be at least PG_SIZE large because it is used for iobmp and ldt pages too */
		/* some places in this code assume that non-present PTE won't have PTE_RW bit set */
	struct rmap rmap[1024 / PG_CLUSTER];	/* one back-reference per cluster of entries */
	LIST_ENTRY list;
	PROC *proc;
	unsigned idx;
	unsigned n_mappings;	/* zeroed by PGTBL_CTOR */
	VMENTITY vm;		/* vm.type is set to pagetable_vm_entity by PGTBL_CTOR */
};

/* Page table installed at VM_KERNEL_RESERVED_BANK of the kernel page
   directory by VM_INIT_BOOT_BANKS; the storage is defined outside this file. */
extern paddr_t VM_ZERO_PAGE_TABLE[1024];

/* Address of VM_ZERO_PAGE_TABLE, stored by VM_INIT_ZERO_PAGE_TABLE. */
__u32 VM_ZERO_PAGE_TABLE_PHYS;

/* Page directory entry that points at VM_ZERO_PAGE_TABLE. */
#define VM_ZERO_PAGE_TABLE_ENTRY	(VM_ZERO_PAGE_TABLE_PHYS | PTE_P | PTE_RW | PTE_US | PTE_A)

/*
 * Page directory slot accessors. "pgd" is a PROC pointer, "off" a slot index.
 *
 * Without PAE a slot is a single field "e". With PAE a slot consists of two
 * fields "e1" and "e2":
 *   SET_PGDIR_ENTRY      writes entry to e1 and (entry | 0x1000) to e2.
 *   SET_PGDIR_ENTRY_BIG  writes entry to e1; e2 gets (entry | 0x200000) when
 *                        PTE_PS is set in entry and (entry | 0x1000) otherwise
 *                        (assuming PTE_PS is a single bit below 0x1000 --
 *                        TODO confirm against the PTE_* definitions).
 *   SET_PGDIR_ENTRY_EMPTY stores val << 1 (bit 0 is therefore always clear)
 *                        and, with PAE, zeroes e2.
 *   CMP_PGDIR_ENTRY / GET_PGDIR_ENTRY look only at e (or e1 with PAE).
 */
#if !__KERNEL_USE_PAE
#define SET_PGDIR_ENTRY(pgd, off, entry) ((pgd)->pgdir[off].e = (entry))
#define SET_PGDIR_ENTRY_BIG SET_PGDIR_ENTRY
#define SET_PGDIR_ENTRY_EMPTY(pgd, off, val) SET_PGDIR_ENTRY(pgd, off, (val) << 1)
#define CMP_PGDIR_ENTRY(pgd, off, entry) ((pgd)->pgdir[off].e == (entry))
#define GET_PGDIR_ENTRY(pgd, off) ((pgd)->pgdir[off].e)
#else
#define SET_PGDIR_ENTRY(pgd, off, entry) ((pgd)->pgdir[off].e1 = (entry), (pgd)->pgdir[off].e2 = (entry) | 0x1000)
#define SET_PGDIR_ENTRY_BIG(pgd, off, entry) ((pgd)->pgdir[off].e1 = (entry), (pgd)->pgdir[off].e2 = (entry) | (0x200000 - ((((__u32)(entry) & PTE_PS) - 1) & 0x1ff000)))
#define SET_PGDIR_ENTRY_EMPTY(pgd, off, val) ((pgd)->pgdir[off].e1 = (__u32)(val) << 1, (pgd)->pgdir[off].e2 = 0)
#define CMP_PGDIR_ENTRY(pgd, off, entry) ((pgd)->pgdir[off].e1 == (entry))
#define GET_PGDIR_ENTRY(pgd, off) ((pgd)->pgdir[off].e1)
#endif

/* Entry that SET_SPAGE_MAPPING writes into the unused tail of a cluster.
   IS_SPAGE_PADDING ignores the accessed bit when comparing. */
#define SPAGE_PADDING_ENTRY	(ZERO_PAD_PHYS + PTE_P + PTE_US + PTE_A)
#define IS_SPAGE_PADDING(e)	(((e) | PTE_A) == SPAGE_PADDING_ENTRY)

/* Number of physical banks that PG_MAXPAGES pages span. */
#define BANKS	(PG_MAXPAGES / PG_BANK)

int PBANKS[BANKS];		/* PBANK_* flags of every physical bank */
int N_PBANKS;			/* number of physical banks currently covered by MEMSIZE */
static __u64 MEMSIZE;		/* current memory limit in bytes */
static int N_DIRECT_PBANKS;	/* physical banks permanently mapped from VM_KERNEL_DIRECT_BANK up */
static __u32 DIRECT_SIZE;	/* == N_DIRECT_PBANKS << (PG_BANK_BITS + PG_SIZE_BITS); */

/* VM_PBANK_MAP: for a physical bank, the virtual bank last chosen for it by
   KERNEL$MAP_PHYSICAL_BANK (0 means none yet).
   VM_VBANK_MAP: per virtual bank either VBANK_FREE, VBANK_RESERVED, or a use
   count biased by one (0 means a single user), maintained with INCL/DECL in
   KERNEL$MAP_PHYSICAL_BANK / KERNEL$UNMAP_PHYSICAL_BANK. */
static int VM_PBANK_MAP[BANKS];
static int VM_VBANK_MAP[1024];
#define VBANK_FREE		(-1)		/* must be -1 */
#define VBANK_RESERVED		(-MAXINT)	/* any number that doesn't interfere with others */
static int VM_VBANK_MAP_CLOCK;	/* index where the next search for a free virtual bank starts */
static int N_USEABLE_VBANKS;	/* virtual banks still available for temporary mappings */

/* Bytes of the PAGE array that currently have backing page table entries. */
static unsigned long PAGE_ARRAY_SIZE;

/* Without PAE, GLOBAL_BIT starts as 0 and VM_BOOT_INIT sets it to PTE_G when
   the CPU reports CPU_HAS_GLOBAL_PAGES; with PAE both features are assumed. */
#if !__KERNEL_USE_PAE
static __u32 GLOBAL_BIT = 0;
#define HAS_4M_PAGES	(KERNEL$CPU_FEATURES & CPU_HAS_4M_PAGES)
#else
#define GLOBAL_BIT	PTE_G
#define HAS_4M_PAGES	1
#endif

/* Conversions between a PAT_* cache-mode value and the PWT/PCD/PAT/RW bits of
   a page table entry (PAT2PTE / PTE2PAT) or of a big-page directory entry
   (PAT2PDE / PDE2PAT). PAT_RO is stored inverted in PTE_RW. */
#define PAT2PTE(pat)	((((pat) << P_PTE_PWT) & (PTE_PWT | PTE_PCD)) | (((pat) << (P_PTE_PAT_PTE - __BSR_CONST(PAT_HIGH_BIT))) & PTE_PAT_PTE) | ((~(pat) >> (__BSR_CONST(PAT_RO) - P_PTE_RW)) & PTE_RW))
#define PTE2PAT(pte)	((((__u32)(pte) >> P_PTE_PWT) & 3) | (((__u32)(pte) >> (P_PTE_PAT_PTE - __BSR_CONST(PAT_HIGH_BIT))) & PAT_HIGH_BIT) | ((~(__u32)(pte) << (__BSR_CONST(PAT_RO) - P_PTE_RW)) & PAT_RO))
#define PAT2PDE(pat)	((((pat) << P_PTE_PWT) & (PTE_PWT | PTE_PCD)) | (((pat) << (P_PTE_PAT_PDE - __BSR_CONST(PAT_HIGH_BIT))) & PTE_PAT_PDE) | ((~(pat) >> (__BSR_CONST(PAT_RO) - P_PTE_RW)) & PTE_RW))
#define PDE2PAT(pte)	((((__u32)(pte) >> P_PTE_PWT) & 3) | (((__u32)(pte) >> (P_PTE_PAT_PDE - __BSR_CONST(PAT_HIGH_BIT))) & PAT_HIGH_BIT) | ((~(__u32)(pte) << (__BSR_CONST(PAT_RO) - P_PTE_RW)) & PAT_RO))

/* Build a present, accessed, dirty entry for physical address i with the
   given cache mode: a 4K PTE, or a big-page PDE (PTE_PS set). */
#define PHYSMAP_PTE(i, pat)	((i) | (PTE_P | PTE_A | PTE_D | PAT2PTE(pat)))
#define PHYSMAP_PDE(i, pat)	((i) | (PTE_P | PTE_A | PTE_D | PTE_PS | PAT2PDE(pat)))

/* VM_ARCH_NEW_PBANK refuses to extend the direct mapping when no more than
   this many virtual banks remain useable. */
#define VM_MIN_USEABLE_VBANKS ((SPL_TOP - SPL_ZERO + 1) * 2)

/* Page tables for the first two banks, used by VM_INIT_486MAP when the CPU
   has no 4M pages; the storage is defined outside this file. */
extern __u32 BASE_PHYSMAP_486[PG_BANK * 2];

/* Page directory entry template for every physical bank. */
paddr_t PHYSMAP_486[BANKS];

/* Directory entry for physical bank x, with GLOBAL_BIT added when g is nonzero. */
#define PHYSMAP_ENTRY(x, g)	(PHYSMAP_486[(unsigned)x] | ((g) ? GLOBAL_BIT : 0))

/* Zeroed by VM_INIT_ZERO_PAGE_TABLE; not otherwise used in this part of the file. */
static int MEMMAP_REFCOUNTS[MEMMAP_PAGES];

/*
 * Build the initial PHYSMAP_486 directory templates for the first two banks.
 * With 4M pages each bank gets one write-back big-page entry; otherwise
 * BASE_PHYSMAP_486 is filled with write-back 4K entries and each bank's
 * template points at its slice of that table.
 */
static void VM_INIT_486MAP(void)
{
	unsigned bank, page;
	const unsigned n_pages = sizeof(BASE_PHYSMAP_486) / sizeof(*BASE_PHYSMAP_486);

	memset(PHYSMAP_486, 0, sizeof PHYSMAP_486);

	if (__likely(HAS_4M_PAGES)) {
		for (bank = 0; bank < 2; bank++) {
			PHYSMAP_486[bank] = PHYSMAP_PDE(bank << (PG_BANK_BITS + PG_SIZE_BITS), PAT_WB);
		}
		return;
	}

	for (page = 0; page < n_pages; page++) {
		BASE_PHYSMAP_486[page] = PHYSMAP_PTE(page << PG_SIZE_BITS, PAT_WB);
		if (__likely(page & (PG_BANK - 1))) continue;
		/* First page of a bank. This runs without paging, so the
		   table's physical address equals its virtual address. */
		PHYSMAP_486[page / PG_BANK] = (unsigned long)(BASE_PHYSMAP_486 + page) | PTE_P | PTE_RW | PTE_A;
	}
}

static void VM_INIT_ZERO_PAGE_TABLE(void)
{
	memset(&VM_ZERO_PAGE_TABLE, 0, sizeof VM_ZERO_PAGE_TABLE);
	VM_ZERO_PAGE_TABLE[RESERVED_APAGE] = (__u32)APAGE | PTE_P | PTE_US | PTE_A | GLOBAL_BIT;
	VM_ZERO_PAGE_TABLE[RESERVED_TSS] = (__u32)TSS_PAGE | PTE_P | PTE_RW | PTE_A | PTE_D | GLOBAL_BIT;
	VM_ZERO_PAGE_TABLE_PHYS = (__u32)VM_ZERO_PAGE_TABLE;
	memset(&MEMMAP_REFCOUNTS, 0, sizeof MEMMAP_REFCOUNTS);
}

/*
 * Set up the boot-time kernel page directory and the bank tables: two
 * directly mapped banks (code and data, both flagged ISA-DMA capable), the
 * reserved bank pointing at VM_ZERO_PAGE_TABLE, and the virtual bank map.
 * Executed in unpaged mode.
 */
static void VM_INIT_BOOT_BANKS(void)
{
	int vb;
#if __KERNEL_USE_PAE != 0
	int quarter;
#endif

	memset(&KERNEL$PROC_KERNEL.pgdir, 0, sizeof KERNEL$PROC_KERNEL.pgdir);
#if __KERNEL_USE_PAE != 0
	for (quarter = 0; quarter < 4; quarter++) {
		KERNEL$PROC_KERNEL.pgdir1[quarter] = (__u64)&KERNEL$PROC_KERNEL.pgdir[quarter * 256];
	}
#endif
	SET_PGDIR_ENTRY_BIG(&KERNEL$PROC_KERNEL, VM_KERNEL_DIRECT_BANK, PHYSMAP_ENTRY(0, 1));
	SET_PGDIR_ENTRY_BIG(&KERNEL$PROC_KERNEL, VM_KERNEL_DIRECT_BANK + 1, PHYSMAP_ENTRY(1, 1));
	SET_PGDIR_ENTRY(&KERNEL$PROC_KERNEL, VM_KERNEL_RESERVED_BANK, VM_ZERO_PAGE_TABLE_ENTRY);

	/* Physical banks: only the first two exist at boot. */
	memset(PBANKS, 0, sizeof PBANKS);
	PBANKS[0] = PBANK_CODE | PBANK_ISADMA;	/* when memory is extended to 12M, DMA flag will be cleared */
	PBANKS[1] = PBANK_DATA | PBANK_ISADMA;
	N_PBANKS = 2;
	N_DIRECT_PBANKS = 2;
	MEMSIZE = 2 * PG_SIZE * PG_BANK;
	DIRECT_SIZE = N_DIRECT_PBANKS << (PG_BANK_BITS + PG_SIZE_BITS);

	/* Virtual banks: everything below the direct bank is reserved, the
	   rest starts free, then the fixed kernel banks are reserved. */
	memset(VM_PBANK_MAP, 0, sizeof VM_PBANK_MAP);
	for (vb = 0; vb < 1024; vb++) {
		VM_VBANK_MAP[vb] = vb < VM_KERNEL_DIRECT_BANK ? VBANK_RESERVED : VBANK_FREE;
	}
	VM_VBANK_MAP[VM_KERNEL_DIRECT_BANK] = VBANK_RESERVED;
	VM_VBANK_MAP[VM_KERNEL_DIRECT_BANK + 1] = VBANK_RESERVED;
	VM_VBANK_MAP[VM_KERNEL_RESERVED_BANK] = VBANK_RESERVED;
	VM_VBANK_MAP[VM_KERNEL_COPY_OF_LAST_BANK] = VBANK_RESERVED;
	N_USEABLE_VBANKS = VM_KERNEL_RESERVED_BANK - VM_KERNEL_DIRECT_BANK - 2;
	VM_VBANK_MAP_CLOCK = 0;
}

/* Only the address of PAGE_INIT is used: VM_INIT_PAGE_MAP maps PG_CLUSTER
   pages starting there through PAGE_PGTABLE. Both symbols are defined
   outside this file. */
extern void PAGE_INIT;

extern struct pgtbl PAGE_PGTABLE;

/*
 * Map the first cluster of pages starting at PAGE_INIT through PAGE_PGTABLE
 * and hook that table into the kernel page directory at VM_KERNEL_PAGE_BANK.
 */
static void VM_INIT_PAGE_MAP(void)
{
	int slot = 0;

	memset(&PAGE_PGTABLE, 0, sizeof PAGE_PGTABLE);
	while (slot < PG_CLUSTER) {
		PAGE_PGTABLE.e[slot] = ((unsigned long)&PAGE_INIT + (slot * PG_SIZE)) | PTE_P | PTE_RW | PTE_A | PTE_D | GLOBAL_BIT;
		slot++;
	}
	PAGE_ARRAY_SIZE = PG_SIZE * PG_CLUSTER;
	SET_PGDIR_ENTRY(&KERNEL$PROC_KERNEL, VM_KERNEL_PAGE_BANK, (unsigned long)&PAGE_PGTABLE | PTE_P | PTE_RW | PTE_A);
}

/* Maximum number of memory holes that can be recorded. */
#define N_HOLES		1024

/* Hole i occupies holes[2 * i] (start) and holes[2 * i + 1] (end). ADD_PAGE
   treats a page cluster that overlaps [start, end) as reserved. */
static __u64 holes[N_HOLES * 2];

/* Number of holes currently stored. */
static int n_holes;

/* Seed the hole table with the two boot-time holes:
   0x0 - 0x10000 and 0xa0000 - 0x100000. */
static void VM_INIT_HOLES(void)
{
	holes[0 * 2] = 0x0;
	holes[0 * 2 + 1] = 0x10000;

	holes[1 * 2] = 0xa0000;
	holes[1 * 2 + 1] = 0x100000;

	n_holes = 2;
}

/*
 * Boot-time VM initialisation. Detects global-page support (non-PAE only),
 * computes FOLD_DL, runs the VM_INIT_* helpers, and maps physical bank 0 at
 * directory slot 0 (removed again in VM_BOOT_GETMEM).
 *
 * Returns the top-level paging structure of the kernel process: pgdir
 * without PAE, pgdir1 with PAE.
 */
void *VM_BOOT_INIT(void)
{
	const unsigned long big_invlpg = CPU_HAS_4M_PAGES | CPU_INVLPG_BIG_PAGES;

#if !__KERNEL_USE_PAE
	if (KERNEL$CPU_FEATURES & CPU_HAS_GLOBAL_PAGES) {
		GLOBAL_BIT = PTE_G;
	}
#endif
	/* FOLD_DL is 0 only when both features are present. */
	FOLD_DL = (KERNEL$CPU_FEATURES & (CPU_HAS_4M_PAGES | CPU_INVLPG_BIG_PAGES)) != big_invlpg;

	VM_INIT_486MAP();
	VM_INIT_ZERO_PAGE_TABLE();
	VM_INIT_BOOT_BANKS();
	VM_INIT_PAGE_MAP();
	VM_INIT_HOLES();
	SET_PGDIR_ENTRY_BIG(&KERNEL$PROC_KERNEL, 0, PHYSMAP_ENTRY(0, 0));

#if !__KERNEL_USE_PAE
	return KERNEL$PROC_KERNEL.pgdir;
#else
	return KERNEL$PROC_KERNEL.pgdir1;
#endif
}

/* Physical range recorded by VM_BOOT_GETMEM (free_later .. code_top) whose
   pages VM_BOOT_GET_MORE_MEM hands to KERNEL$FREE_USER_PAGE. */
static unsigned long START_FREE_LATER = 0;
static unsigned long END_FREE_LATER = 0;

/*
 * Register page cluster p with VM_ADD_PAGE.
 *
 * A nonzero res is passed through unchanged. When res is 0 the page is
 * registered with res = 1 instead if it lies at or above MEMSIZE or overlaps
 * any recorded memory hole.
 */
static void ADD_PAGE(PAGE *p, int res)
{
	if (__likely(!res)) {
		paddr_t phys = PAGE_2_PHYS(p);

		if (__unlikely(phys >= MEMSIZE)) {
			res = 1;
		} else {
			int h;

			for (h = 0; h < n_holes; h++) {
				if (phys + PAGE_CLUSTER_SIZE > holes[h * 2] && __unlikely(phys < holes[h * 2 + 1])) {
					res = 1;
					break;
				}
			}
		}
	}
	VM_ADD_PAGE(p, res);
}

/*
 * Hand the boot memory below mem_top to the page allocator.
 *
 * Makes the kernel process current, removes the boot mapping at directory
 * slot 0, records [free_later, code_top) for VM_BOOT_GET_MORE_MEM, then
 * walks the page map downwards from mem_top. Pages inside the kernel code
 * or data ranges are added with res = 2, all others with res = 0.
 * Halts via KERNEL$SUICIDE if code or data reach mem_top.
 */
void VM_BOOT_GETMEM(unsigned long code_bottom, unsigned long free_later, unsigned long code_top, unsigned long data_bottom, unsigned long data_top, unsigned long mem_top)
{
	PAGE *p;

	KERNEL_VM_INIT();
	PROC_CURRENT = &KERNEL$PROC_KERNEL;
	PROC_CURRENT_LOCK = 0;
	SET_PGDIR_ENTRY_EMPTY(&KERNEL$PROC_KERNEL, 0, 0);
	TLB_INVD_NG();

	if (code_top >= mem_top || data_top >= mem_top) {
		KERNEL$SUICIDE("VM_BOOT_GETMEM: KERNEL TOO LARGE: CODE %08lX - %08lX, DATA %08lX - %08lX, TOP %08lX", code_bottom, code_top, data_bottom, data_top, mem_top);
	}

	START_FREE_LATER = free_later;
	END_FREE_LATER = code_top;

	p = KERNEL_PAGE_MAP + mem_top / PG_SIZE / PG_CLUSTER - 1;
	while (p >= KERNEL_PAGE_MAP) {
		paddr_t phys = PAGE_2_PHYS(p);
		int in_code = phys >= code_bottom && phys < code_top;
		int in_data = phys >= data_bottom && phys < data_top;

		ADD_PAGE(p, (in_code || in_data) ? 2 : 0);
		p--;
	}
	REFRESH_PARAMS();
}

/* Release the pages covering [START_FREE_LATER, END_FREE_LATER), highest
   first, with KERNEL$FREE_USER_PAGE. */
void VM_BOOT_GET_MORE_MEM(void)
{
	PAGE *p = KERNEL_PAGE_MAP + END_FREE_LATER / PG_SIZE / PG_CLUSTER - 1;

	while (p >= KERNEL_PAGE_MAP + START_FREE_LATER / PG_SIZE / PG_CLUSTER) {
		KERNEL$FREE_USER_PAGE(p, VM_TYPE_WIRED_MAPPED);
		p--;
	}
}

/*
 * Return a kernel virtual address through which physical address physaddr_
 * can be accessed; release it with one of the KERNEL$UNMAP_PHYSICAL_BANK*
 * functions.
 *
 * Addresses below DIRECT_SIZE are served from the permanent direct mapping.
 * Anything else is mapped through a temporary virtual bank in PROC_CURRENT's
 * page directory: the bank last used for this physical bank is reused when
 * possible, otherwise a free one is claimed by a clock-style scan of
 * VM_VBANK_MAP. The process is locked down (LOCKDOWN_PROC) for as long as
 * the mapping is held; on the direct path that only happens with
 * __DEBUG >= 2.
 */
void *KERNEL$MAP_PHYSICAL_BANK(__p_addr physaddr_)
{
	int spl;
	__u32 e;
	paddr_t physaddr = physaddr_ & (((paddr_t)PG_MAXPAGES << PG_SIZE_BITS) - 1);
	int *pbank_map;
	int vbank;
	if (physaddr < DIRECT_SIZE) {
#if __DEBUG >= 2
		LOCKDOWN_PROC();
#endif
		return (void *)(VM_KERNEL_DIRECT_OFFSET + (__u32)physaddr);
	}
	LOCKDOWN_PROC();
	pbank_map = &VM_PBANK_MAP[physaddr >> (PG_SIZE_BITS + PG_BANK_BITS)];
	if (__likely(*pbank_map)) {
		/* a virtual bank was used for this physical bank before */
		vbank = *pbank_map;
		if (__unlikely(VM_VBANK_MAP[vbank] < VBANK_FREE)) goto find_new;
		/* take a reference on it */
		__asm__ volatile("INCL VM_VBANK_MAP(, %0, 4)" : : "r"(vbank) : "memory", "cc");
		if (__likely(CMP_PGDIR_ENTRY(PROC_CURRENT, vbank, PHYSMAP_ENTRY(physaddr >> (PG_SIZE_BITS + PG_BANK_BITS), 0)))) {
			/* the current process already maps the wanted bank there */
			goto ret;
		}
		if (__likely(!VM_VBANK_MAP[vbank])) {
			/* we are the only user, so the bank may be re-pointed */
			/* try to avoid thrashing */
			u_jiffies_lo_t j;
			j = KERNEL$GET_JIFFIES_LO();
			if (j != PROC_CURRENT->last_map_swap_time) {
				PROC_CURRENT->last_map_swap_time = j;
				PROC_CURRENT->n_map_swaps = 0;
				goto fnd;
			}
			if (__unlikely(PROC_CURRENT->last_swapped_map == vbank)) goto no_fnd;
			PROC_CURRENT->last_swapped_map = vbank;
			if (__unlikely(++PROC_CURRENT->n_map_swaps >= MAX_MAP_SWAPS)) {
				PROC_CURRENT->n_map_swaps = 0;
				goto no_fnd;
			}
			goto fnd;
			no_fnd:;
		}
		/* give the reference back and look for another virtual bank */
		__asm__ volatile("DECL VM_VBANK_MAP(, %0, 4)" : : "r"(vbank) : "memory", "cc");
	}
	find_new:
	/* Scan from VM_VBANK_MAP_CLOCK: INCL turns a free slot (-1) into 0 and
	   falls through with the slot claimed; any other result undoes the
	   increment and advances, wrapping at 1024 to the first bank after the
	   direct mapping. */
	__asm__ volatile ("					\n\
2:	INCL	VM_VBANK_MAP(, %0, 4)				\n\
	JNE	1f						\n\
	.SECTION .text.end					\n\
	.ALIGN	"__stringify(__CPU_BRANCH_ALIGN)"		\n\
1:	DECL	VM_VBANK_MAP(, %0, 4)				\n\
	INCL	%0						\n\
	CMPL	$1024, %0					\n\
	JB	2b						\n\
	MOVL	N_DIRECT_PBANKS, %0				\n\
	ADDL	$"__stringify(VM_KERNEL_DIRECT_BANK)", %0	\n\
	JMP	2b						\n\
	.PREVIOUS" : "=r"(vbank) : "0"(VM_VBANK_MAP_CLOCK) : "memory", "cc");
	if (__likely(vbank < 1024 - 1)) VM_VBANK_MAP_CLOCK = vbank + 1;
	else VM_VBANK_MAP_CLOCK = VM_KERNEL_DIRECT_BANK + N_DIRECT_PBANKS;
	*pbank_map = vbank;
	fnd:
	spl = KERNEL$SPL;
	RAISE_SPL(SPL_TOP);
	/* it may happen that interrupt here attempts to use new map before we
	   invalidate TLB */
	e = GET_PGDIR_ENTRY(PROC_CURRENT, vbank);
	SET_PGDIR_ENTRY_BIG(PROC_CURRENT, vbank, PHYSMAP_ENTRY(physaddr >> (PG_SIZE_BITS + PG_BANK_BITS), 0));
	/* can use invlpg if
		CPU has dedicated TLB for big entries  and
		mapping has not been split because of PAT settings  and
		it is not 4M page at physical offset 0 (these probably use
		split TLB entries because they may have different MTRR)

		if the mapping is non-present, it has PTE_PS bit also zero,
		so we will invalidate whole TLB. That is right because we don't
		know what has been mapped here before.
	*/
	if (__likely(KERNEL$CPU_FEATURES & CPU_INVLPG_BIG_PAGES) && __likely(e & PTE_PS) && __likely(e & ~(PG_SIZE * PG_BANK - 1))) TLB_INVD_PG(vbank << (PG_SIZE_BITS + PG_BANK_BITS));
	else TLB_INVD_NG();
	LOWER_SPLX(spl);
	ret:
	/* virtual bank base plus the offset of physaddr within its bank */
	return (void *)((vbank << (PG_SIZE_BITS + PG_BANK_BITS)) + ((__u32)physaddr & (PG_SIZE * PG_BANK - 1)));
}

/* KERNEL$UNMAP_PHYSICAL_BANK and KERNEL$UNMAP_PHYSICAL_BANK_ADDR may be called
   with a different address than the one returned by the mapping call, as long
   as it lies in the same bank */

/*
 * Release a mapping obtained from KERNEL$MAP_PHYSICAL_BANK or
 * KERNEL$MAP_PHYSICAL_PAGE. ptr may be any address inside the mapped bank.
 *
 * Pointers into the direct mapping need no bookkeeping (the lockdown is only
 * undone with __DEBUG >= 2). For any other pointer the virtual bank's entry
 * in VM_VBANK_MAP is decremented and the process lockdown is released.
 */
void KERNEL$UNMAP_PHYSICAL_BANK(void *ptr)
{
	if ((unsigned long)ptr < (VM_KERNEL_DIRECT_BANK << (PG_SIZE_BITS + PG_BANK_BITS)) + DIRECT_SIZE) {
#if __DEBUG >= 2
		UNLOCKDOWN_PROC();
#endif
		return;
	}
	/* drop the reference taken by KERNEL$MAP_PHYSICAL_BANK */
	__asm__ volatile("DECL VM_VBANK_MAP(, %0, 4)" : : "r"((unsigned long)(ptr) >> (PG_SIZE_BITS + PG_BANK_BITS)) : "memory", "cc");
	UNLOCKDOWN_PROC();
}

/*
 * Release the mapping that contains ptr and return the physical address
 * ptr referred to.
 *
 * For the direct mapping this is a plain offset subtraction. Otherwise the
 * bank's physical base comes from the current process's directory entry, or
 * from the first entry of the page table it points to when the entry is not
 * a big page.
 */
__p_addr KERNEL$UNMAP_PHYSICAL_BANK_ADDR(void *ptr)
{
	const unsigned long va = (unsigned long)ptr;
	paddr_t entry;

	if (va < (VM_KERNEL_DIRECT_BANK << (PG_SIZE_BITS + PG_BANK_BITS)) + DIRECT_SIZE) {
#if __DEBUG >= 2
		UNLOCKDOWN_PROC();
#endif
		return va - VM_KERNEL_DIRECT_OFFSET;
	}

	entry = GET_PGDIR_ENTRY(PROC_CURRENT, va >> (PG_SIZE_BITS + PG_BANK_BITS));
	if (__unlikely(!(entry & PTE_PS))) {
		/* split mapping: read the first entry of the page table */
		entry = *(paddr_t *)((entry & ~(paddr_t)(PG_SIZE - 1)) + VM_KERNEL_DIRECT_OFFSET);
	}
	KERNEL$UNMAP_PHYSICAL_BANK(ptr);
	return (entry & ~(paddr_t)(PG_SIZE * PG_BANK - 1)) | (va & (PG_SIZE * PG_BANK - 1));
}

/*
 * Release the mapping that contains ptr and return the PAGE describing the
 * physical address ptr referred to.
 *
 * The physical address is recovered as in KERNEL$UNMAP_PHYSICAL_BANK_ADDR
 * and then converted with PHYS_2_PAGE.
 */
PAGE *KERNEL$UNMAP_PHYSICAL_BANK_PAGE(void *ptr)
{
	const unsigned long va = (unsigned long)ptr;
	paddr_t entry;

	if (va < (VM_KERNEL_DIRECT_BANK << (PG_SIZE_BITS + PG_BANK_BITS)) + DIRECT_SIZE) {
#if __DEBUG >= 2
		UNLOCKDOWN_PROC();
#endif
		return PHYS_2_PAGE(va - VM_KERNEL_DIRECT_OFFSET);
	}

	entry = GET_PGDIR_ENTRY(PROC_CURRENT, va >> (PG_SIZE_BITS + PG_BANK_BITS));
	if (__unlikely(!(entry & PTE_PS))) {
		/* split mapping: read the first entry of the page table */
		entry = *(paddr_t *)((entry & ~(paddr_t)(PG_SIZE - 1)) + VM_KERNEL_DIRECT_OFFSET);
	}
	KERNEL$UNMAP_PHYSICAL_BANK(ptr);
	return PHYS_2_PAGE((entry & ~(paddr_t)(PG_SIZE * PG_BANK - 1)) | (va & (PG_SIZE * PG_BANK - 1)));
}

/*
 * Return a kernel virtual address for page p.
 *
 * Pages inside the directly mapped banks are translated arithmetically from
 * their position in KERNEL_PAGE_MAP; every other page goes through
 * KERNEL$MAP_PHYSICAL_BANK.
 */
void *KERNEL$MAP_PHYSICAL_PAGE(PAGE *p)
{
	if (p >= KERNEL_PAGE_MAP + (N_DIRECT_PBANKS << (PG_BANK_BITS - PG_CLUSTER_BITS))) {
		return KERNEL$MAP_PHYSICAL_BANK((paddr_t)PAGE_2_PHYS(p));
	}
#if __DEBUG >= 2
	LOCKDOWN_PROC();
#endif
	/* byte offset of p in the page map, scaled to the offset of its memory */
	return (void *)((((char *)p - (char *)KERNEL_PAGE_MAP) << (PG_SIZE_BITS + PG_CLUSTER_BITS - PG_SIZEOF_STRUCT_PAGE_BITS)) + VM_KERNEL_DIRECT_OFFSET);
}

/* Exported function form of the PAGE_2_PHYS macro: physical address of page p. */
__p_addr KERNEL$PAGE_2_PHYS(PAGE *p)
{
	__p_addr phys = PAGE_2_PHYS(p);

	return phys;
}

/* Exported function form of the PHYS_2_PAGE macro: PAGE for physical address p. */
PAGE *KERNEL$PHYS_2_PAGE(__p_addr p)
{
	PAGE *page = PHYS_2_PAGE(p);

	return page;
}

/* Translate a direct-mapping virtual address to its physical address by
   subtracting VM_KERNEL_DIRECT_OFFSET. No range check is done. */
unsigned long KERNEL$VIRT_2_PHYS(void *virt)
{
	unsigned long va = (unsigned long)virt;

	return va - VM_KERNEL_DIRECT_OFFSET;
}

/* Translate a physical address to its direct-mapping virtual address by
   adding VM_KERNEL_DIRECT_OFFSET. No range check is done. */
void *KERNEL$PHYS_2_VIRT(unsigned long phys)
{
	void *virt = (void *)(phys + VM_KERNEL_DIRECT_OFFSET);

	return virt;
}

/* Translate a DMA address to a direct-mapping virtual address by adding
   VM_KERNEL_DIRECT_OFFSET. No range check is done. */
void *KERNEL$DMA_2_VIRT(unsigned long dma)
{
	void *virt = (void *)(dma + VM_KERNEL_DIRECT_OFFSET);

	return virt;
}

/* the resulting address must be divisible by page size */

/* Return the 32-bit DMA address of page p. When no bounce buffers are needed
   this is simply the page's physical address. */
__u32 KERNEL$MAP_PAGE_DMA(PAGE *p)
{
#if KERNEL_BOUNCE_NEEDED == 0
	return PAGE_2_PHYS(p);
#else
	/* bounce buffering is not implemented; the line below deliberately
	   breaks the build for such configurations */
	!!! FIXME: bounce
#endif
}

/* Inverse of KERNEL$MAP_PAGE_DMA: return the PAGE for 32-bit DMA address d. */
PAGE *KERNEL$UNMAP_PAGE_DMA(__u32 d)
{
#if KERNEL_BOUNCE_NEEDED == 0
	return PHYS_2_PAGE(d);
#else
	/* bounce buffering is not implemented; the line below deliberately
	   breaks the build for such configurations */
	!!! FIXME: unbounce
#endif
}

/* the resulting address must be divisible by page size */

/* Return the 64-bit DMA address of page p, which is its physical address. */
__u64 KERNEL$MAP_PAGE_DMA64(PAGE *p)
{
	__u64 dma = PAGE_2_PHYS(p);

	return dma;
}

/* Inverse of KERNEL$MAP_PAGE_DMA64: return the PAGE for 64-bit DMA address d. */
PAGE *KERNEL$UNMAP_PAGE_DMA64(__u64 d)
{
	PAGE *page = PHYS_2_PAGE(d);

	return page;
}

/*
 * FOR_ALL_PROCS callback: copy the kernel page directory slots
 * [__KERNEL_USER_VBANKS, VM_KERNEL_RESERVED_BANK) from KERNEL$PROC_KERNEL
 * into process p. Does nothing for the kernel process itself.
 * Always returns 0.
 */
static int DISTRIBUTE_MAPPING(PROC *p)
{
	if (__likely(p != &KERNEL$PROC_KERNEL)) {
		unsigned i;
		int spl = KERNEL$SPL;
		/* don't let anyone interrupting this call KERNEL$MAP_PHYSICAL_BANK */
		RAISE_SPL(SPL_TOP);
		for (i = __KERNEL_USER_VBANKS; i < VM_KERNEL_RESERVED_BANK; i++) {
			p->pgdir[i] = KERNEL$PROC_KERNEL.pgdir[i];
			/* NOTE(review): presumably gives pending work at the
			   caller's SPL a chance to run mid-copy; the TLB is
			   flushed first so it sees a consistent mapping --
			   confirm against SPLX_BUSY / TEST_SPLX */
			if (__unlikely(SPLX_BUSY(spl))) {
				TLB_INVD_G();
				TEST_SPLX(spl, SPL_X(SPL_TOP));
			}
		}
		TLB_INVD_G();
		LOWER_SPLX(spl);
	}
	return 0;
}

/*
 * Propagate the kernel part of KERNEL$PROC_KERNEL's page directory to every
 * process by running DISTRIBUTE_MAPPING through FOR_ALL_PROCS, then flush
 * the TLB including global entries. Runs at SPL_DEV or above and restores
 * the caller's SPL on return.
 */
static void DISTRIBUTE_PROC_KERNEL_MAPPING(void)
{
	int spl = KERNEL$SPL;
	if (SPLX_BELOW(spl, SPL_X(SPL_DEV))) RAISE_SPL(SPL_DEV);
	FOR_ALL_PROCS(&KERNEL$PROC_KERNEL, DISTRIBUTE_MAPPING);
	TLB_INVD_G();
	LOWER_SPLX(spl);
}

/* State of the single in-flight VM_ARCH_NEW_PBANK request:
   new_pbank_ast is the caller's AST to signal on completion (NULL when no
   request is pending), new_pbank_type the PBANK_* type for the new bank, and
   new_pbank_bottom the request that runs new_pbank_bottom_fn. */
static AST *new_pbank_ast = NULL;
static int new_pbank_type;
extern AST_STUB new_pbank_bottom_fn;
static IORQ new_pbank_bottom = { new_pbank_bottom_fn };

/*
 * Try to make one more directly mapped physical bank of the given PBANK_*
 * type available.
 *
 * With ast == NULL this is only a query. With an ast the work is started and
 * ast is called when it is done; the caller must then be at SPL_DEV.
 *
 * Returns 1 if a bank can be (or is being) provided, 0 if not. A second
 * request while one is still pending halts via KERNEL$SUICIDE.
 */
int VM_ARCH_NEW_PBANK(int type, AST *ast)
{
	int i;
	if (ast && __unlikely(KERNEL$SPL != SPL_X(SPL_DEV))) KERNEL$SUICIDE("VM_ARCH_NEW_PBANK AT SPL %08X", KERNEL$SPL);
	/* ISA DMA banks can only be requested while just the two boot banks are direct */
	if (__unlikely(type & PBANK_ISADMA) && N_DIRECT_PBANKS != 2) return 0;
	if (type & PBANK_DATA) {
		/* prefer retyping an already direct bank of type PBANK_OTHER */
		for (i = 0; i < N_DIRECT_PBANKS; i++) if (PBANKS[i] == PBANK_OTHER) {
			if (!ast) return 1;
			PBANKS[i] = type;
			VM_RESET_ZONES(i);
			CALL_AST(ast);
			return 1;
		}
	}
	/* otherwise the direct mapping must grow by one bank */
	if (__unlikely(N_DIRECT_PBANKS == N_PBANKS)) return 0;
	if (__unlikely(N_USEABLE_VBANKS <= VM_MIN_USEABLE_VBANKS)) return 0;
	if (__unlikely(VM_VBANK_MAP[VM_KERNEL_DIRECT_BANK + N_DIRECT_PBANKS] == VBANK_RESERVED)) return 0;
	if (!ast) return 1;
	if (__unlikely(new_pbank_ast != NULL)) KERNEL$SUICIDE("VM_ARCH_NEW_PBANK: RECURSIVE CALL");
	/* the first bank added after the two boot banks is always the ISA DMA bank */
	if (__unlikely(N_DIRECT_PBANKS == 2)) type = PBANK_ISADMA;
	new_pbank_ast = ast;
	new_pbank_type = type;
	CALL_AST(&new_pbank_bottom);
	return 1;
}

/*
 * Second half of VM_ARCH_NEW_PBANK. Extends the direct mapping by one
 * physical bank once the virtual bank just past the current direct mapping
 * is free; until then it waits on KERNEL$FREEMEM_WAIT and runs again.
 * On completion the AST stored in new_pbank_ast is returned to.
 */
DECL_AST(new_pbank_bottom_fn, SPL_ZERO, AST)
{
	AST *ast;
	if (__unlikely(VM_VBANK_MAP[VM_KERNEL_DIRECT_BANK + N_DIRECT_PBANKS] != VBANK_FREE)) {
		/*KERNEL$SUICIDE("new_pbank_bottom_fn: SOMETHIG MAPPED UNDER AT %d: %d", VM_KERNEL_DIRECT_BANK + N_DIRECT_PBANKS, VM_VBANK_MAP[VM_KERNEL_DIRECT_BANK + N_DIRECT_PBANKS]);*/
		/* KERNEL$MAP_PHYSICAL_BANK at SPL_ZERO is allowed */
		new_pbank_bottom.status = RQS_PROCESSING;
		WQ_WAIT(&KERNEL$FREEMEM_WAIT, &new_pbank_bottom, KERNEL$SUCCESS);
		RETURN;
	}
	/* reserve the virtual bank and install the direct mapping in the kernel directory */
	RAISE_SPL(SPL_TOP);
	VM_VBANK_MAP[VM_KERNEL_DIRECT_BANK + N_DIRECT_PBANKS] = VBANK_RESERVED;
	SET_PGDIR_ENTRY_BIG(&KERNEL$PROC_KERNEL, VM_KERNEL_DIRECT_BANK + N_DIRECT_PBANKS, PHYSMAP_ENTRY(N_DIRECT_PBANKS, 1));
	/* no need to flush TLB --- no one will be using new entry anyway since VBANK_RESERVED was set */
	LOWER_SPL(SPL_DEV);
	DISTRIBUTE_PROC_KERNEL_MAPPING();
	/* publish the new bank in the bookkeeping */
	RAISE_SPL(SPL_TOP);
	PBANKS[N_DIRECT_PBANKS] = new_pbank_type;
	N_DIRECT_PBANKS++;
	DIRECT_SIZE = N_DIRECT_PBANKS << (PG_BANK_BITS + PG_SIZE_BITS);
	N_USEABLE_VBANKS--;
	if (__unlikely(new_pbank_type == PBANK_ISADMA)) {
		/* a dedicated ISA DMA bank now exists; the boot banks lose the flag */
		PBANKS[0] &= ~PBANK_ISADMA;
		PBANKS[1] &= ~PBANK_ISADMA;
	}
	LOWER_SPL(SPL_DEV);
	VM_RESET_ZONES(N_DIRECT_PBANKS - 1);
	REFRESH_PARAMS();
	ast = new_pbank_ast;
	new_pbank_ast = NULL;
	RETURN_AST(ast);
}

/*
 * Make sure the kernel virtual area that starts at directory slot pbank has
 * backing memory for the byte at offset position.
 *
 * pbank       first page directory slot of the area
 * last_pbank  first slot that no longer belongs to the area
 * position    byte offset inside the area
 *
 * A page table is allocated and installed in KERNEL$PROC_KERNEL when the
 * directory slot is empty, and a page cluster is allocated and mapped when
 * the page table entry is empty. Allocation failures are retried after
 * KERNEL$MEMWAIT_SYNC.
 *
 * Returns 0 on success, -ERANGE when position lies beyond last_pbank, or the
 * nonzero value returned by KERNEL$MEMWAIT_SYNC.
 */
static int MAP_PAGE_BANK(int pbank, int last_pbank, unsigned long position)
{
	int b = position >> (PG_BANK_BITS + PG_SIZE_BITS);
	__u32 e;
	void *v1;
	int r, i;
	paddr_t *pgdir;
	if (__unlikely(pbank + b >= last_pbank)) return -ERANGE;
	e = GET_PGDIR_ENTRY(&KERNEL$PROC_KERNEL, pbank + b);
	if (__unlikely(!(e & PTE_P))) {
		again1:
#if !__KERNEL_USE_PAE
#define sz	4096
#else
#define sz	8192
#endif
		v1 = memalign(sz, sz);
		if (__unlikely(!v1)) {
			if (__unlikely(r = KERNEL$MEMWAIT_SYNC(sz))) return r;
			goto again1;
		}
		memset(v1, 0, sz);
		e = ((unsigned long)v1 - VM_KERNEL_DIRECT_OFFSET) | PTE_P | PTE_RW | PTE_A;
		SET_PGDIR_ENTRY(&KERNEL$PROC_KERNEL, pbank + b, e);
#undef sz
	}
	/* despite its name, this is the page table the directory slot points to */
	pgdir = (paddr_t *)((e & ~(__u32)(PG_SIZE - 1)) + VM_KERNEL_DIRECT_OFFSET);
	/* Entries are always populated a whole cluster at a time, so round the
	   index down to the start of its cluster. Callers may pass a position
	   that is not cluster aligned; without the rounding the loop below
	   would spill into the next cluster's entries or past the end of the
	   table. */
	b = ((position >> PG_SIZE_BITS) & (PG_BANK - 1)) & ~(PG_CLUSTER - 1);
	if (__likely(!(pgdir[b] & PTE_P))) {
		again2:
		v1 = KERNEL$ALLOC_KERNEL_PAGE(VM_TYPE_WIRED_MAPPED);
		if (__unlikely(!v1)) {
			if (__unlikely(r = KERNEL$MEMWAIT_SYNC(PAGE_CLUSTER_SIZE))) return r;
			goto again2;
		}
		memset(v1, 0, PAGE_CLUSTER_SIZE);
		for (i = 0; i < PG_CLUSTER; i++) {
			pgdir[b + i] = ((unsigned long)v1 - VM_KERNEL_DIRECT_OFFSET + i * PG_SIZE) | PTE_P | PTE_RW | PTE_A | PTE_D | GLOBAL_BIT;
		}
	}
	return 0;
}

/*
 * Allocate a zeroed table of PG_BANK entries, aligned to its own size.
 * Retries after KERNEL$MEMWAIT_SYNC while memory is short.
 *
 * Returns the table, or __ERR_PTR(error) when KERNEL$MEMWAIT_SYNC fails.
 */
static paddr_t *ALLOC_PHYSMAP_PD(void)
{
#define PHYSMAP_PD_BYTES	(PG_BANK * sizeof(paddr_t))
	paddr_t *table;

	while (__unlikely(!(table = memalign(PHYSMAP_PD_BYTES, PHYSMAP_PD_BYTES)))) {
		int err = KERNEL$MEMWAIT_SYNC(PHYSMAP_PD_BYTES);

		if (__unlikely(err)) return __ERR_PTR(err);
	}
	memset(table, 0, PHYSMAP_PD_BYTES);
	return table;
#undef PHYSMAP_PD_BYTES
}

/*
 * Enter the window in which cache attributes of live mappings may change:
 * disable interrupts, set CR0.CD and clear CR0.NW, then flush the cache
 * (CACHE_WBINVD_WHEN_NO_SELFSNOOP) and the whole TLB.
 *
 * Returns the original CR0 value, to be passed to END_CACHEMODE_MODIFY.
 */
static __u32 START_CACHEMODE_MODIFY(void)
{
	__u32 orig_cr0;
	KERNEL$DI();
	__asm__ volatile ("MOVL %%CR0, %0":"=r"(orig_cr0)::"memory");
	__asm__ volatile ("MOVL %0, %%CR0"::"r"((orig_cr0 | CR0_CD) & ~CR0_NW):"memory");
	CACHE_WBINVD_WHEN_NO_SELFSNOOP();
	TLB_INVD_G();
	return orig_cr0;
}

/*
 * Leave the window opened by START_CACHEMODE_MODIFY: flush the cache and the
 * TLB again, restore CR0 to orig_cr0 and re-enable interrupts.
 */
static void END_CACHEMODE_MODIFY(__u32 orig_cr0)
{
	CACHE_WBINVD_WHEN_NO_SELFSNOOP();
	TLB_INVD_G();
	__asm__ volatile ("MOVL %0, %%CR0"::"r"(orig_cr0):"memory");
	KERNEL$EI();
}

/*
 * Create or change the physical mapping template of bank i for the pages
 * [from, to) of that bank, then rebuild the kernel direct mapping and
 * distribute it to all processes.
 *
 * i     physical bank number (index into PHYSMAP_486)
 * from  first page index within the bank
 * to    one past the last page index within the bank
 * pat   PAT_* cache mode, or -1 to keep the mode of pages that are already
 *       mapped and use PAT_WB for pages that are not
 *
 * A whole bank on a CPU with 4M pages is described by one big-page entry;
 * otherwise a page table is used. When every page of a table ends up present
 * with the same mode, the table is folded back into a big-page entry and
 * freed. Changing the mode of an already present mapping is bracketed by
 * START_CACHEMODE_MODIFY / END_CACHEMODE_MODIFY.
 *
 * Must be called with PROC_CURRENT_LOCK == 0 (halts otherwise).
 * Returns 0 on success or the error from ALLOC_PHYSMAP_PD.
 */
static int CREATE_PHYSMAP(unsigned i, unsigned from, unsigned to, int pat)
{
	unsigned j;
	__u32 orig_cr0 = 0;
	paddr_t *pd = NULL, *pd_to_free = NULL;
	if (__unlikely(PROC_CURRENT_LOCK)) KERNEL$SUICIDE("CREATE_PHYSMAP: CALLED WITH PROC_CURRENT_LOCK %d", PROC_CURRENT_LOCK);
	/* without PAT support the high PAT bit cannot be expressed */
	if (__unlikely(!(KERNEL$CPU_FEATURES & CPU_HAS_PAT)) && pat >= 0) pat &= ~PAT_HIGH_BIT;
	if (__unlikely((__u32)PHYSMAP_486[i] & PTE_P)) {
		if (!((__u32)PHYSMAP_486[i] & PTE_PS)) {
			/* the bank is already split into a page table */
			pd = (paddr_t *)(((unsigned long)PHYSMAP_486[i] & ~(PG_SIZE - 1)) + VM_KERNEL_DIRECT_OFFSET);
			goto set_ptes;
		} else {
			/* the bank is a big page: nothing to do if the mode already matches */
			if (PDE2PAT(PHYSMAP_486[i]) == pat || __unlikely(pat == -1)) return 0;
			if (!(__likely(HAS_4M_PAGES) && !from && to == PG_BANK)) {
				/* partial change: split the big page, keeping its current mode */
				if (__unlikely(__IS_ERR(pd = ALLOC_PHYSMAP_PD()))) return __PTR_ERR(pd);
				for (j = 0; j < PG_BANK; j++) {
					pd[j] = PHYSMAP_PTE((paddr_t)i << (PG_BANK_BITS + PG_SIZE_BITS) | j << PG_SIZE_BITS, PDE2PAT(PHYSMAP_486[i]));
				}
			}
			orig_cr0 = START_CACHEMODE_MODIFY();
		}
	}
	if (__likely(HAS_4M_PAGES) && !from && to == PG_BANK) {
		if (__unlikely(pat == -1)) PHYSMAP_486[i] = PHYSMAP_PDE(i << (PG_BANK_BITS + PG_SIZE_BITS), PAT_WB);
		else PHYSMAP_486[i] = PHYSMAP_PDE(i << (PG_BANK_BITS + PG_SIZE_BITS), pat);
	} else {
		char diff;
		if (!pd && __unlikely(__IS_ERR(pd = ALLOC_PHYSMAP_PD()))) return __PTR_ERR(pd);
		PHYSMAP_486[i] = ((unsigned long)pd - VM_KERNEL_DIRECT_OFFSET) | PTE_P | PTE_RW | PTE_A;
		set_ptes:
		/* first pass: find out whether anything changes, and enter the
		   cache-mode window if a present page changes its mode */
		diff = 0;
		for (j = from; j < to; j++) {
			if (__unlikely((__u32)pd[j] & PTE_P)) {
				if (PTE2PAT((__u32)pd[j]) == pat || __unlikely(pat == -1)) continue;
				if (!orig_cr0) orig_cr0 = START_CACHEMODE_MODIFY();
			}
			diff = 1;
		}
		if (!diff) goto no_change;
		/* second pass: write the entries */
		for (j = from; j < to; j++) {
			if (pat == -1) {
				if (__unlikely((__u32)pd[j] & PTE_P)) continue;
				pd[j] = PHYSMAP_PTE((paddr_t)i << (PG_BANK_BITS + PG_SIZE_BITS) | j << PG_SIZE_BITS, PAT_WB);
			} else {
				pd[j] = PHYSMAP_PTE((paddr_t)i << (PG_BANK_BITS + PG_SIZE_BITS) | j << PG_SIZE_BITS, pat);
			}
		}
		if (__unlikely(!HAS_4M_PAGES)) goto no_change;
		/* Try to fold the table back into one big page. With pat == -1
		   take the reference mode from the first page of the range: it
		   is present after the loop above and always inside the table.
		   (j equals "to" here, which may be PG_BANK, so it must not be
		   used as an index.) */
		if (__unlikely(pat == -1)) pat = PTE2PAT((__u32)pd[from]);
		for (j = 0; j < PG_BANK; j++) if (!((__u32)pd[j] & PTE_P) || __unlikely(PTE2PAT((__u32)pd[j]) != pat)) goto no_change;
		pd_to_free = pd;
		PHYSMAP_486[i] = PHYSMAP_PDE(i << (PG_BANK_BITS + PG_SIZE_BITS), pat);
		no_change:;
	}
	/* rebuild the kernel direct mapping and drop all temporary bank mappings */
	for (i = VM_KERNEL_DIRECT_BANK; i < VM_KERNEL_DIRECT_BANK + N_DIRECT_PBANKS; i++)
		SET_PGDIR_ENTRY_BIG(&KERNEL$PROC_KERNEL, i, PHYSMAP_ENTRY(i - VM_KERNEL_DIRECT_BANK, 1));
	for (; i < VM_KERNEL_RESERVED_BANK; i++)
		SET_PGDIR_ENTRY_EMPTY(&KERNEL$PROC_KERNEL, i, 0);
	DISTRIBUTE_PROC_KERNEL_MAPPING();
	if (__unlikely(orig_cr0)) END_CACHEMODE_MODIFY(orig_cr0);
	/* else TLB_INVD_G(); already done in DISTRIBUTE_PROC_KERNEL_MAPPING */
	free(pd_to_free);
	return 0;
}

/* Taken by KERNEL$SET_MEMORY_LIMIT, KERNEL$SET_MAPPED_MEMORY and
   KERNEL$SET_MEMORY_PAT around their changes to the hole table and the
   physical mappings. */
static MTX_DECL(SETMEM_MUTEX, "KERNEL$SETMEM_MUTEX");

/*
 * Record [h0, h1) as a memory hole. h0 is rounded down and h1 rounded up to
 * a multiple of PAGE_CLUSTER_SIZE. If the range touches or overlaps an
 * existing hole, the first such hole is widened to cover it; otherwise a new
 * entry is appended.
 *
 * Returns 0 on success or -ENFILE when the hole table is full.
 */
int VM_ADD_MEMORY_HOLE(__u64 h0, __u64 h1)
{
	__u64 *hole;
	__u64 *const end = holes + n_holes * 2;

	h0 &= ~(__u64)(PAGE_CLUSTER_SIZE - 1);
	h1 = (h1 + PAGE_CLUSTER_SIZE - 1) & ~(__u64)(PAGE_CLUSTER_SIZE - 1);

	for (hole = holes; hole < end; hole += 2) {
		if (hole[1] < h0 || hole[0] > h1) continue;
		/* touches or overlaps: widen this hole */
		if (h0 < hole[0]) hole[0] = h0;
		if (h1 > hole[1]) hole[1] = h1;
		return 0;
	}

	if (n_holes == N_HOLES) return -ENFILE;
	end[0] = h0;
	end[1] = h1;
	n_holes++;
	return 0;
}

/*
 * Set the amount of physical memory in use to mem bytes and register the
 * n_h holes given as start/end pairs in h.
 *
 * mem is clamped to what PG_MAXPAGES allows (with a syslog message) and
 * rounded down to a multiple of PAGE_CLUSTER_SIZE. Shrinking releases the
 * pages above the new limit; growing extends the PAGE array, creates
 * write-back physical mappings for the new banks and adds their pages.
 *
 * Must be called at SPL_BOTTOM (halts otherwise).
 * Returns 0 on success or a negative error code. When growing fails with
 * -ENOMEM and more than one bank was to be added, the limit is first raised
 * by a single bank and the original request is retried.
 */
int KERNEL$SET_MEMORY_LIMIT(__u64 mem, __u64 *h, int n_h)
{
	int r;
	unsigned i;
	unsigned banks;
	unsigned long n;
	PAGE *p, *p1;
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_BOTTOM)))
		KERNEL$SUICIDE("KERNEL$SET_MEMORY_LIMIT AT SPL %08X", KERNEL$SPL);
	if (__unlikely(mem > (__u64)PG_MAXPAGES << PG_SIZE_BITS)) {
		KERNEL$SYSLOG(__SYSLOG_SW_INCOMPATIBILITY, "MEMORY", "TOO MUCH MEMORY (%"__64_format"dMiB), MAXIMUM SUPPORTED (%"__64_format"dMiB)"
#if !__KERNEL_USE_PAE
	", RECOMPILE KERNEL WITH PAE SUPPORT"
#endif
, mem >> 20, (__u64)PG_MAXPAGES << PG_SIZE_BITS >> 20);
		mem = (__u64)PG_MAXPAGES << PG_SIZE_BITS;
	}
	mem &= ~(__u64)(PAGE_CLUSTER_SIZE - 1);
	/* register the caller's holes first */
	MTX_LOCK_SYNC(&SETMEM_MUTEX);
	while (n_h) {
		if (__unlikely(r = VM_ADD_MEMORY_HOLE(h[0], h[1]))) {
			MTX_UNLOCK(&SETMEM_MUTEX);
			return r;
		}
		n_h--;
		h += 2;
	}
	MTX_UNLOCK(&SETMEM_MUTEX);
	retry:
	MTX_LOCK_SYNC(&SETMEM_MUTEX);
	/* number of banks needed, rounded up; the second computation detects
	   truncation when storing into "banks" */
	banks = (mem + (PG_SIZE * PG_BANK - 1)) >> (PG_SIZE_BITS + PG_BANK_BITS);
	if (__unlikely(banks != (mem + (PG_SIZE * PG_BANK - 1)) >> (PG_SIZE_BITS + PG_BANK_BITS))) {
		r = -EINVAL;
		goto ret;
	}
	/* directly mapped banks cannot be given up */
	if (__unlikely(banks < N_DIRECT_PBANKS)) {
		r = -ENOMEM;
		goto ret;
	}
	if (__unlikely(mem == MEMSIZE)) goto ret0;
	if (__unlikely(mem < MEMSIZE)) {
		/* shrinking */
		MTX_LOCK_SYNC(&VM_MUTEX);
		r = VM_RELEASE_PAGES(mem >> (PG_CLUSTER_BITS + PG_SIZE_BITS), banks * (PG_BANK / PG_CLUSTER), N_PBANKS * (PG_BANK / PG_CLUSTER));
		if (__unlikely(r)) {
			MTX_UNLOCK(&VM_MUTEX);
			goto ret;
		}
		RAISE_SPL(SPL_TOP);
		MEMSIZE = mem;
		LOWER_SPL(SPL_BOTTOM);
		for (i = banks; i < N_PBANKS; i++) PBANKS[i] = 0;
		N_PBANKS = banks;
		MTX_UNLOCK(&VM_MUTEX);
		goto ret0;
	}
	/* growing: make the PAGE array large enough for all banks */
	n = sizeof(PAGE) * banks * (PG_BANK / PG_CLUSTER);
	while (n > PAGE_ARRAY_SIZE) {
		if (__unlikely(r = MAP_PAGE_BANK(VM_KERNEL_PAGE_BANK, VM_KERNEL_DIRECT_BANK, PAGE_ARRAY_SIZE))) goto ret;
		PAGE_ARRAY_SIZE += PAGE_CLUSTER_SIZE;
	}
	DISTRIBUTE_PROC_KERNEL_MAPPING();
	for (i = N_PBANKS; i < banks; i++) {
		if (__unlikely(r = CREATE_PHYSMAP(i, 0, PG_BANK, PAT_WB))) goto ret;
		PBANKS[i] = i < 1024 ? 0 : PBANK_HIGHMEM | PBANK_NOPCIDMA;
	}
	MTX_LOCK_SYNC(&VM_MUTEX);
	/* p1: first page above the old limit */
	p1 = KERNEL_PAGE_MAP + (unsigned long)(MEMSIZE >> (PG_CLUSTER_BITS + PG_SIZE_BITS));
	RAISE_SPL(SPL_TOP);
	MEMSIZE = mem;
	LOWER_SPL(SPL_BOTTOM);
	/* add the new pages, highest first; ADD_PAGE marks those beyond mem
	   or inside holes as reserved */
	for (p = KERNEL_PAGE_MAP + banks * PG_BANK / PG_CLUSTER - 1; p >= p1; p--) {
		ADD_PAGE(p, 0);
		TEST_LOCKUP_SYNC;
	}
	N_PBANKS = banks;
	MTX_UNLOCK(&VM_MUTEX);
	ret0:
	REFRESH_PARAMS();
	r = 0;
	ret:
	MTX_UNLOCK(&SETMEM_MUTEX);
	if (r == -ENOMEM && banks > N_PBANKS + 1) {
		/* out of memory while growing by several banks: grow by one
		   bank, then try the full request again */
		r = KERNEL$SET_MEMORY_LIMIT((__u64)(N_PBANKS + 1) << (PG_SIZE_BITS + PG_BANK_BITS), NULL, 0);
		if (__unlikely(r)) return r;
		goto retry;
	}
	return r;
}

/*
 * Make the physical range [addr, addr + len) known to the VM as reserved
 * pages with cache mode pat, one page cluster at a time.
 *
 * addr is rounded down to a cluster boundary (len grows accordingly). For
 * every cluster: below MEMSIZE it must lie entirely inside a recorded hole,
 * otherwise a hole is recorded for it; then storage for its PAGE structure
 * is mapped, the physical mapping is created and the page is registered
 * with VM_ADD_PAGE(p, 1).
 *
 * Returns 0 on success, -EBUSY when a cluster below MEMSIZE is not inside a
 * hole, -ERANGE when the address is beyond the supported banks, or the error
 * of a helper. Clusters processed before a failure stay registered.
 */
int KERNEL$SET_MAPPED_MEMORY(__p_addr addr, __p_addr len, int pat)
{
	int r;
	PAGE *p;
	if (__unlikely((unsigned)addr & (PAGE_CLUSTER_SIZE - 1))) {
		len += (unsigned)addr & (PAGE_CLUSTER_SIZE - 1);
		addr &= ~(__p_addr)(PAGE_CLUSTER_SIZE - 1);
	}
	MTX_LOCK_SYNC(&SETMEM_MUTEX);
	while (len > 0) {
		unsigned i, bank;
		if (addr < MEMSIZE) {
			/* inside RAM: only allowed where a hole was declared */
			for (i = 0; i < n_holes * 2; i += 2) {
				if (holes[i] <= addr && holes[i + 1] >= addr + PAGE_CLUSTER_SIZE) goto ok;
			}
			r = -EBUSY;
			goto ret;
		}
		if (__unlikely(r = VM_ADD_MEMORY_HOLE(addr, addr + PAGE_CLUSTER_SIZE))) goto ret;
		ok:
		bank = addr >> (PG_BANK_BITS + PG_SIZE_BITS);
		if (__unlikely(bank >= BANKS)) {
			r = -ERANGE;
			goto ret;
		}
		/* the position argument is the byte offset of this cluster's PAGE
		   structure within the page array */
		if (__unlikely(r = MAP_PAGE_BANK(VM_KERNEL_PAGE_BANK, VM_KERNEL_DIRECT_BANK, addr >> (PG_SIZE_BITS + PG_CLUSTER_BITS - PG_SIZEOF_STRUCT_PAGE_BITS)))) goto ret;
		if (__unlikely(r = CREATE_PHYSMAP(bank, (addr >> PG_SIZE_BITS) & (PG_BANK - 1), ((addr >> PG_SIZE_BITS) & (PG_BANK - 1)) + PG_CLUSTER, pat))) goto ret;
		p = PHYS_2_PAGE_ALIGNED(addr);
		VM_ADD_PAGE(p, 1);
		TEST_LOCKUP_SYNC;
		addr += PAGE_CLUSTER_SIZE;
		/* len is unsigned: stop before the subtraction could wrap */
		if (len <= PAGE_CLUSTER_SIZE) break;
		len -= PAGE_CLUSTER_SIZE;
	}
	r = 0;
	ret:
	MTX_UNLOCK(&SETMEM_MUTEX);
	return r;
}

/*
 * Set the cache mode of the physical range [addr, addr + len) to pat.
 *
 * addr is rounded down to a page boundary (len grows accordingly) and the
 * range is handed to CREATE_PHYSMAP in pieces that do not cross a bank
 * boundary.
 *
 * Returns 0 on success or the first error from CREATE_PHYSMAP.
 */
int KERNEL$SET_MEMORY_PAT(__p_addr addr, __p_addr len, int pat)
{
	int r = 0;
	const unsigned misalign = (unsigned)addr & (PG_SIZE - 1);

	if (__unlikely(misalign)) {
		len += misalign;
		addr &= ~(__p_addr)(PG_SIZE - 1);
	}

	MTX_LOCK_SYNC(&SETMEM_MUTEX);
	while (len > 0) {
		/* bytes left until the end of the current bank */
		unsigned chunk = PG_BANK * PG_SIZE - ((unsigned)addr & (PG_BANK * PG_SIZE - 1));

		if (chunk > len) chunk = len;
		r = CREATE_PHYSMAP(addr >> (PG_SIZE_BITS + PG_BANK_BITS), (addr >> PG_SIZE_BITS) & (PG_BANK - 1), ((addr >> PG_SIZE_BITS) & (PG_BANK - 1)) + ((chunk + PG_SIZE - 1) >> PG_SIZE_BITS), pat);
		if (__unlikely(r)) break;
		addr += chunk;
		len -= chunk;
	}
	MTX_UNLOCK(&SETMEM_MUTEX);
	return r;
}

/*
 * Return the amount of memory, in bytes, available for the given VM type.
 *
 * For the *_MAPPED types the result is capped by what can be mapped: the
 * direct banks plus the useable virtual banks beyond the
 * VM_MIN_USEABLE_VBANKS reserve. For all other types it is MEMSIZE.
 */
__u64 KERNEL$GET_MEMORY_SIZE(int type)
{
	const __u64 total = MEMSIZE;
	__u64 mappable;

	if (type != VM_TYPE_WIRED_MAPPED && type != VM_TYPE_CACHED_MAPPED && type != VM_TYPE_USER_MAPPED) {
		return total;
	}
	mappable = (__u64)(N_DIRECT_PBANKS + N_USEABLE_VBANKS - VM_MIN_USEABLE_VBANKS) << (PG_BANK_BITS + PG_SIZE_BITS);
	return mappable < total ? mappable : total;
}

/*
 * Check that page cluster p is backed by working memory. The contents of
 * the cluster are overwritten.
 *
 * The first and last 32-bit words are always written, flushed from the cache
 * and read back. If the environment variable @KERNEL$MEMORY_CHECK is exactly
 * "1", the whole cluster is additionally tested with an address-dependent
 * pattern and with its bitwise inverse.
 *
 * Returns 0 if the memory is good, -ENODEV if the quick probe fails, or -EIO
 * if the full pattern test fails. Failures are reported through
 * KERNEL$SYSLOG.
 */
int VM_ARCH_CHECK_HW_MEM(PAGE *p)
{
	char *e;
	__u32 *v = KERNEL$MAP_PHYSICAL_PAGE(p);
	v[0] = 0x33343536;
	v[PAGE_CLUSTER_SIZE / 4 - 1] = 0xCCCDCECF;
	CACHE_WBINVD();
	if (__unlikely(v[0] != 0x33343536) || __unlikely(v[PAGE_CLUSTER_SIZE / 4 - 1] != 0xCCCDCECF)) {
		KERNEL$SYSLOG(__SYSLOG_HW_ERROR, "MEMORY", "MEMORY NOT PRESENT AT PHYSICAL ADDRESS %"__64_format"X", (__u64)PAGE_2_PHYS(p));
		KERNEL$UNMAP_PHYSICAL_BANK(v);
		return -ENODEV;
	}
	if ((e = getenv("@KERNEL$MEMORY_CHECK")) && __likely(e[0] == '1') && __likely(!e[1])) {
		__u32 i, j;
		for (j = 0; j < 2; j++) {
			/* j - 1 is all ones in the first pass and zero in the second,
			   so every bit is tested in both states */
			for (i = 0; i < PAGE_CLUSTER_SIZE / 4; i++) v[i] = 0x12345678 ^ (j - 1) ^ i ^ i << 16;
			CACHE_WBINVD();
			for (i = 0; i < PAGE_CLUSTER_SIZE / 4; i++) if (v[i] != (0x12345678 ^ (j - 1) ^ i ^ i << 16)) {
				/* i counts 32-bit words; convert it to a byte offset so the
				   logged physical address is the failing location */
				KERNEL$SYSLOG(__SYSLOG_HW_ERROR, "MEMORY", "MEMORY ERROR AT %"__64_format"X", (__u64)PAGE_2_PHYS(p) + (__u64)i * sizeof(*v));
				KERNEL$UNMAP_PHYSICAL_BANK(v);
				return -EIO;
			}
		}
	}
	KERNEL$UNMAP_PHYSICAL_BANK(v);
	return 0;
}

/* Number of page tables VM_ARCH_INIT_PAGETABLES reserves in the slab at boot. */
#define RESERVED_PAGETABLES		64

/* Per-process reservations; not used in this part of the file. */
#define PROC_RESERVED_PAGETABLES	6
#define PROC_RESERVED_MAPPINGS		16

/* Slab cache of struct pgtbl and the VM entity type registered for page
   tables (both set up in VM_ARCH_INIT_PAGETABLES). */
static struct __slhead pagetables;
static int pagetable_vm_entity;

/* VM entity callbacks for page tables, defined later in the file. */
static void PAGETABLE_WRITE(VMENTITY *e, PROC *p, int trashing);
static int PAGETABLE_SWAPOUT(VMENTITY *e);
static int PAGETABLE_CHECKMAP(VMENTITY *e);

static __const__ VMENTITY_T pagetable_calls = { PAGETABLE_CHECKMAP, PAGETABLE_WRITE, PAGETABLE_SWAPOUT, "PAGETABLE" };

static void PGTBL_CTOR(void *null, void *pgtbl_)
{
	unsigned i;
	struct pgtbl *pgtbl = pgtbl_;
	memset(&pgtbl->e, 0, sizeof pgtbl->e);
	pgtbl->n_mappings = 0;
	for (i = 0; i < 1024 / PG_CLUSTER; i++) {
		pgtbl->rmap[i].pgtbl = pgtbl;
		pgtbl->rmap[i].idx = i << PG_CLUSTER_BITS;
	}
	CACHE_CONSTRUCT_VM_ENTITY(&pgtbl->vm);
	pgtbl->vm.type = pagetable_vm_entity;
}

/*
 * Register the page table VM entity type, create the slab cache for
 * struct pgtbl and reserve RESERVED_PAGETABLES objects in it.
 * Any failure is fatal: a message is printed and the kernel is halted.
 */
void VM_ARCH_INIT_PAGETABLES(void)
{
	int r;
	if (__unlikely(r = KERNEL$CACHE_REGISTER_VM_TYPE(&pagetable_vm_entity, &pagetable_calls))) {
		/* newline added so both fatal messages end the same way */
		__critical_printf("CAN'T REGISTER PAGETABLE VM ENTITY: %s\n", strerror(-r));
		HALT_KERNEL();
	}
	KERNEL$SLAB_INIT(&pagetables, sizeof(struct pgtbl), 4096, VM_TYPE_CACHED_MAPPED, PGTBL_CTOR, NULL, NULL, "KERNEL$PAGETABLE");
	if (__unlikely(r = KERNEL$SLAB_RESERVE(&pagetables, RESERVED_PAGETABLES))) {
		__critical_printf("ERROR ALLOCATING PAGETABLES: %s\n", strerror(-r));
		HALT_KERNEL();
	}
}

/*
 * Invalidate the TLB entries of the page cluster that starts at virtual
 * address a. Addresses at or above the start of the last user bank cause a
 * flush of all non-global entries instead; below that, each page of the
 * cluster is invalidated individually.
 */
static __finline__ void TLB_INVD_CLUSTER(unsigned long a)
{
	if (__unlikely(a >= (__KERNEL_USER_VBANKS - 1) << (PG_SIZE_BITS + PG_BANK_BITS))) {
		TLB_INVD_NG();
		return;
	}
#if PG_CLUSTER > 8
	{
		unsigned i;
		/* fixed: the bound used the misspelled, undefined PG_CLUTER */
		for (i = 0; i < PG_SIZE * PG_CLUSTER; i += PG_SIZE)
			TLB_INVD_PG(a + i);
	}
#else
	/* unrolled for cluster sizes 1, 2, 4 and 8 */
	TLB_INVD_PG(a);
#if PG_CLUSTER > 1
	TLB_INVD_PG(a + PG_SIZE);
#endif
#if PG_CLUSTER > 2
	TLB_INVD_PG(a + PG_SIZE * 2);
	TLB_INVD_PG(a + PG_SIZE * 3);
#endif
#if PG_CLUSTER > 4
	TLB_INVD_PG(a + PG_SIZE * 4);
	TLB_INVD_PG(a + PG_SIZE * 5);
	TLB_INVD_PG(a + PG_SIZE * 6);
	TLB_INVD_PG(a + PG_SIZE * 7);
#endif
#endif
}

/*
 * Write one cluster of entries into page table pg, starting at index off.
 * The first entry gets val and every following entry val advanced by another
 * PG_SIZE, so val carries both the first page's address and the flag bits.
 * With __DEBUG >= 1 the call halts unless made at SPL_VSPACE.
 */
static __finline__ void SET_MAPPING(struct pgtbl *pg, unsigned off, paddr_t val)
{
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_VSPACE)))
		KERNEL$SUICIDE("SET_MAPPING AT SPL %08X", KERNEL$SPL);
#endif
#if PG_CLUSTER > 8
	/* generic loop: runs until off reaches the next cluster boundary */
	do {
		pg->e[off] = val;
		val += PG_SIZE;
	} while ((++off) & (PG_CLUSTER - 1));
#else
	/* unrolled for cluster sizes 1, 2, 4 and 8 */
	pg->e[off] = val;
#if PG_CLUSTER > 1
	pg->e[off + 1] = val += PG_SIZE;
#endif
#if PG_CLUSTER > 2
	pg->e[off + 2] = val += PG_SIZE;
	pg->e[off + 3] = val += PG_SIZE;
#endif
#if PG_CLUSTER > 4
	pg->e[off + 4] = val += PG_SIZE;
	pg->e[off + 5] = val += PG_SIZE;
	pg->e[off + 6] = val += PG_SIZE;
	pg->e[off + 7] = val += PG_SIZE;
#endif
#endif
}

/*
 * Map a partial cluster: the first "n_pages" PTEs starting at index "off"
 * get "val", "val" + PG_SIZE, ...; the remaining PTEs up to the next
 * cluster boundary are set to SPAGE_PADDING_ENTRY.
 *
 * Both loops are do/while, so at least one real entry and at least one
 * padding entry are always written. Debug builds therefore require
 * 1 <= n_pages <= PG_CLUSTER - 1 (the unsigned "n_pages - 1" test also
 * rejects 0) and SPL_VSPACE.
 */
static __finline__ void SET_SPAGE_MAPPING(struct pgtbl *pg, unsigned off, paddr_t val, unsigned n_pages)
{
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_VSPACE)))
		KERNEL$SUICIDE("SET_SPAGE_MAPPING AT SPL %08X", KERNEL$SPL);
	if (__unlikely(n_pages - 1 >= PG_CLUSTER - 1))
		KERNEL$SUICIDE("SET_SPAGE_MAPPING: %u PAGES", n_pages);
#endif
	/* real pages */
	do {
		pg->e[off++] = val;
		val += PG_SIZE;
	} while (--n_pages);
	/* padding up to the end of the cluster */
	do {
		pg->e[off] = SPAGE_PADDING_ENTRY;
	} while ((++off) & (PG_CLUSTER - 1));
}

/*
 * Clear all PTEs of the cluster starting at index "off" and collect which of
 * them were dirty.
 *
 * *d1 / *d2 accumulate a half-open range [*d1, *d2) of page indices within
 * the cluster whose PTE had PTE_D set: *d1 is lowered to the smallest dirty
 * index, *d2 is raised to the largest dirty index + 1. Callers start with
 * *d1 = PG_CLUSTER and *d2 = 0 (an empty range). Only the low 32 bits of
 * each PTE are examined.
 *
 * Debug builds die unless the caller runs at SPL_VSPACE.
 */
static __finline__ void RESET_MAPPING(struct pgtbl *pg, unsigned off, unsigned *d1, unsigned *d2)
{
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_VSPACE)))
		KERNEL$SUICIDE("RESET_MAPPING AT SPL %08X", KERNEL$SPL);
#endif
/* x(y): record the dirty state of entry y of the cluster, then zero it */
#define x(y)								\
		if (*(__u32 *)&pg->e[off + y] & PTE_D) {		\
			if (y < *d1) *d1 = y;				\
			if (y >= *d2) *d2 = y + 1;			\
		}							\
		pg->e[off + y] = 0;
#if PG_CLUSTER > 8
	unsigned i;
	for (i = 0; i < PG_CLUSTER; i++) {
		/*RAISE_SPL(SPL_TOP);*/
		x(i);
		/*LOWER_SPL(SPL_VSPACE);*/
	}
#else
	/*RAISE_SPL(SPL_TOP);*/
	x(0);
#if PG_CLUSTER > 1
	x(1);
#endif
#if PG_CLUSTER > 2
	x(2);
	x(3);
#endif
#if PG_CLUSTER > 4
	/*LOWER_SPL(SPL_VSPACE);*/
	/*RAISE_SPL(SPL_TOP);*/
	x(4);
	x(5);
	x(6);
	x(7);
#endif
	/*LOWER_SPL(SPL_VSPACE);*/
#endif
#undef x
}

/*
 * Clear the whole cluster of PTEs starting at index "off" without looking
 * at dirty bits. Debug builds die unless the caller runs at SPL_VSPACE.
 */
static __finline__ void RESET_READONLY_MAPPING(struct pgtbl *pg, unsigned off)
{
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_VSPACE)))
		KERNEL$SUICIDE("RESET_READONLY_MAPPING AT SPL %08X", KERNEL$SPL);
#endif
	/* one cluster worth of entries, counted in bytes */
	memset(pg->e + off, 0, sizeof(pg->e[0]) * PG_CLUSTER);
}

/*
 * Read the flags of the cluster starting at PTE index "off" and clear
 * selected bits.
 *
 * Returns the bitwise OR of the low 32 bits of all PTEs in the cluster, as
 * they were before any clearing. Every PTE that has at least one bit of
 * "clrflg" set is rewritten with those bits removed. *d1 / *d2 are updated
 * like in RESET_MAPPING: they accumulate the half-open range of page
 * indices within the cluster whose PTE had PTE_D set.
 *
 * Debug builds die unless the caller runs at SPL_VSPACE.
 */
static __finline__ unsigned GET_MAPPING_FLAGS(struct pgtbl *pg, unsigned off, unsigned *d1, unsigned *d2, __u32 clrflg)
{
	__u32 flg;
	__u32 val;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_VSPACE)))
		KERNEL$SUICIDE("GET_MAPPING_FLAGS AT SPL %08X", KERNEL$SPL);
#endif
/* x(y): fold entry y into flg, strip clrflg from it, track the dirty range */
#define x(y)								\
		flg |= val = *(__u32 *)&pg->e[off + y];			\
		if (val & clrflg) *(__u32 *)&pg->e[off + y] = val & ~clrflg;\
		if (val & PTE_D) {					\
			if (y < *d1) *d1 = y;				\
			if (y >= *d2) *d2 = y + 1;			\
		}
	flg = 0;
#if PG_CLUSTER > 8
	unsigned i;
	for (i = 0; i < PG_CLUSTER; i++) {
		/*RAISE_SPL(SPL_TOP);*/
		x(i);
		/*LOWER_SPL(SPL_VSPACE);*/
	}
#else
	/*RAISE_SPL(SPL_TOP);*/
	x(0);
#if PG_CLUSTER > 1
	x(1);
#endif
#if PG_CLUSTER > 2
	x(2);
	x(3);
#endif
#if PG_CLUSTER > 4
	/*LOWER_SPL(SPL_VSPACE);*/
	/*RAISE_SPL(SPL_TOP);*/
	x(4);
	x(5);
	x(6);
	x(7);
#endif
	/*LOWER_SPL(SPL_VSPACE);*/
#endif
	return flg;
#undef x
}

/*
 * Remove the cluster mapping at PTE index "from" of pagetable "pg".
 *
 * The caller must have checked that the mapping is present. The function
 * decrements the mapping counters of the pagetable and of its process,
 * returns one unit of VM quota when the process is still at or above
 * PROC_RESERVED_MAPPINGS after the decrement, unlinks the cluster's
 * reverse-map node and clears the PTEs. For a writeable mapping the dirty
 * bits found in the PTEs are merged into the page's dirty_from / dirty_to
 * byte range.
 *
 * No TLB invalidation is done here; that is left to the caller.
 * Debug builds die unless the caller runs at SPL_VSPACE.
 */
static void ZAP_PAGETABLE_ENTRY(struct pgtbl *pg, unsigned from)
{
	PROC *p = pg->proc;
	unsigned x1, x2;
	PAGE *page;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_VSPACE)))
		KERNEL$SUICIDE("ZAP_PAGETABLE_ENTRY AT SPL %08X", KERNEL$SPL);
#endif
	/* start with an empty dirty range [x1, x2) */
	x1 = PG_CLUSTER, x2 = 0;
	pg->n_mappings--;
	p->n_mappings--;
	if (p->n_mappings >= PROC_RESERVED_MAPPINGS)
		QFREE(&p->vmq, 1, proc_vmq_isroot, proc_vmq_parent, Q_NULL_CALL);
	DEL_FROM_LIST(&pg->rmap[from >> PG_CLUSTER_BITS].list);
	/* mappings without PTE_RW (read-only pages, presumably also spages) cannot carry dirty data */
	if (__unlikely(!((__u32)pg->e[from] & PTE_RW))) {
#if __DEBUG >= 1
		if (__unlikely(((__u32)pg->e[from] & PTE_D)))
			KERNEL$SUICIDE("ZAP_PAGETABLE_ENTRY: DIRTY READ-ONLY PTE: %"__64_format"X", (__u64)pg->e[from]);
#endif
		RESET_READONLY_MAPPING(pg, from);
		return;
	}
	/* look the page up before the PTE is wiped */
	page = PHYS_2_PAGE(pg->e[from]);
	RESET_MAPPING(pg, from, &x1, &x2);
#if __DEBUG >= 1
	if (x1 != PG_CLUSTER)
		if (__unlikely(!(page->flags & PAGE_WRITEABLE)))
			KERNEL$SUICIDE("ZAP_PAGETABLE_ENTRY: NON-WRITEABLE PAGE WITH DIRTY MAPPING, OFFSET %X", from);
#endif
	/* widen the page's dirty byte range by the pages found dirty */
	if (x1 << PG_SIZE_BITS < page->dirty_from) page->dirty_from = x1 << PG_SIZE_BITS;
	if (x2 << PG_SIZE_BITS > page->dirty_to) page->dirty_to = x2 << PG_SIZE_BITS;
}

/*
 * Remove every present cluster mapping with PTE index in [from, to) from
 * pagetable "pg". "from" is stepped by PG_CLUSTER, so it is expected to be
 * cluster aligned.
 *
 * Called at SPL_FS (checked in debug builds) and returns at SPL_FS. The
 * work is done at SPL_VSPACE; whenever SPL_BUSY(SPL_FS) reports pending
 * work, the SPL is briefly dropped to SPL_FS between entries. If the
 * pagetable belongs to PROC_CURRENT, the non-global TLB is flushed before
 * every such drop and once at the end.
 */
static void ZAP_PAGETABLE_RANGE(struct pgtbl *pg, unsigned from, unsigned to)
{
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_FS)))
		KERNEL$SUICIDE("ZAP_PAGETABLE_RANGE AT SPL %08X", KERNEL$SPL);
#endif
		
	RAISE_SPL(SPL_VSPACE);
	for (; from < to; from += PG_CLUSTER) if ((int)pg->e[from] & PTE_P) {
		ZAP_PAGETABLE_ENTRY(pg, from);
		if (__unlikely(SPL_BUSY(SPL_FS))) {
			/* flush before anything else gets a chance to run */
			if (__unlikely(pg->proc == PROC_CURRENT)) TLB_INVD_NG();
			LOWER_SPL(SPL_FS);
			RAISE_SPL(SPL_VSPACE);
		}
	}
	if (__unlikely(pg->proc == PROC_CURRENT)) TLB_INVD_NG();
	LOWER_SPL(SPL_FS);
}

/*
 * Turn every writeable cluster mapping of pagetable "pg" into a read-only,
 * clean one: PTE_RW and PTE_D are cleared in all its PTEs and the dirty
 * bits that were found are merged into the page's dirty_from / dirty_to
 * byte range.
 *
 * Called at SPL_FS (checked in debug builds) and returns at SPL_FS; uses the
 * same SPL-dropping and TLB-flushing scheme as ZAP_PAGETABLE_RANGE.
 * Always returns NULL.
 */
static WQ *READ_ONLY_PAGETABLE(struct pgtbl *pg)
{
	unsigned from;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_FS)))
		KERNEL$SUICIDE("READ_ONLY_PAGETABLE AT SPL %08X", KERNEL$SPL);
#endif
	RAISE_SPL(SPL_VSPACE);
	/* only the first PTE of each cluster is tested for PTE_RW */
	for (from = 0; from < PG_BANK; from += PG_CLUSTER) if ((__u32)pg->e[from] & PTE_RW) {
		unsigned x1, x2;
		PAGE *page;
		x1 = PG_CLUSTER, x2 = 0;
		GET_MAPPING_FLAGS(pg, from, &x1, &x2, PTE_D | PTE_RW);
		page = PHYS_2_PAGE(pg->e[from]);
		if (x1 << PG_SIZE_BITS < page->dirty_from) page->dirty_from = x1 << PG_SIZE_BITS;
		if (x2 << PG_SIZE_BITS > page->dirty_to) page->dirty_to = x2 << PG_SIZE_BITS;
		if (__unlikely(SPL_BUSY(SPL_FS))) {
			if (__unlikely(pg->proc == PROC_CURRENT)) TLB_INVD_NG();
			LOWER_SPL(SPL_FS);
			RAISE_SPL(SPL_VSPACE);
		}
	}
	if (__unlikely(pg->proc == PROC_CURRENT)) TLB_INVD_NG();
	LOWER_SPL(SPL_FS);
	return NULL;
}

static void ZAP_IOBMP_LDT(struct pgtbl *pg);

/*
 * Detach pagetable "pg" from its process and free it. The pagetable must
 * not contain user mappings any more (this function does not touch
 * n_mappings or reverse maps).
 *
 * - A user bank (idx < __KERNEL_USER_VBANKS) has its page directory entry
 *   emptied; for the last user bank the VM_KERNEL_COPY_OF_LAST_BANK alias
 *   is emptied too.
 * - Anything else (the reserved kernel bank or an I/O-bitmap/LDT page) is
 *   unhooked by ZAP_IOBMP_LDT.
 *
 * Afterwards the pagetable leaves the process' list and the VM entity
 * cache, the object goes back to the slab, and the process' pagetable count
 * and quota are updated.
 *
 * Called at SPL_FS (checked in debug builds) and returns at SPL_FS.
 */
static void ZAP_EMPTY_PAGETABLE(struct pgtbl *pg)
{
	PROC *p = pg->proc;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_FS)))
		KERNEL$SUICIDE("ZAP_EMPTY_PAGETABLE AT SPL %08X", KERNEL$SPL);
	if (__unlikely(pg->idx >= __KERNEL_USER_VBANKS) && pg->idx != VM_KERNEL_RESERVED_BANK && (__unlikely(pg->idx < 1024) || __unlikely(pg->idx >= 1024 + IOBMP_PAGES + LDT_PAGES)))
		KERNEL$SUICIDE("ZAP_EMPTY_PAGETABLE: INVALID INDEX %x", pg->idx);
#endif
	RAISE_SPL(SPL_VSPACE);
	if (__likely(pg->idx < __KERNEL_USER_VBANKS)) {
		if (__unlikely(pg->idx == __KERNEL_USER_VBANKS - 1))
			SET_PGDIR_ENTRY_EMPTY(p, VM_KERNEL_COPY_OF_LAST_BANK, 0);
		SET_PGDIR_ENTRY_EMPTY(p, pg->idx, 0);
	} else {
		ZAP_IOBMP_LDT(pg);
	}
	/* Possible optimization: invalidate just that entry with INVLPG
	   But I think it doesn't care much ... */
	if (__unlikely(pg->proc == PROC_CURRENT)) TLB_INVD_NG();
	DEL_FROM_LIST(&pg->list);
	/* go from SPL_VSPACE to SPL_CACHE, whichever of the two is the higher level */
	if (SPLX_BELOW(SPL_X(SPL_VSPACE), SPL_X(SPL_CACHE))) RAISE_SPL(SPL_CACHE);
	if (SPLX_BELOW(SPL_X(SPL_CACHE), SPL_X(SPL_VSPACE))) LOWER_SPL(SPL_CACHE);
	KERNEL$CACHE_REMOVE_VM_ENTITY(&pg->vm);
	LOWER_SPL(SPL_FS);
	/* Non-user pagetables hold copied data (zero page table, I/O bitmap or
	   LDT contents), so wipe them before the object returns to the slab.
	   NOTE(review): presumably the slab expects freed objects to be in
	   their constructed (all-zero) state --- confirm. */
	if (__unlikely(pg->idx >= __KERNEL_USER_VBANKS))
		memset(&pg->e, 0, sizeof pg->e);
	__slfree(pg);
	p->n_pagetables--;
	if (p->n_pagetables >= PROC_RESERVED_PAGETABLES)
		QFREE(&p->pgtblq, 1, proc_pgtblq_isroot, proc_pgtblq_parent, Q_NULL_CALL);
}

/*
 * Unhook a non-user pagetable from its process.
 *
 * If "pg" is the process' private table for VM_KERNEL_RESERVED_BANK, the
 * page directory entry is pointed back at the shared zero page table.
 * Otherwise "pg" is an I/O-bitmap/LDT page (idx >= 1024): its PTE in the
 * process' reserved-bank table is cleared and the process' iobmp_ldt slot
 * is reset. The kernel dies if the process has no private reserved-bank
 * table in that case.
 */
static void ZAP_IOBMP_LDT(struct pgtbl *pg)
{
	PROC *owner = pg->proc;
	struct pgtbl *resv;
	__u32 pde;

	if (pg->idx == VM_KERNEL_RESERVED_BANK) {
		SET_PGDIR_ENTRY(owner, VM_KERNEL_RESERVED_BANK, VM_ZERO_PAGE_TABLE_ENTRY);
		return;
	}

	pde = GET_PGDIR_ENTRY(owner, VM_KERNEL_RESERVED_BANK);
	if (__unlikely(pde == VM_ZERO_PAGE_TABLE_ENTRY))
		KERNEL$SUICIDE("ZAP_IOBMP_LDT: PROCESS HAS NO LDT OR IOBMP PAGES");
	/* physical address in the directory entry -> kernel virtual address */
	resv = (void *)((pde & ~(__u32)(PG_SIZE - 1)) + VM_KERNEL_DIRECT_OFFSET);
	resv->e[pg->idx - 1024 + RESERVED_IOBMP] = 0;
	owner->iobmp_ldt[pg->idx - 1024] = NULL;
}

/*
 * Destroy pagetable "pg" together with everything hanging off it.
 *
 * A user bank first loses all its mappings. The reserved-bank table first
 * takes all existing I/O-bitmap/LDT pages of the process with it. In every
 * case the pagetable itself is released at the end.
 */
static void ZAP_PAGETABLE(struct pgtbl *pg)
{
	if (__likely(pg->idx < __KERNEL_USER_VBANKS)) {
		ZAP_PAGETABLE_RANGE(pg, 0, 1024);
	} else if (__likely(pg->idx == VM_KERNEL_RESERVED_BANK)) {
		PROC *owner = pg->proc;
		int slot = 0;
		while (slot < IOBMP_PAGES + LDT_PAGES) {
			if (owner->iobmp_ldt[slot])
				ZAP_EMPTY_PAGETABLE(owner->iobmp_ldt[slot]);
			slot++;
		}
	}
	ZAP_EMPTY_PAGETABLE(pg);
}

static WQ *FLUSH_FPU_DELAYED(void);

/*
 * Cluster-aligned PTE index, within its bank, of the user address
 * KUPLACE(UDATA_COPROCESSOR) (the address SAVE_FPU stores the FPU state
 * to). Callers that operate on a whole pagetable pass this as "idxx" to
 * PAGETABLE_FREEABLE so that the FPU check below always applies to them.
 */
#define WHOLE_PAGETABLE	((KUPLACE(UDATA_COPROCESSOR) >> PG_SIZE_BITS) & (PG_BANK - PG_CLUSTER))

/*
 * Check whether the cluster "idxx" of pagetable "pg" of process "p" may be
 * unmapped right now. If not, the macro sets a local "wq" to the wait queue
 * to sleep on and executes the statement(s) given as "ret", which may refer
 * to "wq" and are expected to leave the enclosing function.
 *
 * Unmapping is refused when
 *  - p is PROC_CURRENT and PROC_CURRENT_LOCK is set, or
 *  - pg is the last user bank, p owns the FPU (PROC_FPU) and idxx is the
 *    cluster holding the FPU save area; a delayed FPU flush is started then.
 *
 * "wq" lives in the macro's own block, so it is visible only inside "ret".
 */
#define PAGETABLE_FREEABLE(p, pg, idxx, ret)				\
{									\
	WQ *wq;								\
	if (__unlikely(PROC_CURRENT_LOCK) && __unlikely((p) == PROC_CURRENT)) {\
		wq = &PROC_CURRENT_LOCK_WAIT;				\
		ret;							\
	}								\
	if (__unlikely((pg)->idx == __KERNEL_USER_VBANKS - 1) && __unlikely((p) == PROC_FPU) && __unlikely((idxx) == WHOLE_PAGETABLE)) {		\
		wq = FLUSH_FPU_DELAYED();				\
		ret;							\
	}								\
}

/* Side channel of ZAP_PROC_PAGETABLE: the first wait queue that prevented a
   zap since the caller last reset this to NULL. */
static WQ *ZAP_PROC_WQ;

/*
 * Try to free one pagetable of process "p".
 *
 * Returns 1 if a pagetable was zapped, 0 otherwise. 0 is returned for the
 * kernel process, for a process without pagetables, and when the chosen
 * pagetable cannot be freed right now; in the last case the wait queue is
 * stored in ZAP_PROC_WQ unless one is already recorded there.
 *
 * The victim is the last element of p->pagetables.
 * NOTE(review): VM_ARCH_MAP_PAGE re-inserts a pagetable with ADD_TO_LIST on
 * each use, so this is presumably the least recently used one --- confirm
 * that ADD_TO_LIST inserts at the head.
 *
 * Called at SPL_FS (checked in debug builds).
 */
static int ZAP_PROC_PAGETABLE(PROC *p)
{
	struct pgtbl *pg;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_FS)))
		KERNEL$SUICIDE("ZAP_PROC_PAGETABLE AT SPL %08X", KERNEL$SPL);
#endif
	if (__unlikely(p == &KERNEL$PROC_KERNEL)) return 0;
	if (__unlikely(LIST_EMPTY(&p->pagetables))) return 0;
	pg = LIST_STRUCT(p->pagetables.prev, struct pgtbl, list);
	PAGETABLE_FREEABLE(p, pg, WHOLE_PAGETABLE, {
		if (!ZAP_PROC_WQ) ZAP_PROC_WQ = wq;
		return 0;
	});
	ZAP_PAGETABLE(pg);
	return 1;
}

/*
 * Allocate a pagetable for slot "idx" of process "p" and hook it up.
 *
 * idx < 1024 is a page directory slot: the new table is entered into the
 * page directory (and into the VM_KERNEL_COPY_OF_LAST_BANK alias for the
 * last user bank). For idx == VM_KERNEL_RESERVED_BANK the table starts as a
 * copy of VM_ZERO_PAGE_TABLE. idx >= 1024 denotes I/O-bitmap/LDT page
 * number idx - 1024; such a table is only recorded in p->iobmp_ldt[] and
 * the caller is responsible for mapping it.
 *
 * Returns NULL on success, or a wait queue the caller has to wait on before
 * retrying. If the slab is exhausted or the pagetable quota is exceeded, a
 * pagetable of some process is zapped and the allocation is retried.
 *
 * Expected at SPL_FS (checked in debug builds, after the KERNEL$MAY_ALLOC
 * test); the success path ends with LOWER_SPL(SPL_FS).
 */
static WQ *GET_PAGETABLE(PROC *p, unsigned idx)
{
	QUOTA *zapq, *pzap;
	struct pgtbl *pg;
	__u32 val;
	WQ *wq;
	/* the size passed is sizeof(struct pgtbl) rounded up to a multiple of 4096 */
	if (__unlikely((wq = KERNEL$MAY_ALLOC(p, (sizeof(struct pgtbl) + 4095) & ~4095)) != NULL)) return wq;
	/*KERNEL$NOTIFY_ALLOC(p, (sizeof(struct pgtbl) + 4095) & ~4095);*/
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_FS)))
		KERNEL$SUICIDE("GET_PAGETABLE AT SPL %08X", KERNEL$SPL);
	if (__unlikely(idx >= __KERNEL_USER_VBANKS) && idx != VM_KERNEL_RESERVED_BANK && (__unlikely(idx < 1024) || __unlikely(idx >= 1024 + IOBMP_PAGES + LDT_PAGES)))
		KERNEL$SUICIDE("GET_PAGETABLE: INVALID INDEX %x", idx);
#endif
	retry:
	/* Reclaim path; never entered by falling through, only via the
	   "zaproot" (slab empty) and "zapp" (quota exceeded) labels. */
	if (0) {
		zaproot:
		pzap = &KERNEL$PROC_KERNEL.pgtblq;
		zapp:
		/* NOTE(review): QZAP presumably picks a victim quota node below pzap into zapq --- confirm */
		QZAP(pzap, for_all_pgtblq_subnodes, for_all_pgtblq_subnodes_tail, zapq, 1);
		ZAP_PROC_WQ = NULL;
		if (__unlikely(!ZAP_PROC_PAGETABLE(LIST_STRUCT(zapq, PROC, pgtblq)))) {
			if (__likely(ZAP_PROC_WQ != NULL)) return ZAP_PROC_WQ;
#if __DEBUG >= 1
			if (__unlikely(pzap != &KERNEL$PROC_KERNEL.pgtblq))
				KERNEL$SUICIDE("GET_PAGETABLE: COULD NOT FREE ANY PAGETABLE IN PROCESS");
#endif
/* if we got here from __slalloc failure, it may happen that quotas are not
   filled yet */
			if (__unlikely(!FOR_ALL_PROCS(&KERNEL$PROC_KERNEL, ZAP_PROC_PAGETABLE))) {
				if (__likely(ZAP_PROC_WQ != NULL)) return ZAP_PROC_WQ;
				KERNEL$SUICIDE("GET_PAGETABLE: COULD NOT FREE ANY PAGETABLE IN ALL PROCESSES");
			}
		}
		goto retry;
	}
	if (__unlikely(!(pg = __slalloc(&pagetables)))) {
		goto zaproot;
	}
	/* quota is charged only from PROC_RESERVED_PAGETABLES pagetables on */
	if (p->n_pagetables >= PROC_RESERVED_PAGETABLES) {
		QALLOC(&p->pgtblq, 1, proc_pgtblq_isroot, proc_pgtblq_parent, Q_NULL_CALL, pzap, {
			__slow_slfree(pg);
			goto zapp;
		});
	}
	p->n_pagetables++;
	/* NOTE(review): clears the first pointer-sized word of the object (it
	   overlaps e[0]); presumably the slab keeps a link there while the
	   object is free --- confirm */
	*(void **)pg = NULL;
	pg->proc = p;
	pg->idx = idx;
	if (__unlikely(idx == VM_KERNEL_RESERVED_BANK)) {
		memcpy(&pg->e, VM_ZERO_PAGE_TABLE, 1024 * sizeof(paddr_t));
	}
	ADD_TO_LIST(&p->pagetables, &pg->list);
	RAISE_SPL(SPL_VSPACE);
	if (__likely(idx < 1024)) {
		/* kernel virtual address -> physical address, plus directory flags */
		val = ((unsigned long)pg - VM_KERNEL_DIRECT_OFFSET) | PTE_P | PTE_RW | PTE_US | PTE_A;
		/* the table contents must be set up before it becomes reachable */
		__barrier();
		SET_PGDIR_ENTRY(p, idx, val);
		if (__unlikely(idx == __KERNEL_USER_VBANKS - 1))
			SET_PGDIR_ENTRY(p, VM_KERNEL_COPY_OF_LAST_BANK, val);
		if (__likely(p == PROC_CURRENT)) TLB_INVD_NG();
	} else {
		p->iobmp_ldt[idx - 1024] = pg;
	}
	/* go from SPL_VSPACE to SPL_CACHE, whichever of the two is the higher level */
	if (SPLX_BELOW(SPL_X(SPL_VSPACE), SPL_X(SPL_CACHE))) RAISE_SPL(SPL_CACHE);
	if (SPLX_BELOW(SPL_X(SPL_CACHE), SPL_X(SPL_VSPACE))) LOWER_SPL(SPL_CACHE);
	KERNEL$CACHE_INSERT_VM_ENTITY(&pg->vm, p, VM_ENTITY_NOSTREAM);
	LOWER_SPL(SPL_FS);
	return NULL;
}

/*
 * Free VM quota for process "orig_p" after a quota allocation failed at
 * node "pzap".
 *
 * A victim process is selected with QZAP. If the victim is "orig_p"
 * itself, one randomly chosen cluster mapping of it is removed; otherwise
 * a whole pagetable of the victim is zapped via ZAP_PROC_PAGETABLE.
 *
 * Returns NULL when something was freed, or a wait queue when the chosen
 * mapping/pagetable cannot be freed right now. Inconsistent mapping counts
 * kill the kernel.
 *
 * The SPL handling (raise to SPL_VSPACE, lower to SPL_FS) implies the
 * caller runs at SPL_FS.
 */
static WQ *PROC_FIND_ZAP_MAPPING(QUOTA *pzap, PROC *orig_p)
{
	PROC *p;
	QUOTA *zapq;
	QZAP(pzap, for_all_vmq_subnodes, for_all_vmq_subnodes_tail, zapq, 1);
	p = LIST_STRUCT(zapq, PROC, vmq);
	if (__unlikely(p == orig_p)) {
		struct pgtbl *pgtbl;
		/* ordinal of the mapping to remove, in [0, n_mappings) capped at MAXINT */
		unsigned long n = random() % (__unlikely(p->n_mappings > MAXINT) ? MAXINT : p->n_mappings);
		unsigned off;
		/* find the pagetable that contains the n-th mapping */
		LIST_FOR_EACH(pgtbl, &p->pagetables, struct pgtbl, list) {
			if (n < pgtbl->n_mappings) goto found;
			n -= pgtbl->n_mappings;
		}
		KERNEL$SUICIDE("PROC_FIND_ZAP_MAPPING: PROC n_mappings MISCOUNTED (TOTAL %lu, LEFT %lu)", p->n_mappings, n);
		found:
		/* n is now the ordinal among the present clusters of this pagetable */
		for (off = 0; off < 1024; off += PG_CLUSTER) {
			if (__unlikely((__u32)pgtbl->e[off] & PTE_P)) {
				if (__unlikely(!n--)) {
					PAGETABLE_FREEABLE(p, pgtbl, off, return wq;);
					RAISE_SPL(SPL_VSPACE);
					ZAP_PAGETABLE_ENTRY(pgtbl, off);
					if (__likely(p == PROC_CURRENT)) TLB_INVD_CLUSTER((pgtbl->idx << (PG_BANK_BITS + PG_SIZE_BITS)) + (off << PG_SIZE_BITS));
					LOWER_SPL(SPL_FS);
					return NULL;
				}
			}
		}
		KERNEL$SUICIDE("PROC_FIND_ZAP_MAPPING: PAGETABLE n_mappings MISCOUNTED (TOTAL %u, LEFT %lu)", pgtbl->n_mappings, n);
	}
	ZAP_PROC_WQ = NULL;
	if (__unlikely(!ZAP_PROC_PAGETABLE(p))) {
		if (__likely(ZAP_PROC_WQ != NULL)) return ZAP_PROC_WQ;
		KERNEL$SUICIDE("PROC_FIND_ZAP_MAPPING: PROCESS HAS VM QUOTA BUT NO PAGETABLES");
	}
	return NULL;
}

/*
 * Return the cache-control PTE bits (PTE_PWT, PTE_PCD, PTE_PAT_PTE) with
 * which page "p" is mapped in the kernel's physical map, so that a user
 * mapping of the same page can use the same cache mode.
 *
 * The page's bank is derived from its position in KERNEL_PAGE_MAP and
 * looked up in PHYSMAP_486. If that entry is a big page (PTE_PS), the bits
 * come from the directory entry, with the PAT bit moved from its PDE
 * position to its PTE position; otherwise they come from the PTE inside
 * the referenced pagetable. Debug builds die on a non-present mapping.
 */
static __finline__ __u32 PAGE_CACHEMODE(PAGE *p)
{
	unsigned off;
	/* NOTE(review): __BSR_CONST(sizeof(PAGE)) is presumably log2 of the PAGE
	   descriptor size, which would make this the descriptor index divided
	   by the number of clusters per bank --- confirm */
	unsigned bank = ((char *)p - (char *)KERNEL_PAGE_MAP) >> (__BSR_CONST(sizeof(PAGE)) + (PG_BANK_BITS - PG_CLUSTER_BITS));
	__u32 bits = PHYSMAP_486[bank];
#if __DEBUG >= 1
	if (__unlikely(!(bits & PTE_P)))
		KERNEL$SUICIDE("PAGE_CACHEMODE: MAPPING NON-PRESENT PAGE AT %p", p);
#endif
	if (__likely(bits & PTE_PS)) {
		return (bits & (PTE_PWT | PTE_PCD)) | (bits >> (P_PTE_PAT_PDE - P_PTE_PAT_PTE) & PTE_PAT_PTE);
	}
	/* index, within the bank, of the first PTE of the page's cluster */
	off = ((p - KERNEL_PAGE_MAP) << PG_CLUSTER_BITS) & (PG_BANK - 1);
	bits = ((__u32 *)((bits & ~(PG_SIZE - 1)) + VM_KERNEL_DIRECT_OFFSET))[off];
#if __DEBUG >= 1
	if (__unlikely(!(bits & PTE_P)))
		KERNEL$SUICIDE("PAGE_CACHEMODE: MAPPING NON-PRESENT PAGE (WITH PAGETABLE BUT WITHOUT PAGE) AT %p", p);
#endif
	return bits & (PTE_PWT | PTE_PCD | PTE_PAT_PTE);
}

/*
 * Map "page" into the address space of process "p" at user address "addr".
 *
 * "page" is either a PAGE (IS_PAGE_POINTER) or an SPAGE; the mapping covers
 * the whole cluster containing "addr". If "wr" has PF_WRITE set, a PAGE is
 * mapped writeable, and also pre-marked dirty when the page is already
 * dirty over its whole extent. SPAGEs are always mapped without PTE_RW.
 *
 * A missing pagetable is allocated first; an existing mapping at that
 * cluster is replaced; VM quota is charged once the process has
 * PROC_RESERVED_MAPPINGS mappings, reclaiming mappings if the quota is
 * exhausted.
 *
 * Returns NULL on success or a wait queue to wait on before retrying.
 * Must be called at SPL_FS (checked in debug builds).
 *
 * NOTE(review): the "return wq" taken inside PAGETABLE_FREEABLE below
 * happens while the SPL is still SPL_VSPACE, whereas every other exit
 * lowers it to SPL_FS first --- confirm that callers cope with that.
 */
WQ *VM_ARCH_MAP_PAGE(PROC *p, unsigned long addr, PAGE *page, int wr)
{
	__u32 val;
	struct pgtbl *pg;
	unsigned off;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_FS)))
		KERNEL$SUICIDE("VM_ARCH_MAP_PAGE AT SPL %08X", KERNEL$SPL);
	if (__unlikely(addr >= KUVMTOP))
		KERNEL$SUICIDE("VM_ARCH_MAP_PAGE: ADDRESS OUT OF USER AREA: %08lX", addr);
	if (__likely(IS_PAGE_POINTER(page))) {
		if (__unlikely(!page->lockdown))
			KERNEL$SUICIDE("VM_ARCH_MAP_PAGE: PAGE HAS NO LOCKDOWN METHOD (MAPPED AT %08lX,%d)", addr, wr);
	} else {
		if (__unlikely(!((SPAGE *)page)->lockdown))
			KERNEL$SUICIDE("VM_ARCH_MAP_PAGE: SPAGE HAS NO LOCKDOWN METHOD (MAPPED AT %08lX,%d)", addr, wr);
	}
#endif
	repeat:
	RAISE_SPL(SPL_VSPACE);
	val = GET_PGDIR_ENTRY(p, addr >> (PG_BANK_BITS + PG_SIZE_BITS));
	if (__likely(val & PTE_P)) {
		pg = (void *)((val & ~(__u32)(PG_SIZE - 1)) + VM_KERNEL_DIRECT_OFFSET);
		/* re-insert the pagetable into the process' list on every use */
		DEL_FROM_LIST(&pg->list);
		ADD_TO_LIST(&p->pagetables, &pg->list);
	} else {
		WQ *wq;
		LOWER_SPL(SPL_FS);
		wq = GET_PAGETABLE(p, addr >> (PG_BANK_BITS + PG_SIZE_BITS));
		if (__unlikely(wq != NULL)) return wq;
		goto repeat;
	}
	/* index of the first PTE of the cluster containing addr */
	off = (addr >> PG_SIZE_BITS) & (PG_BANK - PG_CLUSTER);
	if (__unlikely((__u32)pg->e[off] & PTE_P)) {
		/* something is mapped here already: drop it first */
		PAGETABLE_FREEABLE(p, pg, off, return wq;);
		ZAP_PAGETABLE_ENTRY(pg, off);
		if (__likely(p == PROC_CURRENT)) TLB_INVD_CLUSTER(addr & ~(unsigned long)(PG_CLUSTER * PG_SIZE - 1));
	}
	if (p->n_mappings >= PROC_RESERVED_MAPPINGS) {
		QUOTA *pzap;
		QALLOC(&p->vmq, 1, proc_vmq_isroot, proc_vmq_parent, Q_NULL_CALL, pzap, {
			WQ *wq;
			LOWER_SPL(SPL_FS);
			wq = PROC_FIND_ZAP_MAPPING(pzap, p);
			if (__unlikely(wq != NULL)) return wq;
			goto repeat;
		});
	}
	p->n_mappings++;
	pg->n_mappings++;
	if (__likely(IS_PAGE_POINTER(page))) {
		/* write bits: none for a read mapping; PTE_RW | PTE_D if the page is
		   already dirty from 0 to PAGE_CLUSTER_SIZE; plain PTE_RW otherwise */
		SET_MAPPING(pg, off, (paddr_t)PAGE_2_PHYS(page) | (__u32)(PTE_P | PTE_US | PTE_A | (!(wr & PF_WRITE) ? 0 : !(page->dirty_from | (page->dirty_to ^ PAGE_CLUSTER_SIZE)) ? PTE_RW | PTE_D : PTE_RW) | PAGE_CACHEMODE(page)));
		ADD_TO_XLIST(&page->mapping, &pg->rmap[off >> PG_CLUSTER_BITS].list);
	} else {
		/* SPAGEs can't have extra cache mode */
		SET_SPAGE_MAPPING(pg, off, ((paddr_t)PAGE_2_PHYS(((SPAGE *)page)->page)) | (((SPAGE *)page)->offset | (__u32)(PTE_P | PTE_US | PTE_A)), ((SPAGE *)page)->n_pages);
		ADD_TO_XLIST(&((SPAGE *)page)->mapping, &pg->rmap[off >> PG_CLUSTER_BITS].list);
	}
	LOWER_SPL(SPL_FS);
	return NULL;
}

/*
 * Remove all mappings of process "p" in the user address range
 * [addr, addr + len). The range is clipped to KUVMTOP; pagetables that end
 * up without mappings are freed.
 *
 * The range is processed bank by bank. Returns NULL when done, or a wait
 * queue if a pagetable cannot be touched right now (banks processed before
 * that stay unmapped).
 *
 * Must be called at SPL_DEV (checked in debug builds); the normal exits
 * are at SPL_DEV.
 * NOTE(review): the "return wq" inside PAGETABLE_FREEABLE is taken after
 * RAISE_SPL(SPL_FS) and without lowering back --- confirm that callers cope.
 */
WQ *VM_ARCH_UNMAP_RANGE(PROC *p, unsigned long addr, unsigned long len)
{
	__u32 val;
	unsigned long x;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_DEV)))
		KERNEL$SUICIDE("VM_ARCH_UNMAP_RANGE AT SPL %08X", KERNEL$SPL);
#endif
	/* clip ranges that run past the user area or wrap around */
	if (__unlikely(addr + len > KUVMTOP) || __unlikely(addr + len < addr)) {
		if (__unlikely(addr >= KUVMTOP)) return NULL;
		len = KUVMTOP - addr;
	}
	if (__unlikely(!len)) return NULL;
	/* x = number of bytes from addr to the end of its bank */
	x = PG_BANK * PG_SIZE - (addr & (PG_BANK * PG_SIZE - 1));
	if (x > len && __likely(p == PROC_CURRENT) && __likely(!PROC_CURRENT_LOCK)) {
/* it is cheaper to switch to different process than to invalidate TLB after
   each page */
		SET_PROC_CURRENT(&KERNEL$PROC_KERNEL);
	}
	again:
	RAISE_SPL(SPL_FS);
	val = GET_PGDIR_ENTRY(p, (unsigned long)addr >> (PG_BANK_BITS + PG_SIZE_BITS));
	if (__likely(val & PTE_P)) {
		struct pgtbl *pg;
		unsigned off = (addr >> PG_SIZE_BITS) & (PG_BANK - PG_CLUSTER);
		/* number of PTEs to clear in this bank: everything up to the end of
		   the bank if the range continues beyond it, otherwise the range
		   length rounded out to whole clusters */
		unsigned n = len > x ? 1024 - off : (((addr & (PG_CLUSTER * PG_SIZE - 1)) + len + (PG_CLUSTER * PG_SIZE - 1)) >> PG_SIZE_BITS) & ~(unsigned long)(PG_CLUSTER - 1);
		pg = (void *)((val & ~(__u32)(PG_SIZE - 1)) + VM_KERNEL_DIRECT_OFFSET);
		
		PAGETABLE_FREEABLE(p, pg, WHOLE_PAGETABLE, return wq;);
		ZAP_PAGETABLE_RANGE(pg, off, off + n);
		if (__unlikely(!pg->n_mappings)) {
			ZAP_EMPTY_PAGETABLE(pg);
		}
	}
	LOWER_SPL(SPL_DEV);
	if (x < len) {
		/* advance to the start of the next bank */
		addr += x;
		len -= x;
		x = PG_BANK * PG_SIZE;
		goto again;
	}
	return NULL;
}

/*
 * Make all user mappings of process "p" read-only and clean, collecting the
 * dirty bits into the pages (see READ_ONLY_PAGETABLE). All user banks with
 * a present pagetable are visited.
 *
 * Returns NULL when done, or a wait queue if a pagetable cannot be touched
 * right now (banks visited before that stay converted).
 *
 * Must be called at SPL_DEV (checked in debug builds); the normal exit is
 * at SPL_DEV.
 * NOTE(review): the "return wq" inside PAGETABLE_FREEABLE is taken after
 * RAISE_SPL(SPL_FS) and without lowering back --- confirm that callers cope.
 */
WQ *VM_ARCH_READ_ONLY(PROC *p)
{
	unsigned b;
	__u32 val;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_DEV)))
		KERNEL$SUICIDE("VM_ARCH_READ_ONLY AT SPL %08X", KERNEL$SPL);
#endif
	for (b = 0; b < __KERNEL_USER_VBANKS; b++) {
		struct pgtbl *pg;
		RAISE_SPL(SPL_FS);
		val = GET_PGDIR_ENTRY(p, b);
		if (__unlikely(val & PTE_P)) {
			pg = (void *)((val & ~(__u32)(PG_SIZE - 1)) + VM_KERNEL_DIRECT_OFFSET);
			PAGETABLE_FREEABLE(p, pg, WHOLE_PAGETABLE, return wq;);
			READ_ONLY_PAGETABLE(pg);
		}
		LOWER_SPL(SPL_DEV);
	}
	return NULL;
}

/*
 * Remove every mapping linked on the reverse-map list "mapping" (the list
 * of all clusters that map one page or spage), from whichever process and
 * pagetable it lives in.
 *
 * Returns NULL once the list is empty, or a wait queue if a mapping cannot
 * be removed right now.
 *
 * SPL: the SPL is raised to SPL_VSPACE on entry and is still SPL_VSPACE on
 * every return; between two mappings it is briefly lowered to the SPL the
 * function was entered with.
 */
WQ *VM_ARCH_UNMAP_MAPPING(XLIST_HEAD *mapping)
{
	int spl = KERNEL$SPL;
	RAISE_SPL(SPL_VSPACE);
	while (!XLIST_EMPTY(mapping)) {
		struct rmap *r = LIST_STRUCT(mapping->next, struct rmap, list);
		struct pgtbl *pg = r->pgtbl;
		PAGETABLE_FREEABLE(pg->proc, pg, r->idx, return wq;);
		/* this also unlinks r from the list, so the loop makes progress */
		ZAP_PAGETABLE_ENTRY(pg, r->idx);
		if (pg->proc == PROC_CURRENT) TLB_INVD_CLUSTER((pg->idx << (PG_BANK_BITS + PG_SIZE_BITS)) + (r->idx << PG_SIZE_BITS));
		LOWER_SPLX(spl);
		RAISE_SPL(SPL_VSPACE);
	}
	return NULL;
}

/*
 * Collect and reset the accessed/dirty state of all mappings on the
 * reverse-map list "mapping".
 *
 * PTE_A and PTE_D are cleared in every mapping; with "unw" nonzero PTE_RW
 * is cleared as well (the mappings become read-only). If any mapping was
 * dirty, the dirty range is merged into the PAGE that contains "mapping".
 *
 * Return value: normally not a real pointer but the accumulated accessed
 * bit, i.e. (void *)1 if any mapping had PTE_A set and NULL otherwise.
 * With "unw" set, a genuine wait queue pointer is returned instead when a
 * mapping cannot be modified right now.
 *
 * SPL: raised to SPL_VSPACE on entry and still SPL_VSPACE on every return;
 * between two mappings it is briefly lowered to the entry SPL, but never
 * below SPL_FS.
 */
WQ *VM_ARCH_CHECK_MAPPING(XLIST_HEAD *mapping, int unw)
{
	struct rmap *r;
	unsigned flg = 0;
	unsigned x1 = PG_CLUSTER, x2 = 0;
	__u32 clrflg = unw ? PTE_D | PTE_A | PTE_RW : PTE_D | PTE_A;
	int spl = KERNEL$SPL;
	if (__unlikely(SPLX_BELOW(spl, SPL_X(SPL_FS)))) spl = SPL_X(SPL_FS);
	RAISE_SPL(SPL_VSPACE);
	XLIST_FOR_EACH_UNLIKELY(r, mapping, struct rmap, list) {
		struct pgtbl *pg = r->pgtbl;
		unsigned f;
		if (__unlikely(unw)) {
			PAGETABLE_FREEABLE(pg->proc, pg, r->idx, return wq;);
		}
		f = GET_MAPPING_FLAGS(pg, r->idx, &x1, &x2, clrflg);
		/* the TLB needs invalidating only if the mapping was dirty or writeable */
		if (__unlikely(pg->proc == PROC_CURRENT) && f & (PTE_D | PTE_RW)) TLB_INVD_CLUSTER((pg->idx << (PG_BANK_BITS + PG_SIZE_BITS)) + (r->idx << PG_SIZE_BITS));
		flg |= f;
		LOWER_SPLX(spl);
		RAISE_SPL(SPL_VSPACE);
	}
	if (__unlikely(flg & PTE_D)) {
		/* spages can't have dirty mapping */
		PAGE *p = LIST_STRUCT(mapping, PAGE, mapping);
		if (__unlikely(!(p->flags & PAGE_WRITEABLE)))
			KERNEL$SUICIDE("VM_ARCH_CHECK_PAGE: NON-WRITEABLE PAGE %p WITH DIRTY MAPPING", p);
		if (x1 << PG_SIZE_BITS < p->dirty_from) p->dirty_from = x1 << PG_SIZE_BITS;
		if (x2 << PG_SIZE_BITS > p->dirty_to) p->dirty_to = x2 << PG_SIZE_BITS;
	}
	return (void *)((flg >> P_PTE_A) & 1);
}

/*
 * Look up what is mapped at user address "addr" in process "p".
 *
 * wr == 0: returns the mapped PAGE, or, when the cluster is an spage
 *   mapping (its last PTE is a padding entry), the SPAGE cast to PAGE *.
 *   Returns NULL if nothing is mapped.
 * wr != 0: returns the PAGE only if the mapping is writeable, and marks
 *   every PTE of the cluster dirty (PTE_D), as if the page had been
 *   written through the mapping. Returns NULL otherwise.
 *
 * NULL is also returned for addresses at or above KUVMTOP and when the
 * bank has no pagetable. Must be called at SPL_VSPACE (checked in debug
 * builds).
 */
PAGE *VM_ARCH_GET_PAGE(PROC *p, unsigned long addr, int wr)
{
	unsigned off;
	__u32 val;
	struct pgtbl *pg;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_VSPACE)))
		KERNEL$SUICIDE("VM_ARCH_GET_PAGE AT SPL %08X", KERNEL$SPL);
#endif
	if (__unlikely(addr >= KUVMTOP)) return NULL;
	val = GET_PGDIR_ENTRY(p, addr >> (PG_BANK_BITS + PG_SIZE_BITS));
	if (__unlikely(!(val & PTE_P))) return NULL;
	pg = (void *)((val & ~(__u32)(PG_SIZE - 1)) + VM_KERNEL_DIRECT_OFFSET);
	/* index of the first PTE of the cluster containing addr */
	off = (addr >> PG_SIZE_BITS) & (PG_BANK - PG_CLUSTER);
	if (__likely(!wr)) {
		if (__unlikely(!((__u32)pg->e[off] & PTE_P))) return NULL;
		if (__unlikely(IS_SPAGE_PADDING(pg->e[off + PG_CLUSTER - 1]))) {
			SPAGES *sp = PHYS_2_PAGE(pg->e[off])->fnode;
			/* Page index of the spage within its cluster, taken from the
			   first PTE. This used to be declared as a second "off", which
			   shadowed the outer variable inside its own initializer and so
			   indexed pg->e[] with an indeterminate value. */
			unsigned spage_off = ((__u32)pg->e[off] >> PG_SIZE_BITS) & (PG_CLUSTER - 1);
			return (PAGE *)(SPAGE *)sp->s[spage_off + 1].freelist.next;
		}
	} else {
#if PG_CLUSTER > 8
		unsigned i;
#endif
		/*RAISE_SPL(SPL_TOP);*/
		val = (__u32)pg->e[off];
		/* a non-present PTE never has PTE_RW, so this covers "not mapped" too */
		if (__unlikely(!(val & PTE_RW))) {
			/*LOWER_SPL(SPL_VSPACE);*/
			return NULL;
		}
		val |= PTE_D;
		*(__u32 *)&pg->e[off] = val;
#if PG_CLUSTER > 8
		/*LOWER_SPL(SPL_VSPACE);*/
		for (i = 1; i < PG_CLUSTER; i++) {
			/*RAISE_SPL(SPL_TOP);*/
			*(__u32 *)&pg->e[off + i] |= PTE_D;
			/*LOWER_SPL(SPL_VSPACE);*/
		}
#else
#if PG_CLUSTER > 1
		*(__u32 *)&pg->e[off + 1] |= PTE_D;
#endif
#if PG_CLUSTER > 2
		*(__u32 *)&pg->e[off + 2] |= PTE_D;
		*(__u32 *)&pg->e[off + 3] |= PTE_D;
#endif
#if PG_CLUSTER > 4
		*(__u32 *)&pg->e[off + 4] |= PTE_D;
		*(__u32 *)&pg->e[off + 5] |= PTE_D;
		*(__u32 *)&pg->e[off + 6] |= PTE_D;
		*(__u32 *)&pg->e[off + 7] |= PTE_D;
#endif
#endif
		/*LOWER_SPL(SPL_VSPACE);*/
	}
	return PHYS_2_PAGE(pg->e[off]);
}

/*
 * Handler for page faults on kernel addresses that were raised while the
 * CPU was running user code.
 *
 * three fault possibilities:
 *	-tss: i/o bitmap or vm86 interrupt map
 *	-ldt: an attempt to load segment register in userspace
 *	-idt: pentium f00fc7c8 bug
 *
 * Faults inside the I/O-bitmap/LDT window of the reserved kernel bank are
 * served by building the missing page on demand: an I/O bitmap page is
 * filled with 0xff, an LDT page is filled with descriptors copied from the
 * user's LDT after validation. Faults inside the IDT are turned into an
 * XCPT_UD user exception. Everything else kills the kernel.
 *
 * The function does not return to its caller; it ends in JMP_IDLE_LOOP()
 * or in one of the exception helpers.
 */


void VM_KERNEL_FAULT_EXCEPTION(void *addr, int flags, void *ip)
{
	/* staging buffer for the page being built; static, so there is a single shared instance */
	static __u8 new_page[PG_SIZE];
	if (__unlikely(flags & ~7)) {
		invl_fault:
		KERNEL$SUICIDE("VM_KERNEL_FAULT_EXCEPTION: INVALID KERNEL PAGEFAULT IN USER MODE, ADDR %p, FLAGS %08X, EIP %p", addr, flags, ip);
	}
	if (__likely((__u32)addr >= (unsigned)VM_KERNEL_RESERVED_BANK * PG_SIZE * PG_BANK + RESERVED_IOBMP * PG_SIZE) && __likely((__u32)addr < (unsigned)VM_KERNEL_RESERVED_BANK * PG_SIZE * PG_BANK + (RESERVED_LDT + LDT_PAGES) * PG_SIZE)) {
		/* page number within the I/O-bitmap/LDT window */
		unsigned idx = ((__u32)addr - ((unsigned)VM_KERNEL_RESERVED_BANK * PG_SIZE * PG_BANK + RESERVED_IOBMP * PG_SIZE)) >> PG_SIZE_BITS;
		WQ *wq;
		__u32 val;
		struct pgtbl *dir, *pg;
		RAISE_SPL(SPL_FS);
		/* the page exists already: nothing to do, just resume */
		if (__unlikely(PROC_RUN->iobmp_ldt[idx] != NULL)) goto end;
		val = GET_PGDIR_ENTRY(PROC_RUN, VM_KERNEL_RESERVED_BANK);
		if (val == VM_ZERO_PAGE_TABLE_ENTRY) {
			/* the process still shares the zero page table: give it a private reserved-bank table */
			if (__unlikely((wq = GET_PAGETABLE(PROC_RUN, VM_KERNEL_RESERVED_BANK)) != NULL)) {
				w:
				PROC_WAIT(PROC_RUN, wq);
				goto end;
			}
			val = GET_PGDIR_ENTRY(PROC_RUN, VM_KERNEL_RESERVED_BANK);
		}
		dir = (void *)((val & ~(__u32)(PG_SIZE - 1)) + VM_KERNEL_DIRECT_OFFSET);
		if (idx >= IOBMP_PAGES) {
			/* LDT page: copy and validate descriptors from user space.
			   NOTE(review): VM_FAULT_EXCEPTION and USER_EXCEPTION are used
			   below as if they do not return --- confirm. */
			__u8 *uldt;
			unsigned uldt_n;
			unsigned rq_sel, n;
			uread_ptr(PROC_RUN, KUPLACE(UDATA_LDT), uldt, {
				LOWER_SPL(SPL_DEV);
				VM_FAULT_EXCEPTION((void *)KUPLACE(UDATA_LDT), 0, ip);
			});
			/* the user LDT must be 8-byte aligned */
			if (__unlikely((unsigned long)uldt & 7)) {
				gp_ex:
				LOWER_SPL(SPL_DEV);
				USER_EXCEPTION(XCPT_GP, NULL, ip);
			}
			uread_int(PROC_RUN, KUPLACE(UDATA_LDT_N), uldt_n, {
				LOWER_SPL(SPL_DEV);
				VM_FAULT_EXCEPTION((void *)KUPLACE(UDATA_LDT_N), 0, ip);
			});
			/* number of descriptors -> size of the user LDT in bytes */
			uldt_n <<= 3;
			/* byte offset within the LDT of the first descriptor of this page */
			rq_sel = (idx - IOBMP_PAGES) << PG_SIZE_BITS;
			uldt += rq_sel;
			for (n = 0; n < PG_SIZE; n += 8, rq_sel += 8, uldt += 8) {
				__u32 seg[2];
				if (__unlikely(rq_sel >= uldt_n)) {
					/* past the end of the user LDT: the rest of the page is empty */
					memset(new_page + n, 0, PG_SIZE - n);
					break;
				}
				uread_64(PROC_RUN, uldt, seg, {
					LOWER_SPL(SPL_DEV);
					VM_FAULT_EXCEPTION(uldt, 0, ip);
				});
				/* NOTE(review): going by the x86 descriptor layout, the first test
				   requires bits 12-14 of the high word all set (S = 1, DPL = 3)
				   and bit 21 clear; the second rejects descriptors with bits 15,
				   11 and 10 all set (present conforming code segment) --- confirm
				   against the CPU manual. An all-zero descriptor is accepted as
				   an empty slot; any other failing descriptor raises XCPT_GP. */
				if (__unlikely((seg[1] & 0x207000) != 0x007000)
				 || __unlikely((seg[1] & 0x8c00) == 0x8c00)) {
					if (__likely(!(seg[0] | seg[1]))) {
						*(__u32 *)(new_page + n) = 0;
						*(__u32 *)(new_page + n + 4) = 0;
						continue;
					}
					goto gp_ex;
				}
				*(__u32 *)(new_page + n) = seg[0];
				*(__u32 *)(new_page + n + 4) = seg[1];
			}
		} else {
			/* I/O bitmap page: all bits set */
			memset(new_page, 0xff, PG_SIZE);
		}
		/* allocate the backing object; its e[] array serves as the raw page */
		if (__unlikely((wq = GET_PAGETABLE(PROC_RUN, 1024 + idx)) != NULL)) goto w;
		pg = PROC_RUN->iobmp_ldt[idx];
		memcpy(&pg->e, new_page, PG_SIZE);
		/* map the new page into the reserved bank; PTE_US is not set */
		dir->e[RESERVED_IOBMP + idx] = ((__u32)pg - VM_KERNEL_DIRECT_OFFSET) | PTE_P | PTE_RW | PTE_A | PTE_D;
		goto end;
	}
	/* fault inside the IDT (see the f00fc7c8 case above) */
	if (__unlikely((__u32)addr >= IDTR.base_lo + (IDTR.base_hi << 16)) && __likely((__u32)addr <= IDTR.base_lo + (IDTR.base_hi << 16) + IDTR.limit)) {
		LOWER_SPL(SPL_DEV);
		USER_EXCEPTION(XCPT_UD, NULL, ip);
	}
	goto invl_fault;
	end:
	LOWER_SPL(SPL_USER);
	JMP_IDLE_LOOP();
}

/*
 * System call: discard I/O-bitmap/LDT page number "idx" of the running
 * process.
 *
 * Completes with -EINVAL if idx is out of range, with 0 otherwise (also
 * when the page did not exist). If the page cannot be freed right now, the
 * process is put to wait and no result is set here. "sys" and "arg3" are
 * not used. The function does not return to its caller; it ends in
 * JMP_IDLE_LOOP().
 */
void SYSCALL_INVD_EXTD_PAGE(unsigned long sys, unsigned long idx, unsigned long arg3)
{
	struct pgtbl *pg;
	if (__unlikely(idx >= IOBMP_PAGES + LDT_PAGES)) {
		SYSCALL_RETURN(PROC_RUN, -EINVAL);
		goto end;
	}
	RAISE_SPL(SPL_FS);
	pg = PROC_RUN->iobmp_ldt[idx];
	if (__unlikely(pg != NULL)) {
		PAGETABLE_FREEABLE(PROC_RUN, pg, WHOLE_PAGETABLE, {
			PROC_WAIT(PROC_RUN, wq);
			goto end;
		});
		ZAP_PAGETABLE(pg);
	}
	LOWER_SPL(SPL_DEV);
	SYSCALL_RETURN(PROC_RUN, 0);
	end:
	LOWER_SPL(SPL_USER);
	JMP_IDLE_LOOP();
}

/*
 * "write" callback of the pagetable VM entity type. There is nothing to
 * write for a pagetable; the callback only lowers the SPL to SPL_FS.
 * All arguments are ignored.
 */
static void PAGETABLE_WRITE(VMENTITY *e, PROC *p, int trashing)
{
	(void)e;
	(void)p;
	(void)trashing;
	LOWER_SPL(SPL_FS);
}

/*
 * "swapout" callback of the pagetable VM entity type: try to free the
 * pagetable that embeds entity "e".
 *
 * Lowers the SPL to SPL_FS first. Returns
 *   0  the pagetable was zapped,
 *   2  the slab holds no pages beyond its reserve, so nothing is freed,
 *   3  the pagetable cannot be freed right now.
 */
static int PAGETABLE_SWAPOUT(VMENTITY *e)
{
	struct pgtbl *victim;
	LOWER_SPL(SPL_FS);
	victim = LIST_STRUCT(e, struct pgtbl, vm);
	if (__unlikely(pagetables.__n_pages <= pagetables.__n_reserved_pages))
		return 2;
	PAGETABLE_FREEABLE(victim->proc, victim, WHOLE_PAGETABLE, return 3;);
	ZAP_PAGETABLE(victim);
	return 0;
}

static int LDT_IOBMP_CHECKMAP(struct pgtbl *pg);
#if __DEBUG >= 1
static int PAGETABLE_CHECKMAP_NP_SUICIDE(struct pgtbl *pg, unsigned val);
#endif

/*
 * "checkmap" callback of the pagetable VM entity type: report whether the
 * pagetable embedding entity "e" was accessed since the last check.
 *
 * For an ordinary user bank the accessed bit (PTE_A) of its page directory
 * entry is returned (0 or 1) and cleared. The last user bank and all
 * non-user pagetables are handled by LDT_IOBMP_CHECKMAP.
 *
 * Raises the SPL to SPL_VSPACE and returns at SPL_FS.
 */
static int PAGETABLE_CHECKMAP(VMENTITY *e)
{
	int r;
	struct pgtbl *pg = LIST_STRUCT(e, struct pgtbl, vm);
	PROC *p;
	__u32 val;
	RAISE_SPL(SPL_VSPACE);
	p = pg->proc;
	if (__unlikely(pg->idx >= __KERNEL_USER_VBANKS - 1)) {
		return LDT_IOBMP_CHECKMAP(pg);
	}
	val = GET_PGDIR_ENTRY(p, pg->idx);
#if __DEBUG >= 1
	if (__unlikely(!(val & PTE_P)))
		return PAGETABLE_CHECKMAP_NP_SUICIDE(pg, val);
#endif
	/* extract the accessed bit (PTE_A is used as a divisor, i.e. assumed to be a single-bit mask) */
	r = (val / PTE_A) & 1;
	val &= ~PTE_A;
	SET_PGDIR_ENTRY(p, pg->idx, val);
	LOWER_SPL(SPL_FS);
	return r;
}

/*
 * Slow path of PAGETABLE_CHECKMAP for everything but ordinary user banks.
 *
 * Pagetables with idx < 1024 that get here (the last user bank and the
 * reserved kernel bank) are always reported as accessed and left alone.
 * For an I/O-bitmap/LDT page the accessed bit of its PTE in the process'
 * reserved-bank table is returned and cleared.
 *
 * Entered with the SPL already raised by PAGETABLE_CHECKMAP; returns at
 * SPL_FS.
 */
static int LDT_IOBMP_CHECKMAP(struct pgtbl *pg)
{
	__u32 val;
	__u32 *ptr;
	int r;
	struct pgtbl *dir;
	if (__unlikely(pg->idx < 1024)) {
/* The last aliased pagetable --- IA32 system developer's manual says that if
   two page directory entries alias each other, they must have same 'A' bits
   (3.7.4). So we rather won't touch it at all. */
		r = 1;
		goto ret;
	}
	val = GET_PGDIR_ENTRY(pg->proc, VM_KERNEL_RESERVED_BANK);
	if (__unlikely(val == VM_ZERO_PAGE_TABLE_ENTRY))
		KERNEL$SUICIDE("LDT_IOBMP_CHECKMAP: PROCESS HAS NO LDT OR IOBMP PAGES");
	dir = (void *)((val & ~(__u32)(PG_SIZE - 1)) + VM_KERNEL_DIRECT_OFFSET);
	/* only the low 32 bits of the PTE are read and rewritten */
	ptr = (__u32 *)&dir->e[pg->idx - 1024 + RESERVED_IOBMP];
	val = *ptr;
	r = (val / PTE_A) & 1;
	val &= ~PTE_A;
	*ptr = val;
	ret:
	LOWER_SPL(SPL_FS);
	return r;
}

#if __DEBUG >= 1
/*
 * Debug-only helper of PAGETABLE_CHECKMAP: kills the kernel because the
 * page directory entry "val" of pagetable "pg" is not present. The return
 * statement only satisfies the signature.
 */
static int PAGETABLE_CHECKMAP_NP_SUICIDE(struct pgtbl *pg, unsigned val)
{
	KERNEL$SUICIDE("PAGETABLE_CHECKMAP: NON-PRESENT PAGETABLE %X, VALUE %08X", pg->idx, val);
	return 0;
}
#endif

void SAVE_FPU(void);

/*
 * Store the current FPU state into the user data area of the process that
 * owns the FPU (PROC_FPU), at user address KUPLACE(UDATA_COPROCESSOR).
 * Does nothing if no process owns the FPU; does not change PROC_FPU.
 *
 * The target page is looked up with write intent (which also marks the
 * mapping dirty), mapped into the kernel temporarily and written with
 * FXSAVE.
 * NOTE(review): the .FEATURE_FIXUP record lists the FXSAVE instruction
 * (labels 1-2) and an FNSAVE/FWAIT sequence kept in .rodata (labels 3-4)
 * together with CPU_HAS_FXSR; presumably the former is patched with the
 * latter on CPUs without FXSR --- confirm against the fixup code.
 *
 * Runs at SPL_VSPACE internally and restores the caller's SPL at the end.
 * The kernel dies if the FPU save page is not mapped writeable.
 */
void SAVE_FPU(void)
{
	int spl;
	char *v;
	PAGE *p;
	if (__unlikely(!PROC_FPU)) return;
	spl = KERNEL$SPL;	/* needed only to keep call to VM_ARCH_GET_PAGE quiet */
	RAISE_SPL(SPL_VSPACE);
	p = VM_ARCH_GET_PAGE(PROC_FPU, KUPLACE(UDATA_COPROCESSOR), PF_WRITE);
	if (__unlikely(!p)) KERNEL$SUICIDE("SAVE_FPU: FPU PAGE NOT PRESENT");
	/* kernel address of the save area = mapped page + offset within the page cluster */
	v = (char *)KERNEL$MAP_PHYSICAL_PAGE(p) + (KUPLACE(UDATA_COPROCESSOR) & (PAGE_CLUSTER_SIZE - 1));
	__asm__ volatile ("						\n\
		.SECTION .FEATURE_FIXUP					\n\
		.LONG 1f, 2f, 3f, 4f, 0, 0, "__stringify(CPU_HAS_FXSR)", 0\n\
		.PREVIOUS						\n\
	1:	FXSAVE (%0)						\n\
	2:								\n\
		.SECTION .rodata					\n\
	3:	FNSAVE (%0)						\n\
		FWAIT							\n\
	4:								\n\
		.PREVIOUS						\n\
	"::"r"(v):"memory");
	KERNEL$UNMAP_PHYSICAL_BANK(v);
	LOWER_SPLX(spl);
}

/*
 * Save the FPU state of the process that currently owns the FPU and release
 * the ownership (PROC_FPU becomes NULL). Does nothing when no process owns
 * the FPU.
 */
void FLUSH_FPU(void)
{
	if (__likely(PROC_FPU != NULL)) {
		if (__likely(!KERNEL$FPU_ENABLED)) {
			/* Not strictly necessary: the kernel exception handler would
			   enable the FPU anyway, but executing CLTS is faster than
			   taking an fpu exception. */
			__asm__ volatile("CLTS":::"memory");
			/* The flag must be set only after CLTS; the order is
			   important. See comment in KERNEL$ENABLE_FPU */
			KERNEL$FPU_ENABLED = -1;
		}
		SAVE_FPU();
		PROC_FPU = NULL;
		/* the FPU is not disabled again here; exit to userspace will do it */
	}
}

/* State of the deferred FPU flush: the AST that performs it, a flag that is
   nonzero while that AST is posted, and the wait queue woken when it ran. */
extern AST_STUB FLUSH_FPU_AST_FN;
static int FLUSH_FPU_AST_ACTIVE = 0;
static AST FLUSH_FPU_AST = { FLUSH_FPU_AST_FN };
static WQ_DECL(FLUSH_FPU_WQ, "KERNEL$FLUSH_FPU_WQ");

/*
 * Request an FPU flush to be done from an AST and return the wait queue
 * that is woken once the flush has been done.
 *
 * The AST is posted only when FLUSH_FPU_AST_ACTIVE could be switched from
 * 0 to 1, so that it is not posted again while already pending.
 * NOTE(review): this relies on __CMPXCHGI returning the previous value of
 * the flag --- confirm.
 */
static WQ *FLUSH_FPU_DELAYED(void)
{
	if (!__CMPXCHGI(&FLUSH_FPU_AST_ACTIVE, 0, 1)) CALL_AST(&FLUSH_FPU_AST);
	return &FLUSH_FPU_WQ;
}

/*
 * AST posted by FLUSH_FPU_DELAYED (declared with SPL_ZERO): flushes the FPU
 * state and wakes everybody waiting on FLUSH_FPU_WQ.
 *
 * The "active" flag is cleared, followed by a barrier, before the flush, so
 * that a request arriving from then on posts the AST again.
 */
DECL_AST(FLUSH_FPU_AST_FN, SPL_ZERO, AST)
{
	FLUSH_FPU_AST_ACTIVE = 0;
	__barrier();
	FLUSH_FPU();
	WQ_WAKE_ALL_PL(&FLUSH_FPU_WQ);
	RETURN;
}

/*
 * Constructor of the architecture-specific part of a PROC.
 *
 * Clears the user part of the page directory and the part from
 * VM_KERNEL_COPY_OF_LAST_BANK up to the end (the entries in between are
 * not touched here), sets up the four top-level PAE pointers when PAE is
 * used, and resets all pagetable and mapping bookkeeping.
 */
void ARCH_PROC_CTOR(PROC *proc)
{
#if __KERNEL_USE_PAE != 0
	int quarter;
#endif
	/* page directory: user banks, then the tail starting at the last-bank alias */
	memset(&proc->pgdir[0], 0, __KERNEL_USER_VBANKS * sizeof(struct pgdirentry));
	memset(&proc->pgdir[VM_KERNEL_COPY_OF_LAST_BANK], 0, (1024 - VM_KERNEL_COPY_OF_LAST_BANK) * sizeof(struct pgdirentry));
#if __KERNEL_USE_PAE != 0
	/* each top-level pointer covers 256 consecutive directory entries */
	quarter = 0;
	while (quarter < 4) {
		proc->pgdir1[quarter] = KERNEL$VIRT_2_PHYS(&proc->pgdir[quarter * 256]) | P_PTR_P;
		quarter++;
	}
#endif
	/* no I/O-bitmap/LDT pages and no pagetables yet */
	memset(proc->iobmp_ldt, 0, sizeof proc->iobmp_ldt);
	proc->n_pagetables = 0;
	INIT_LIST(&proc->pagetables);
	/* mapping statistics */
	proc->n_mappings = 0;
	proc->last_map_swap_time = 0;
	proc->n_map_swaps = 0;
	proc->last_swapped_map = 0;
}

/*
 * Initialise the architecture-specific part of a new process "proc" from an
 * existing process "previous": the page directory entries between the user
 * banks and VM_KERNEL_COPY_OF_LAST_BANK (the range ARCH_PROC_CTOR leaves
 * alone) are copied over.
 */
void ARCH_PROC_INIT(PROC *proc, PROC *previous)
{
	/* NOTE(review): clears the first pointer-sized word of the PROC;
	   presumably an allocator link stored there while the object was
	   free --- confirm */
	*(void **)proc = NULL;
	memcpy(&proc->pgdir[__KERNEL_USER_VBANKS], &previous->pgdir[__KERNEL_USER_VBANKS], (VM_KERNEL_COPY_OF_LAST_BANK - __KERNEL_USER_VBANKS) * sizeof(struct pgdirentry));
}

/*
 * Tear down the architecture-specific state of a process that is going
 * away.
 *
 * If the process has a private table for VM_KERNEL_RESERVED_BANK, it is
 * zapped (which also releases all I/O-bitmap/LDT pages). User pagetables
 * and mappings must have been removed by the caller beforehand; debug
 * builds verify that none are left and kill the kernel otherwise.
 *
 * The SPL is raised to SPL_FS around the zap and lowered to SPL_DEV
 * afterwards, which implies the caller runs at SPL_DEV.
 */
void ARCH_PROC_DESTROY(PROC *proc)
{
	__u32 val;
	if (__unlikely((val = GET_PGDIR_ENTRY(proc, VM_KERNEL_RESERVED_BANK)) != VM_ZERO_PAGE_TABLE_ENTRY)) {
		struct pgtbl *pg = (void *)((val & ~(__u32)(PG_SIZE - 1)) + VM_KERNEL_DIRECT_OFFSET);
		RAISE_SPL(SPL_FS);
		ZAP_PAGETABLE(pg);
		LOWER_SPL(SPL_DEV);
	}
#if __DEBUG >= 1
	if (__unlikely(proc->n_pagetables))
		KERNEL$SUICIDE("ARCH_PROC_DESTROY: PROCESS HAS LEFT %d PAGETABLES", proc->n_pagetables);
	if (__unlikely(!LIST_EMPTY(&proc->pagetables)))
		KERNEL$SUICIDE("ARCH_PROC_DESTROY: PROCESS HAS LEFT NONEMPTY PAGETABLE LIST");
	/* this message used to say PAGETABLES too, which made it
	   indistinguishable from the n_pagetables check above */
	if (__unlikely(proc->n_mappings))
		KERNEL$SUICIDE("ARCH_PROC_DESTROY: PROCESS HAS LEFT %ld MAPPINGS", proc->n_mappings);
#endif
#if __DEBUG >= 2
	{
		unsigned i;
		/* no user bank may still have a present page directory entry ... */
		for (i = 0; i < __KERNEL_USER_VBANKS; i++) {
			if (__unlikely((__u32)GET_PGDIR_ENTRY(proc, i) & PTE_P))
				KERNEL$SUICIDE("ARCH_PROC_DESTROY: PROCESS LEFT NONEMPTY PAGEDIR ENTRY %X: %"__64_format"X", i, (__u64)GET_PGDIR_ENTRY(proc, i));
		}
		/* ... and neither may the alias of the last user bank */
		i = VM_KERNEL_COPY_OF_LAST_BANK;
		if (__unlikely((__u32)GET_PGDIR_ENTRY(proc, i) & PTE_P))
			KERNEL$SUICIDE("ARCH_PROC_DESTROY: PROCESS LEFT NONEMPTY PAGEDIR ENTRY %X: %"__64_format"X", i, (__u64)GET_PGDIR_ENTRY(proc, i));
	}
#endif
}

/*
 * Architecture hook called when a PROC is freed. It does nothing in
 * non-debug builds.
 *
 * __DEBUG >= 1: kills the kernel if the process being freed is
 *               PROC_CURRENT.
 * __DEBUG >= 2: additionally wipes the page directory entries between the
 *               user banks and VM_KERNEL_COPY_OF_LAST_BANK (the range
 *               filled in by ARCH_PROC_INIT).
 */
void ARCH_PROC_FREE(PROC *proc)
{
#if __DEBUG >= 1
	if (__unlikely(proc == PROC_CURRENT))
		KERNEL$SUICIDE("ARCH_PROC_FREE: FREEING CURRENT PROC %p", proc);
#endif
#if __DEBUG >= 2
	memset(&proc->pgdir[__KERNEL_USER_VBANKS], 0, (VM_KERNEL_COPY_OF_LAST_BANK - __KERNEL_USER_VBANKS) * sizeof(struct pgdirentry));
#endif
}

/*
 * Architecture hook run on process "p" after a fork.
 *
 * If "p" owns the FPU, its FPU state is flushed first. Then the page
 * cluster containing KUVMBASE is unmapped from "p"; that unmap is not
 * allowed to block, and the kernel dies if it does.
 */
void VM_ARCH_AFTER_FORK(PROC *p)
{
	WQ *blocked;
	if (p == PROC_FPU)
		FLUSH_FPU();
	/* exactly one page cluster, starting at KUVMBASE rounded down to a cluster boundary */
	blocked = VM_ARCH_UNMAP_RANGE(p, KUVMBASE & ~(unsigned long)(PAGE_CLUSTER_SIZE - 1), PAGE_CLUSTER_SIZE);
	if (__unlikely(blocked != NULL))
		KERNEL$SUICIDE("VM_ARCH_AFTER_FORK: VM_ARCH_UNMAP_RANGE BLOCKED");
}

/*
 * Per-process callback (passed to FOR_ALL_PROCS): refresh the long-term
 * mapping window of one process.
 *
 * A process whose VM_KERNEL_RESERVED_BANK slot still holds the shared
 * VM_ZERO_PAGE_TABLE_ENTRY needs no work. Otherwise the MEMMAP_PAGES entries
 * starting at RESERVED_MEMMAP are copied from VM_ZERO_PAGE_TABLE into the
 * page table the slot refers to. Always returns 0.
 */
static int UPDATE_LONG_TERM_MAP(PROC *proc)
{
	struct pgtbl *own_table;
	__u32 entry = GET_PGDIR_ENTRY(proc, VM_KERNEL_RESERVED_BANK);

	if (__likely(entry == VM_ZERO_PAGE_TABLE_ENTRY)) {
		return 0;
	}
	/* page-aligned part of the entry, shifted by VM_KERNEL_DIRECT_OFFSET, is used as the table's address */
	own_table = (void *)((entry & ~(__u32)(PG_SIZE - 1)) + VM_KERNEL_DIRECT_OFFSET);
	memcpy(&own_table->e[RESERVED_MEMMAP], &VM_ZERO_PAGE_TABLE[RESERVED_MEMMAP], MEMMAP_PAGES * sizeof(paddr_t));
	return 0;
}


/*
 * Map a physical region into the long-term mapping window (MEMMAP_PAGES
 * entries of VM_ZERO_PAGE_TABLE starting at RESERVED_MEMMAP).
 *
 * addr   - physical address of the region (need not be page aligned)
 * length - length of the region in bytes
 * pat    - requested cache mode; it is forced to PAT_UC when a present
 *          mapping of an overlapping frame uses a different mode
 *
 * Returns
 *   (void *)1                    when length <= 0,
 *   __ERR_PTR(-EADDRNOTAVAIL)    when addr does not fit into paddr_t,
 *   __ERR_PTR(-ENOMEM)           when the region needs more pages than the
 *                                window has, or no free run of slots exists,
 *   otherwise the virtual address inside VM_KERNEL_RESERVED_BANK that
 *   corresponds to addr (same offset within the page).
 *
 * An already present mapping of exactly the same frames is reused and its
 * reference counts in MEMMAP_REFCOUNTS are incremented.
 *
 * Must not be called above SPL_DEV (panics otherwise); runs at SPL_DEV and
 * restores the caller's SPL on return.
 */
void *KERNEL$MAP_PHYSICAL_REGION_LONG_TERM(__p_addr addr, int length, int pat)
{
	__u32 orig_cr0;
	unsigned i, j;
	int spl;
	paddr_t base;
	if (__unlikely(SPLX_BELOW(SPL_X(SPL_DEV), spl = KERNEL$SPL)))
		KERNEL$SUICIDE("KERNEL$MAP_PHYSICAL_REGION_LONG_TERM AT SPL %08X", KERNEL$SPL);
	RAISE_SPL(SPL_DEV);
	if (__unlikely(length <= 0)) {
		LOWER_SPLX(spl);
		return (void *)1;
	}
	if (__unlikely(addr != (__p_addr)(paddr_t)addr)) {
		LOWER_SPLX(spl);
		return __ERR_PTR(-EADDRNOTAVAIL);
	}
	/* from here on, length is the number of pages that the region touches */
	length = (((unsigned)addr & (PG_SIZE - 1)) + length + (PG_SIZE - 1)) >> PG_SIZE_BITS;
	/*
	 * A region larger than the whole window can never fit. Without this
	 * check MEMMAP_PAGES - length below would go negative (or wrap) against
	 * the unsigned loop counter and the scans would run past the window.
	 */
	if (__unlikely((unsigned)length > MEMMAP_PAGES)) {
		LOWER_SPLX(spl);
		return __ERR_PTR(-ENOMEM);
	}
	base = (paddr_t)addr & ~(paddr_t)(PG_SIZE - 1);

	/* pass 1: look for an existing mapping of exactly these frames */
	for (i = 0; i <= MEMMAP_PAGES - length; i++) {
		char cache_diff = 0;
		for (j = 0; j < length; j++) {
			paddr_t pte = VM_ZERO_PAGE_TABLE[RESERVED_MEMMAP + i + j];
			/* a cleared slot holds frame 0; it must not be taken for a mapping of physical page 0 */
			if (__likely(!((__u32)pte & PTE_P))) goto skip;
			if (__likely((pte & ~(paddr_t)(PG_SIZE - 1)) != base + (j << PG_SIZE_BITS))) goto skip;
			if (__unlikely((PTE2PAT(pte) ^ pat) & ~PAT_RO)) cache_diff = 1;
		}
		if (__unlikely(cache_diff)) {
			/*
			 * Existing mapping uses a different cache mode: downgrade it to
			 * uncached.
			 * NOTE(review): unlike the new-mapping path below, this rewrite
			 * neither ors in GLOBAL_BIT nor runs UPDATE_LONG_TERM_MAP over
			 * the processes --- confirm this is intended.
			 */
			orig_cr0 = START_CACHEMODE_MODIFY();
			for (j = 0; j < length; j++) {
				VM_ZERO_PAGE_TABLE[RESERVED_MEMMAP + i + j] = PHYSMAP_PTE(VM_ZERO_PAGE_TABLE[RESERVED_MEMMAP + i + j] & ~(paddr_t)(PG_SIZE - 1), PAT_UC);
			}
			END_CACHEMODE_MODIFY(orig_cr0);
		}
		for (j = 0; j < length; j++) {
			MEMMAP_REFCOUNTS[i + j]++;
		}
		goto ret_i;
		skip:;
	}

	/* pass 2: look for a run of free slots, checking PTE/refcount consistency on the way */
	for (i = 0; i <= MEMMAP_PAGES - length; i++) {
		for (j = 0; j < length; j++) {
			if (__likely((__u32)VM_ZERO_PAGE_TABLE[RESERVED_MEMMAP + i + j] & PTE_P)) {
				if (__unlikely(!MEMMAP_REFCOUNTS[i + j]))
					KERNEL$SUICIDE("KERNEL$MAP_PHYSICAL_REGION_LONG_TERM: PRESENT NON-REFERENCED MAPPING AT %d: %LX", i + j, (unsigned long long)VM_ZERO_PAGE_TABLE[RESERVED_MEMMAP + i + j]);
				goto skip2;
			}
			if (__unlikely(MEMMAP_REFCOUNTS[i + j]))
				KERNEL$SUICIDE("KERNEL$MAP_PHYSICAL_REGION_LONG_TERM: NON-PRESENT REFERENCED MAPPING AT %d: %LX", i + j, (unsigned long long)VM_ZERO_PAGE_TABLE[RESERVED_MEMMAP + i + j]);
		}
		goto found_it;
		skip2:;
	}
	LOWER_SPLX(spl);
	return __ERR_PTR(-ENOMEM);

	found_it:
	orig_cr0 = 0;
	/*
	 * Any present mapping of a frame inside the new region must end up with
	 * the same cache mode. On the first conflict the request is switched to
	 * PAT_UC and the scan restarts, so that every conflicting entry
	 * (including ones already passed) gets rewritten as uncached.
	 */
	scan_conflicts_again:
	for (j = 0; j < MEMMAP_PAGES; j++) {
		paddr_t pte = VM_ZERO_PAGE_TABLE[RESERVED_MEMMAP + j];
		paddr_t frame = pte & ~(paddr_t)(PG_SIZE - 1);
		/* free slots carry no mapping; never turn them into present entries */
		if (!((__u32)pte & PTE_P)) continue;
		if (frame < base) continue;
		if (__likely(frame >= base + (length << PG_SIZE_BITS))) continue;
		if (__likely(!((PTE2PAT(pte) ^ pat) & ~PAT_RO))) continue;
		if (pat != PAT_UC) {
			pat = PAT_UC;
			goto scan_conflicts_again;
		}
		if (!orig_cr0) orig_cr0 = START_CACHEMODE_MODIFY();
		VM_ZERO_PAGE_TABLE[RESERVED_MEMMAP + j] = PHYSMAP_PTE(frame, PAT_UC);
	}
	if (__unlikely(orig_cr0)) END_CACHEMODE_MODIFY(orig_cr0);

	/* install the new mapping in the slots found by pass 2 */
	for (j = 0; j < length; j++) {
		VM_ZERO_PAGE_TABLE[RESERVED_MEMMAP + i + j] = PHYSMAP_PTE(base + (j << PG_SIZE_BITS), pat) | GLOBAL_BIT;
		MEMMAP_REFCOUNTS[i + j] = 1;
	}
	/* processes with their own page table for the reserved bank need a copy of the new entries */
	if (__likely(PROC_INITIALIZED)) FOR_ALL_PROCS(&KERNEL$PROC_KERNEL, UPDATE_LONG_TERM_MAP);
	TLB_INVD_G();
	ret_i:
	LOWER_SPLX(spl);
	return (void *)((((VM_KERNEL_RESERVED_BANK << PG_BANK_BITS) + RESERVED_MEMMAP + i) << PG_SIZE_BITS) + ((unsigned long)addr & (PG_SIZE - 1)));
}

/*
 * Drop one reference to a region previously returned by
 * KERNEL$MAP_PHYSICAL_REGION_LONG_TERM.
 *
 * vaddr_ - virtual address inside the long-term mapping window
 * length - length of the region in bytes; length <= 0 is a no-op
 *
 * Every page of the region has its MEMMAP_REFCOUNTS entry decremented. A
 * page whose count reaches zero has its entry in VM_ZERO_PAGE_TABLE cleared;
 * afterwards the TLB is invalidated, and the cache is written back and
 * invalidated first if any cleared page was mapped with a cache mode other
 * than PAT_UC_MINUS, PAT_UC or PAT_WC.
 *
 * Panics when called above SPL_DEV, when the region does not lie inside the
 * window, or when a reference count underflows. Runs at SPL_DEV and restores
 * the caller's SPL on return.
 */
void KERNEL$UNMAP_PHYSICAL_REGION_LONG_TERM(void *vaddr_, int length)
{
	char need_flush;
	unsigned long vaddr = (unsigned long)vaddr_;
	int spl;
	if (__unlikely(SPLX_BELOW(SPL_X(SPL_DEV), spl = KERNEL$SPL)))
		KERNEL$SUICIDE("KERNEL$UNMAP_PHYSICAL_REGION_LONG_TERM AT SPL %08X", KERNEL$SPL);
	RAISE_SPL(SPL_DEV);
	if (__unlikely(length <= 0)) {
		LOWER_SPLX(spl);
		return;
	}
	/*
	 * The region must lie inside the window. Its end may coincide with the
	 * end of the window (the map routine can hand out the last page), so the
	 * upper bound is compared with '>' rather than '>='.
	 */
	if (__unlikely(vaddr < (((VM_KERNEL_RESERVED_BANK << PG_BANK_BITS) + RESERVED_MEMMAP) << PG_SIZE_BITS)) ||
	    __unlikely(vaddr + length > (((VM_KERNEL_RESERVED_BANK << PG_BANK_BITS) + RESERVED_MEMMAP + MEMMAP_PAGES) << PG_SIZE_BITS)))
		KERNEL$SUICIDE("KERNEL$UNMAP_PHYSICAL_REGION_LONG_TERM: INVALID ADDRESS: %lX, LENGTH: %X", vaddr, length);
	/* convert to a page count and to a slot index relative to the start of the window */
	length = (((unsigned)vaddr & (PG_SIZE - 1)) + length + (PG_SIZE - 1)) >> PG_SIZE_BITS;
	vaddr >>= PG_SIZE_BITS;
	vaddr -= (VM_KERNEL_RESERVED_BANK << PG_BANK_BITS) + RESERVED_MEMMAP;
	/* bit 0: a PTE was cleared (TLB flush needed); bit 1: a cleared PTE was cacheable (cache flush needed) */
	need_flush = 0;
	for (; length; length--) {
		if (__unlikely(--MEMMAP_REFCOUNTS[vaddr] < 0))
			KERNEL$SUICIDE("KERNEL$UNMAP_PHYSICAL_REGION_LONG_TERM: REFCOUNT UNDERFLOW: %d AT OFFSET %lX", MEMMAP_REFCOUNTS[vaddr], vaddr);
		if (!MEMMAP_REFCOUNTS[vaddr]) {
			int pat = PTE2PAT(VM_ZERO_PAGE_TABLE[RESERVED_MEMMAP + vaddr]) & PAT_CACHE_MODE;
			if (!(pat == PAT_UC_MINUS || pat == PAT_UC || pat == PAT_WC)) need_flush |= 2;
			VM_ZERO_PAGE_TABLE[RESERVED_MEMMAP + vaddr] = 0;
			need_flush |= 1;
		}
		vaddr++;
	}
	if (need_flush) {
		if (need_flush & 2) CACHE_WBINVD();
		TLB_INVD_G();
	}
	LOWER_SPLX(spl);
}

