#include <SPAD/LIBC.H>
#include <SPAD/SLAB.H>
#include <SPAD/DL.H>
#include <KERNEL/LINK.H>
#include <SPAD/LINK.H>
#include <ARCH/CPU.H>
#include <ARCH/SETUP.H>
#include <ARCH/IO.H>
#include <SPAD/VM.H>
#include <KERNEL/VM_ARCH.H>
#include <KERNEL/ASM.H>
#include <KERNEL/TIME.H>
#include <KERNEL/IRQ.H>
#include <KERNEL/FIXUP.H>
#include <SYS/TYPES.H>

/*
 * Kernel boot is tricky. The loader loads the image at 0x100000 and jumps to
 * KERNEL$BOOT. KERNEL$BOOT is not relocated, so it can't touch global
 * variables, use switch statements, etc. We relocate the kernel for physical
 * mapping, then create the paging structures, then relocate the kernel again
 * for virtual mapping, and finally enable paging.
 */

/*
 * Print "LINK FAILED" on the text screen and halt forever.
 *
 * This is called when LINK_RELOC reports failure, i.e. while the image may be
 * unrelocated or relocated for a mapping that is not active yet. It therefore
 * must not reference any absolute address inside the kernel image: no string
 * literals, no globals. Every character is stored as an immediate value.
 *
 * The byte at 0x449 selects the text buffer: value 7 selects the buffer at
 * 0xb0000, anything else the one at 0xb8000 (presumably the BIOS video-mode
 * byte, 7 being monochrome -- TODO confirm). Output starts 0xa0 bytes into
 * the buffer. 0x7000 is OR-ed into every cell as its attribute byte.
 *
 * The screen pointer is volatile: the stores are followed only by an endless
 * loop, so without volatile the compiler would be entitled to treat them as
 * dead and drop them.
 */
static void RELOC_FAIL(void)
{
	volatile unsigned short int *base = (volatile unsigned short int *)(*(char *)0x449 == 7 ? 0xb00a0 : 0xb80a0);
	base[0] = 'L' | 0x7000;
	base[1] = 'I' | 0x7000;
	base[2] = 'N' | 0x7000;
	base[3] = 'K' | 0x7000;
	base[4] = ' ' | 0x7000;
	base[5] = 'F' | 0x7000;
	base[6] = 'A' | 0x7000;
	base[7] = 'I' | 0x7000;
	base[8] = 'L' | 0x7000;
	base[9] = 'E' | 0x7000;
	base[10] = 'D' | 0x7000;
	/* nothing sensible can be done after a failed relocation */
	while (1) ;
}

/*
 * Print the NUL-terminated message s on the text screen and halt forever.
 *
 * The byte at 0x449 selects the text buffer: value 7 selects the buffer at
 * 0xb0000, anything else the one at 0xb8000 (presumably the BIOS video-mode
 * byte -- TODO confirm). Output starts 0xa0 bytes into the buffer and is not
 * bounded; the caller must pass a message that fits.
 *
 * Each character is converted through unsigned char so that bytes >= 0x80 do
 * not sign-extend into the attribute byte, then OR-ed with attribute 0x7000.
 *
 * The screen pointer is volatile: the stores are followed only by an endless
 * loop, so without volatile the compiler would be entitled to treat them as
 * dead and drop them.
 */
static void BOOT_FAIL(char *s)
{
	volatile unsigned short int *base = (volatile unsigned short int *)(*(char *)0x449 == 7 ? 0xb00a0 : 0xb80a0);
	while (*s) *base++ = (unsigned char)*s++ | 0x7000;
	/* never returns */
	while (1) ;
}

/*
 * One row of the cid_types table: describes what a single descriptor byte
 * returned by CPUID function 2 stands for (see CHECK_CPU_FEATURES).
 *
 * The field order must not change: cid_types is initialized positionally.
 */
struct cache_id {
	__u8 code;	/* descriptor byte value to match */
	char type;	/* one of the CID_* constants; CID_END (0) ends the table */
	unsigned size;	/* presumably size in bytes for caches, entry count for TLBs -- not read by the code in this file */
	unsigned assoc;	/* presumably associativity (ways) -- not read by the code in this file */
	unsigned line;	/* line size; read for CID_L1D, CID_L2 and CID_PREFETCH rows. TLB rows appear to hold the page size here */
};

/*
 * Values for struct cache_id.type. CID_END must stay 0: the lookup loop in
 * CHECK_CPU_FEATURES stops at the first row whose type is zero.
 * Only CID_L1D, CID_L2 and CID_PREFETCH are acted upon in CHECK_CPU_FEATURES;
 * the others are merely recorded in the table.
 */
#define CID_END		0
#define CID_ITLB	1
#define CID_DTLB	2
#define CID_L1I		3
#define CID_L1D		4
#define CID_T		5
#define CID_L2		6
#define CID_L3		7
#define CID_PREFETCH	8

/*
 * Lookup table for the descriptor bytes returned by CPUID function 2.
 * Columns: code, type, size, assoc, line (see struct cache_id).
 * The table is scanned linearly and ends with a CID_END row.
 */
static __const__ struct cache_id cid_types[] = {
	{ 0x01,	CID_ITLB,	32,	  4,	4096 },
	{ 0x02,	CID_ITLB,	2,	  2,	4194304 },
	{ 0x03,	CID_DTLB,	64,	  4,	4096 },
	{ 0x04,	CID_DTLB,	8,	  4,	4194304 },
	{ 0x06,	CID_L1I,	8192,	  4,	32 },
	{ 0x08,	CID_L1I,	16384,	  4,	32 },
	{ 0x0A,	CID_L1D,	8192,	  2,	32 },
	{ 0x0C,	CID_L1D,	16384,	  4,	32 },
	{ 0x22,	CID_L3,		524288,	  4,	64 },
	{ 0x23,	CID_L3,		1048576,  8,	64 },
	{ 0x25,	CID_L3,		2097152,  8,	64 },
	{ 0x29,	CID_L3,		4194304,  8,	64 },
	{ 0x2C,	CID_L1D,	32768,	  8,	64 },
	{ 0x30,	CID_L1I,	32768,	  8,	64 },
	{ 0x39,	CID_L2,		131072,	  4,	64 },
	{ 0x3B,	CID_L2,		131072,	  2,	64 },
	{ 0x3C,	CID_L2,		262144,	  4,	64 },
	{ 0x41,	CID_L2,		131072,	  4,	32 },
	{ 0x42,	CID_L2,		262144,	  4,	32 },
	{ 0x43,	CID_L2,		524288,	  4,	32 },
	{ 0x44,	CID_L2,		1048576,  4,	32 },
	{ 0x45,	CID_L2,		2097152,  4,	32 },
	{ 0x50,	CID_ITLB,	64,	  64,	0 },
	{ 0x51,	CID_ITLB,	128,	  128,	0 },
	{ 0x52,	CID_ITLB,	256,	  256,	0 },
	{ 0x5B,	CID_DTLB,	64,	  64,	0 },
	{ 0x5C,	CID_DTLB,	128,	  128,	0 },
	{ 0x5D,	CID_DTLB,	256,	  256,	0 },
	{ 0x60,	CID_L1D,	16384,	  8,	64 },
	{ 0x66,	CID_L1D,	8192,	  4,	64 },
	{ 0x67,	CID_L1D,	16384,	  4,	64 },
	{ 0x68,	CID_L1D,	32768,	  4,	64 },
	{ 0x70,	CID_T,		12288,	  8,	0 },
	{ 0x71,	CID_T,		16384,	  8,	0 },
	{ 0x72,	CID_T,		32768,	  8,	0 },
	{ 0x78,	CID_L2,		1048576,  4,	64 },
	{ 0x79,	CID_L2,		131072,	  8,	64 },
	{ 0x7A,	CID_L2,		262144,	  8,	64 },
	{ 0x7B,	CID_L2,		524288,	  8,	64 },
	{ 0x7C,	CID_L2,		1048576,  8,	64 },
	{ 0x7D,	CID_L2,		2097152,  8,	64 },
	{ 0x7F,	CID_L2,		524288,	  2,	64 },
	{ 0x82,	CID_L2,		262144,	  8,	32 },
	{ 0x83,	CID_L2,		524288,	  8,	32 },
	{ 0x84,	CID_L2,		1048576,  8,	32 },
	{ 0x85,	CID_L2,		2097152,  8,	32 },
	{ 0x86,	CID_L2,		524288,	  4,	64 },
	{ 0x87,	CID_L2,		1048576,  8,	64 },
	{ 0xB0,	CID_ITLB,	128,	  4,	4096 },
	{ 0xB3,	CID_DTLB,	128,	  4,	4096 },
	{ 0xF0,	CID_PREFETCH,	0,	  0,	64 },
	{ 0xF1,	CID_PREFETCH,	0,	  0,	128 },
	/* terminator: type == CID_END stops the scan */
	{ 0,	CID_END,	0,	  0,	0 }
};

/*
 * Probe the processor and record what it supports in KERNEL$CPU_FEATURES.
 *
 * Steps, in order:
 *  - check whether the EFLAGS_ID bit of EFLAGS can be toggled; CPUID is only
 *    executed if it can
 *  - CPUID function 0: highest supported function (maxc) and vendor string
 *  - CPUID function 1 (if maxc != 0): feature words (cpuid, cpuid2), version
 *  - CPUID function 0x80000001 (vendor "AuthenticAMD" only): amd_cpuid
 *  - CPUID function 2 (if maxc >= 2): descriptor bytes, looked up in
 *    cid_types to choose a prefetch line size
 *  - translate the collected bits into CPU_* flags, with a few
 *    model-specific exceptions
 *
 * Halts through BOOT_FAIL if CPUID is present but reports no FPU, or, when
 * built with __KERNEL_USE_PAE == 1, if PAE, big pages or global pages are
 * missing.
 *
 * This function uses string literals, a switch statement and the cid_types
 * table, so it is called from KERNEL_BOOT_RELOCATED and not from the
 * unrelocated KERNEL$BOOT.
 */
static void CHECK_CPU_FEATURES(void)
{
	__u32 f;
	__u32 cpuid = 0;
	__u32 cpuid2 = 0;
	__u32 amd_cpuid = 0;
	__u32 maxc = 0;
	__u32 version = 0;
	/* vendor[3] stays 0 and terminates the 12-byte vendor string */
	__u32 vendor[4] = { 0, 0, 0, 0 };
	/* flip EFLAGS_ID, read EFLAGS back; f == 0 means the bit really changed */
	__asm__ volatile ("					;\
		PUSHFL						;\
		POPL	%%EAX					;\
		XORL	%1, %%EAX				;\
		PUSHL	%%EAX					;\
		POPFL						;\
		PUSHFL						;\
		POPL	%%ECX					;\
		XORL	%%ECX, %%EAX				;\
		ANDL	%1, %%EAX				;\
	":"=a"(f):"i"(EFLAGS_ID):"cx","cc");
	if (!f) {
/* do not change EBX, because it can't be used when compiling with -fPIC */
		/* function 0: EAX -> maxc, EBX (via ESI)/EDX/ECX -> vendor string */
		__asm__ volatile ("				;\
			MOVL	%%EBX, %%ESI			;\
			XORL	%%EAX, %%EAX			;\
			CPUID					;\
			XCHGL	%%ESI, %%EBX			;\
		":"=a"(maxc),"=S"(vendor[0]),"=d"(vendor[1]),"=c"(vendor[2])::"cc");
		if (maxc) {
			/* function 1: EDX -> cpuid, ECX -> cpuid2, EAX -> version */
			__asm__ volatile ("			;\
				MOVL	$1, %%EAX		;\
				MOVL	%%EBX, %%ESI		;\
				CPUID				;\
				MOVL	%%ESI, %%EBX		;\
			":"=d"(cpuid),"=c"(cpuid2),"=a"(version)::"si","cc");
		}
		if (!strcmp((char *)vendor, "AuthenticAMD")) {
			/* function 0x80000001: EDX -> amd_cpuid */
			__asm__ volatile ("			;\
				MOVL	$0x80000001, %%EAX	;\
				MOVL	%%EBX, %%ESI		;\
				CPUID				;\
				MOVL	%%ESI, %%EBX		;\
			":"=d"(amd_cpuid)::"ax","si","cx","cc");
		}
		if (maxc >= 2) {
			unsigned prefetch = 0, l1d_line = 0, l2_line = 0;
			/* n: iterations done; t: iterations required (low byte of EAX on the first pass) */
			unsigned n = 0, t = 1;
			do {
				__u32 cid[4];
				unsigned i;
				/* function 2: EAX/EDX/ECX/EBX (via ESI) -> cid[0..3] */
				__asm__ volatile ("		;\
					MOVL	$2, %%EAX	;\
					MOVL	%%EBX, %%ESI	;\
					CPUID			;\
					XCHGL	%%ESI, %%EBX	;\
				":"=a"(cid[0]),"=d"(cid[1]),"=c"(cid[2]),"=S"(cid[3])::"cc");
				/*
				 * Read the count as unsigned char: through a plain
				 * char that is signed, a byte >= 0x80 would
				 * sign-extend into an enormous iteration count.
				 */
				if (!n) t = *(unsigned char *)cid;
				/* a register with bit 31 set carries no descriptors */
				for (i = 0; i < 4; i++) if (cid[i] & 0x80000000) cid[i] = 0;
				/* byte 0 is the iteration count, so descriptors are bytes 1..15 */
				for (i = 1; i < 16; i++) {
					__const__ struct cache_id *ci;
					__u8 c = *((char *)cid + i);
					for (ci = cid_types; ci->type; ci++) if (__unlikely(ci->code == c)) {
						switch (ci->type) {
						case CID_L1D:
							l1d_line = ci->line;
							break;
						case CID_L2:
							l2_line = ci->line;
							break;
						case CID_PREFETCH:
							prefetch = ci->line;
							break;
						}
						break;
					}
				}
			} while (__unlikely(++n < t));
			/* no explicit prefetch descriptor: fall back to L2, then L1D line size */
			if (!prefetch) prefetch = l2_line;
			if (!prefetch) prefetch = l1d_line;
			if (prefetch >= 128) KERNEL$CPU_FEATURES |= CPU_PREFETCH_128;
			if (prefetch >= 64) KERNEL$CPU_FEATURES |= CPU_PREFETCH_64;
		}
	}
	if (maxc && !(cpuid & CPU_FEATURE_FPU))
		BOOT_FAIL("CPU DOES NOT HAVE FPU");
	if (maxc) KERNEL$CPU_FEATURES |= CPU_HAS_CPUID;
	if (cpuid & CPU_FEATURE_TSC) KERNEL$CPU_FEATURES |= CPU_HAS_TSC;
	if (cpuid & CPU_FEATURE_PSE) KERNEL$CPU_FEATURES |= CPU_HAS_4M_PAGES;
	if (cpuid & CPU_FEATURE_MSR) KERNEL$CPU_FEATURES |= CPU_HAS_MSR;
	if (cpuid & CPU_FEATURE_CX8) KERNEL$CPU_FEATURES |= CPU_HAS_CMPXCHG8B;
	if (cpuid & CPU_FEATURE_PGE) KERNEL$CPU_FEATURES |= CPU_HAS_GLOBAL_PAGES;
	if (cpuid & CPU_FEATURE_CMOV) KERNEL$CPU_FEATURES |= CPU_HAS_CMOV;
	if (cpuid & CPU_FEATURE_FGPAT) KERNEL$CPU_FEATURES |= CPU_HAS_PAT;
	if (cpuid & CPU_FEATURE_SS) KERNEL$CPU_FEATURES |= CPU_HAS_SELF_SNOOP;

/* If the CPU has both AMD SYSCALL and Intel SYSENTER instructions (new AMD CPUs
   have), prefer SYSCALL --- it is slightly faster. Test on Athlon64:
 	SYSCALL: always 148 ticks
	SYSENTER: most time 175 ticks, sometimes 142 or 159 ticks
*/

	if (amd_cpuid & CPU_FEATURE_AMD_SYSCALL && !(KERNEL$CPU_FEATURES & CPU_HAS_SYSENTER)) KERNEL$CPU_FEATURES |= CPU_HAS_AMD_SYSCALL;

	if (cpuid & CPU_FEATURE_SEP && !(KERNEL$CPU_FEATURES & CPU_HAS_AMD_SYSCALL)) {
		/*
		 * SEP is ignored when version bits 11:8 == 6, bits 7:4 < 3 and
		 * bits 3:0 < 3 (family / model / stepping in CPUID terms).
		 */
		if (((version >> 8) & 0xf) == 6 && ((version >> 4) & 0xf) < 3 && (version & 0xf) < 3) goto nosep;
		KERNEL$CPU_FEATURES |= CPU_HAS_SYSENTER;
		nosep:;
	}

	if (!strcmp((char *)vendor, "AuthenticAMD")) {
		/* version 0x50x: global pages are assumed regardless of the PGE bit */
		if ((version & 0xff0) == 0x500) KERNEL$CPU_FEATURES |= CPU_HAS_GLOBAL_PAGES;
		/*
		 * versions 0x66x..0x6ax that do not report SSE: clear bit 15 of
		 * MSR_K7_HWCR and re-read CPUID function 1 (presumably this
		 * makes SSE visible when it was disabled -- TODO confirm).
		 */
		if ((version & 0xff0) >= 0x660 && ((version & 0xff0) <= 0x6a0) && maxc && !(cpuid & CPU_FEATURE_SSE)) {
			__asm__ volatile ("			;\
				RDMSR				;\
				ANDL	$~0x00008000, %%EAX	;\
				WRMSR				;\
			"::"c"(MSR_K7_HWCR):"ax","dx","cc");
			__asm__ volatile ("			;\
				MOVL	$1, %%EAX		;\
				MOVL	%%EBX, %%ESI		;\
				CPUID				;\
				MOVL	%%ESI, %%EBX		;\
			":"=d"(cpuid),"=c"(cpuid2),"=a"(version)::"si","cc");
		}
	}
	if (cpuid & CPU_FEATURE_FXSR) {
		KERNEL$CPU_FEATURES |= CPU_HAS_FXSR;
		if (!strcmp((char *)vendor, "AuthenticAMD") && ((version >> 8) & 0xf) >= 6 && cpuid & CPU_FEATURE_MMX) KERNEL$CPU_FEATURES |= CPU_FXSR_LEAK;
	}
	if (cpuid & CPU_FEATURE_SSE) KERNEL$CPU_FEATURES |= CPU_HAS_SSE;
	if (cpuid & CPU_FEATURE_SSE2) KERNEL$CPU_FEATURES |= CPU_HAS_SSE2;
	if (cpuid2 & CPU_FEATURE2_SSE3) KERNEL$CPU_FEATURES |= CPU_HAS_SSE3;
	if (((version >> 8) & 0xf) >= 6 && KERNEL$CPU_FEATURES & CPU_HAS_4M_PAGES) KERNEL$CPU_FEATURES |= CPU_INVLPG_BIG_PAGES;
#if __KERNEL_USE_PAE == 1
	/* a PAE kernel needs PAE, big pages and global pages */
	if (!(cpuid & CPU_FEATURE_PAE))
		BOOT_FAIL("KERNEL WAS COMPILED WITH PAE, BUT YOUR CPU DOESN'T SUPPORT IT");
	if (!(KERNEL$CPU_FEATURES & CPU_HAS_4M_PAGES))
		BOOT_FAIL("CPU HAS PAE BUT DOES NOT HAVE BIG PAGES");
	if (!(KERNEL$CPU_FEATURES & CPU_HAS_GLOBAL_PAGES))
		BOOT_FAIL("CPU HAS PAE BUT DOES NOT HAVE GLOBAL PAGES");
#endif
}

/*static void debug_val(int line, unsigned long val)
{
	unsigned short int *base = (void *)((*(char *)0x449 == 7 ? 0xb0000 : 0xb8000) + line * 0xa0);
	int i;
	for (i = 28; i >= 0; i -= 4) {
		unsigned n = (val >> i) & 0x0f;
		if (n >= 10) n += 7;
		*base++ = 0x7030 + n;
	}
}*/

/* Defined at the end of the file; its address is handed to VM_BOOT_ENABLE_PAGING. */
static void KERNEL_BOOT_CONTINUE(void);

/*
 * Boot-time layout values, all computed in KERNEL_BOOT_RELOCATED and consumed
 * in KERNEL_BOOT_CONTINUE. They are set before paging is enabled.
 */
/* Address to which the [fsd, top_of_heap) region passed by the loader is moved: right after sections 0 and 1 of the image. */
static unsigned long FSD;
/* End of that moved region, rounded up to PAGE_CLUSTER_SIZE. */
static unsigned long TOP_OF_CODE;
/* FSD rounded up to PAGE_CLUSTER_SIZE. */
static unsigned long TOP_OF_KERNEL;
/* 0x400000 plus the lengths of sections 2, 3 and 4, rounded up to PAGE_CLUSTER_SIZE. */
static unsigned long TOP_OF_DATA;

/* Entry point the loader jumps to (see the comment at the top of the file). */
void KERNEL$BOOT(struct link_header *l, unsigned long fsd, unsigned long top_of_heap);
/* Continuation of KERNEL$BOOT that runs after the first (physical) relocation. */
static void KERNEL_BOOT_RELOCATED(struct link_header *l, unsigned long fsd, unsigned long top_of_heap);

/*
 * Entry point reached from the loader.
 *
 * l           - link header of the loaded kernel image
 * fsd         - start of a region supplied by the loader, forwarded unchanged
 * top_of_heap - end of that region, forwarded unchanged
 *
 * Assigns load addresses to the five image sections, moves sections 2 and 3
 * to 0x400000, zero-fills section 4, relocates the image for those addresses
 * and continues in KERNEL_BOOT_RELOCATED. Halts in RELOC_FAIL if relocation
 * fails.
 *
 * The code here runs before relocation: only locals and arguments may be
 * used, and nothing that would make the compiler emit a reference to an
 * absolute address in the image.
 */
void KERNEL$BOOT(struct link_header *l, unsigned long fsd, unsigned long top_of_heap)
{
	char *image = (char *)l;
	struct section *sec = (struct section *)(image + l->sections);

	/* sections 0 and 1 stay in place inside the loaded image */
	sec[0].ptr = (__f_off)l + sec[0].offset;
	sec[1].ptr = (__f_off)l + sec[1].offset;

	/* sections 2..4 start at 0x400000 and keep their spacing relative to section 2 */
	sec[2].ptr = (__f_off)0x400000;
	sec[3].ptr = (__f_off)0x400000 + (sec[3].offset - sec[2].offset);
	sec[4].ptr = (__f_off)0x400000 + (sec[4].offset - sec[2].offset);

	/* copy everything from section 2 up to the start of section 4 ... */
	memmove((char *)sec[2].ptr, image + sec[2].offset, sec[4].ptr - sec[2].ptr);
	/* ... and clear section 4 itself */
	memset((char *)sec[4].ptr, 0, sec[4].len);

	if (LINK_RELOC(l, 0)) {
		RELOC_FAIL();
	}

	KERNEL_BOOT_RELOCATED(l, fsd, top_of_heap);
}

/*
 * Boot stage that runs once the image has been relocated for physical
 * addresses, so globals may be used.
 *
 * Moves the [fsd, top_of_heap) region directly behind sections 0 and 1,
 * records the layout in FSD / TOP_OF_CODE / TOP_OF_KERNEL / TOP_OF_DATA,
 * probes the CPU, builds the boot page tables, relocates the image a second
 * time for VM_CODE_VBANK and finally enables paging. Control continues in
 * KERNEL_BOOT_CONTINUE; this function does not return.
 *
 * After the second LINK_RELOC no global may be touched until paging is on,
 * which is why KERNEL$CPU_FEATURES is copied into a local beforehand.
 */
static void KERNEL_BOOT_RELOCATED(struct link_header *l, unsigned long fsd, unsigned long top_of_heap)
{
	struct section *sec = (struct section *)((char *)l + l->sections);
	const unsigned long cluster_mask = ~(unsigned long)(PAGE_CLUSTER_SIZE - 1);
	unsigned long end;
	unsigned cpu_features;
	void *page_tables;

	/* globals are usable from here on */
	memmove((char *)l + sec[0].len + sec[1].len, (void *)fsd, top_of_heap - fsd);
	FSD = (long)l + sec[0].len + sec[1].len;

	/* each boundary is rounded up to a multiple of PAGE_CLUSTER_SIZE */
	end = FSD + top_of_heap - fsd;
	TOP_OF_CODE = (end + PAGE_CLUSTER_SIZE - 1) & cluster_mask;

	end = FSD;
	TOP_OF_KERNEL = (end + PAGE_CLUSTER_SIZE - 1) & cluster_mask;

	end = 0x400000 + sec[2].len + sec[3].len + sec[4].len;
	TOP_OF_DATA = (end + PAGE_CLUSTER_SIZE - 1) & cluster_mask;

	CHECK_CPU_FEATURES();
	page_tables = VM_BOOT_INIT();

#define VM_CODE_VBANK (VM_KERNEL_DIRECT_BANK << PG_BANK_BITS << PG_SIZE_BITS)
#define VM_DATA_VBANK ((VM_KERNEL_DIRECT_BANK + 1) << PG_BANK_BITS << PG_SIZE_BITS)

	/* last read of a global before the image is re-relocated */
	cpu_features = KERNEL$CPU_FEATURES;

	if (LINK_RELOC(l, VM_CODE_VBANK)) {
		RELOC_FAIL();
	}

	/* globals are off limits again until paging is enabled */
	VM_BOOT_ENABLE_PAGING(page_tables, KERNEL_BOOT_CONTINUE, cpu_features);
	/* not reached: VM_BOOT_ENABLE_PAGING switches stacks and never returns */
}

/*
 * Functions called from KERNEL_BOOT_CONTINUE that are not defined in the
 * visible part of this file; they are declared here rather than through a
 * header.
 */
void MACHINE_INIT(void);
void INIT_KERNEL(void);
void FSD_SET_ENTRY(void *);

/*
 * Final boot stage. KERNEL_BOOT_RELOCATED passes this function to
 * VM_BOOT_ENABLE_PAGING as the place to continue once paging is enabled.
 *
 * Applies the feature fixups if both "DLL$FIXUP" and "DLL$FIXUP_END" can be
 * resolved in the kernel image, hands the boot memory layout to the VM,
 * and then brings up the allocator, the dynamic linker, IRQs, the timer and
 * the machine layer before calling INIT_KERNEL.
 */
static void KERNEL_BOOT_CONTINUE(void)
{
	__f_off fixup_start, fixup_end;

	/*__debug_printf("features: %08x\n", KERNEL$CPU_FEATURES);*/
	/*__debug_printf("FSD: %08lx, DATA: %08lx, CODE: %08lx\n", FSD, TOP_OF_DATA, TOP_OF_CODE);*/

	/* LINK_GET_SYMBOL returns zero on success; the end symbol is only looked up if the start symbol was found */
	if (!LINK_GET_SYMBOL(KMAP_PHYS_2_VIRT(0x100000), "DLL$FIXUP", SYM_SPECIAL, &fixup_start)) {
		if (!LINK_GET_SYMBOL(KMAP_PHYS_2_VIRT(0x100000), "DLL$FIXUP_END", SYM_SPECIAL, &fixup_end)) {
			FIXUP_FEATURES((struct feature_fixup *)fixup_start, (struct feature_fixup *)fixup_end);
		}
	}

	/* image at 0x100000, data at 0x400000, boundaries as computed in KERNEL_BOOT_RELOCATED */
	VM_BOOT_GETMEM(0x100000, TOP_OF_KERNEL, TOP_OF_CODE, 0x400000, TOP_OF_DATA, 0x800000);
	/*alloctest();*/

	__SLAB_INIT_MALLOC_ARENA();
	FSD_SET_ENTRY(KMAP_PHYS_2_VIRT(FSD));
	__DL_INIT(KMAP_PHYS_2_VIRT(0x100000), KMAP_PHYS_2_VIRT(TOP_OF_CODE), KMAP_PHYS_2_VIRT(0x400000), KMAP_PHYS_2_VIRT(TOP_OF_DATA));

	IRQ_INIT();
	TIMER_INIT();
	MACHINE_INIT();

	/*alloctest();
	slab_test();
	slab_htest();
	wq_test();
	uaccess_test();*/

	INIT_KERNEL();
}
