#include <SYS/TYPES.H>
#include <SPAD/LIBC.H>
#include <ARCH/CPU.H>
#include <STRING.H>

#include <KERNEL/CODEPAD.H>

/*
 * Padding table built from NOP (0x90), MOV (0x8b) and LEA (0x8d) encodings.
 * PROBE_PADDING selects it when CPUID is unavailable, for "AuthenticAMD"
 * CPUs with family <= 6, and as the fallback when FEATURE_NOP is not
 * reported.
 *
 * Layout (shared by all padding tables in this file): the first byte of
 * entry 0 is the longest pad length the table supports (here 0x0e = 14);
 * entry n, for 1 <= n <= that maximum, is an n-byte sequence.  CODE_PAD
 * indexes the table directly with the requested length and copies exactly
 * that many bytes, so the embedded zero bytes are data, not terminators.
 */
static const char *athlon_pad[] = {
	/* AMD Athlon Optimization Guide */
	"\x0e",	/* maximum supported pad length: 14 */
	"\x90",	/* 1 byte */
	"\x8b\xff",	/* 2 bytes */
	"\x8d\x3c\x27",	/* 3 bytes */
	"\x8d\x7c\x27\x00",	/* 4 bytes */
	"\x8d\x7c\x27\x00\x90",	/* 5 bytes: the 4-byte form followed by NOP */
	"\x8d\xbf\x00\x00\x00\x00",	/* 6 bytes */
	"\x8d\x3c\x3d\x00\x00\x00\x00",	/* 7 bytes */
	"\x8d\x3c\x3d\x00\x00\x00\x00\x90",	/* 8 bytes: the 7-byte form followed by NOP */
	"\x8d\xbf\x00\x00\x00\x00\x8d\x34\x26",	/* 9 bytes: 6 + 3 */
	"\x8d\xbf\x00\x00\x00\x00\x8d\x74\x26\x00",	/* 10 bytes: 6 + 4 */
	"\x8d\x3c\x3d\x00\x00\x00\x00\x8d\x74\x26\x00",	/* 11 bytes: 7 + 4 */
	"\x8d\xbf\x00\x00\x00\x00\x8d\xb6\x00\x00\x00\x00",	/* 12 bytes: 6 + 6 */
	"\x8d\x3c\x3d\x00\x00\x00\x00\x8d\xb6\x00\x00\x00\x00",	/* 13 bytes: 7 + 6 */
	"\x8d\x3c\x3d\x00\x00\x00\x00\x8d\x34\x35\x00\x00\x00\x00",	/* 14 bytes: 7 + 7 */
};

/*
 * Padding table built from the multi-byte NOP (0f 1f /0), optionally with
 * a 0x66 operand-size prefix.  PROBE_PADDING selects it for non-AMD CPUs
 * that report FEATURE_NOP.
 *
 * Layout: the first byte of entry 0 is the longest pad length the table
 * supports (0x10 = 16); entry n, for 1 <= n <= 16, is an n-byte sequence.
 * CODE_PAD indexes the table directly with the requested length, so every
 * entry MUST be followed by a comma: adjacent string literals are silently
 * concatenated by the compiler, which would shorten the array and make
 * lookups for the longest lengths read past its end.
 */
static const char *p6_nop_pad[] = {
	/* Intel Instruction Set Reference manual, NOP */
	/* Works on P6-type machines, but there are reportedly some
	   non-Intel non-AMD processors that report generation 6 and
	   don't support NOP */
	/* The Intel P6 chip has the restriction on two non-renamed
	   registers per clock cycle, so use EAX variant, as it will
	   most likely be used nearby */
	"\x10",	/* maximum supported pad length: 16 */
	"\x90",	/* 1 byte */
	"\x66\x90",	/* 2 bytes */
	"\x0f\x1f\x00",	/* 3 bytes */
	"\x0f\x1f\x40\x00",	/* 4 bytes */
	"\x0f\x1f\x44\x00\x00",	/* 5 bytes */
	"\x66\x0f\x1f\x44\x00\x00",	/* 6 bytes */
	"\x0f\x1f\x80\x00\x00\x00\x00",	/* 7 bytes */
	"\x0f\x1f\x84\x00\x00\x00\x00\x00",	/* 8 bytes */
	"\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",	/* 9 bytes */
	"\x0f\x1f\x44\x00\x00\x0f\x1f\x44\x00\x00",	/* 10 bytes: 5 + 5 */
	"\x66\x0f\x1f\x44\x00\x00\x0f\x1f\x44\x00\x00",	/* 11 bytes: 6 + 5 */
	"\x66\x0f\x1f\x44\x00\x00\x66\x0f\x1f\x44\x00\x00",	/* 12 bytes: 6 + 6 */
	"\x66\x0f\x1f\x44\x00\x00\x0f\x1f\x80\x00\x00\x00\x00",	/* 13 bytes: 6 + 7 */
	"\x0f\x1f\x80\x00\x00\x00\x00\x0f\x1f\x80\x00\x00\x00\x00",	/* 14 bytes: 7 + 7 */
	"\x66\x0f\x1f\x80\x00\x00\x00\x00\x0f\x1f\x80\x00\x00\x00\x00",	/* 15 bytes: 8 + 7 */
	"\x66\x0f\x1f\x80\x00\x00\x00\x00\x66\x0f\x1f\x80\x00\x00\x00\x00",	/* 16 bytes: 8 + 8 */
};

/*
 * Padding table built from NOP (0x90) preceded by zero to three 0x66
 * prefix bytes; longer pads chain several such prefixed NOPs.
 * PROBE_PADDING selects it for "AuthenticAMD" CPUs with family > 6.
 *
 * Layout: the first byte of entry 0 is the longest pad length the table
 * supports (0x0c = 12); entry n, for 1 <= n <= 12, is an n-byte sequence
 * that CODE_PAD looks up by indexing with the requested length.
 */
static const char *amd64_pad[] = {
	/* Software Optimization Guide for AMD64 Processors */
	/* I suppose that these multiple prefixes will burn the
	   decoder on any other CPU ... */
	"\x0c",	/* maximum supported pad length: 12 */
	"\x90",	/* 1 byte */
	"\x66\x90",	/* 2 bytes */
	"\x66\x66\x90",	/* 3 bytes */
	"\x66\x66\x66\x90",	/* 4 bytes */
	"\x66\x66\x90\x66\x90",	/* 5 bytes: 3 + 2 */
	"\x66\x66\x90\x66\x66\x90",	/* 6 bytes: 3 + 3 */
	"\x66\x66\x66\x90\x66\x66\x90",	/* 7 bytes: 4 + 3 */
	"\x66\x66\x66\x90\x66\x66\x66\x90",	/* 8 bytes: 4 + 4 */
	"\x66\x66\x90\x66\x66\x90\x66\x66\x90",	/* 9 bytes: 3 + 3 + 3 */
	"\x66\x66\x66\x90\x66\x66\x90\x66\x66\x90",	/* 10 bytes: 4 + 3 + 3 */
	"\x66\x66\x66\x90\x66\x66\x66\x90\x66\x66\x90",	/* 11 bytes: 4 + 4 + 3 */
	"\x66\x66\x66\x90\x66\x66\x66\x90\x66\x66\x66\x90",	/* 12 bytes: 4 + 4 + 4 */
};

/* Padding table in use for the running CPU.  Stays NULL until
   PROBE_PADDING() has run; CODE_PAD triggers that lazily on first use. */
static const char **padding = NULL;

/*
 * Choose the padding table for the running CPU and store it in `padding`.
 *
 *   - no CPUID support                      -> athlon_pad
 *   - vendor "AuthenticAMD", family <= 6    -> athlon_pad
 *   - vendor "AuthenticAMD", family  > 6    -> amd64_pad
 *   - any other vendor with FEATURE_NOP     -> p6_nop_pad
 *   - any other vendor without FEATURE_NOP  -> athlon_pad
 *
 * NOTE(review): vendor[] is sized for a 12-character vendor string plus a
 * terminator; DO_CPUID0 is presumably what NUL-terminates it -- confirm.
 */
__COLD_ATTR__ static void PROBE_PADDING(void)
{
	char vendor[13];
	unsigned family;

	/* Without CPUID nothing can be identified; use the table that
	   needs no special NOP support. */
	if (!KERNEL$FEATURE_TEST(FEATURE_CPUID)) {
		padding = athlon_pad;
		return;
	}

	DO_CPUID0(NULL, vendor);
	DO_CPUID1(&family, NULL, NULL, NULL, NULL);

	if (strcmp(vendor, "AuthenticAMD") == 0) {
		/* AMD: the family number decides between the two AMD tables. */
		padding = (family > 6) ? amd64_pad : athlon_pad;
		return;
	}

	/* Everybody else: multi-byte NOP only if the CPU reports it. */
	padding = KERNEL$FEATURE_TEST(FEATURE_NOP) ? p6_nop_pad : athlon_pad;
}

/*
 * Produce filler code for a gap of `len` bytes.
 *
 * addr        - not used by this implementation.
 * len         - size of the gap to be padded, in bytes.
 * result      - buffer receiving the generated bytes.
 * result_len  - capacity of `result` in bytes.
 *
 * Returns the number of bytes written to `result`:
 *   - `len` when the gap fits both the buffer and the current padding
 *     table; the table's `len`-byte sequence is copied verbatim;
 *   - 2 when a short jump (opcode 0xeb, 8-bit displacement len - 2) is
 *     emitted, used for gaps below 130 bytes;
 *   - 5 when a near jump (opcode 0xe9, 32-bit little-endian displacement
 *     len - 5) is emitted, used for larger gaps.
 * In the jump cases only the jump itself is written; the displacement
 * skips the remainder of the gap.
 *
 * Calls KERNEL$SUICIDE if a jump is required but `result` cannot hold it.
 * The padding table is probed on the first call.
 */
unsigned long CODE_PAD(unsigned long addr, unsigned long len, __u8 *result, unsigned long result_len)
{
	unsigned long rel;

	if (__unlikely(!padding))
		PROBE_PADDING();

	/* Common case: the table has a sequence of exactly this length
	   (entry 0 holds the table's maximum) and it fits the buffer. */
	if (len <= result_len && len <= (__u8)padding[0][0]) {
		memcpy(result, padding[len], len);
		return len;
	}

	if (len >= 130) {
		/* Too far for an 8-bit displacement: near jump, rel32. */
		if (__unlikely(result_len < 5))
			KERNEL$SUICIDE("CODE_PAD: NOT ENOUGH SPACE FOR LONG JUMP (%lu)", result_len);
		rel = len - 5;
		result[0] = 0xe9;
		result[1] = rel;
		result[2] = rel >> 8;
		result[3] = rel >> 16;
		result[4] = rel >> 24;
		return 5;
	}

	/* Short jump over the rest of the gap, rel8. */
	if (__unlikely(result_len < 2))
		KERNEL$SUICIDE("CODE_PAD: NOT ENOUGH SPACE FOR SHORT JUMP (%lu)", result_len);
	rel = len - 2;
	result[0] = 0xeb;
	result[1] = rel;
	return 2;
}
