#include <SPAD/LIBC.H>
#include <SPAD/VM.H>
#include <SPAD/TIMER.H>
#include <SPAD/SYNC.H>
#include <SYS/PARAM.H>
#include <STRING.H>
#include <LIMITS.H>

#include <KERNEL/VM_ARCH.H>

/*
 * Speed ratio (four fifths) used by speed_diff().
 * The expansion is left unparenthesized: it must be used as the right-hand
 * operand of a multiplication, so that `x * TOLERANCE` becomes `x * 4 / 5`.
 * A parenthesized `(4 / 5)` would evaluate to 0 in integer arithmetic.
 */
#define TOLERANCE	4 / 5

/*
 * Smallest block size, in bytes, that CACHE_LIMIT_PROBE() tests.
 * Open question from the author: how small can an L2 cache on a 486 be?
 */
#define MIN_TEST_SIZE	16384

/*
 * Largest block size that CACHE_LIMIT_PROBE() tests.
 * The author expects the L2 cache to be at most 1MB, so 2MB is read in order
 * to be sure that slow memory is really being measured. The value is also
 * capped at PG_SIZE * PG_BANK (presumably the size of one mapped physical
 * bank -- TODO confirm).
 */
#define MAX_TEST_SIZE	MIN(2097152, PG_SIZE * PG_BANK)

/*
 * Stop conditions for the timed loop in measure_once(): once the jiffy
 * counter has advanced, the loop ends as soon as NEED_READS passes over the
 * block have been made or the difference of KERNEL$GET_JIFFIES_LO() values
 * has reached NEED_JIFFIES, whichever happens first.
 */
#define NEED_READS	10
#define NEED_JIFFIES	(10 << KERNEL$JIFFIES_STEP_BITS)

/*
 * Read the block at p one 32-bit word at a time and throw the values away.
 *
 * p   - start of the block; volatile-qualified so that each load is an
 *       observable access.
 * len - block length in bytes; len / 4 whole words are read, any remaining
 *       bytes are not touched.
 */
__NOINLINE_ATTR__ static void do_read(const volatile __u32 *p, unsigned len)
{
	const volatile __u32 *end = p + len / 4;

	for (; p != end; p++) {
		/* The load itself is the point; the value is unused. */
		(void)*p;
	}
}

/*
 * Time repeated reads of the len-byte block at p.
 *
 * Returns len * (number of passes) * JIFFIES_PER_SECOND divided by the
 * elapsed jiffy count, i.e. a throughput figure (bytes per second, assuming
 * JIFFIES_PER_SECOND is the jiffy rate).
 */
static __u64 measure_once(const volatile __u32 *p, unsigned len)
{
	u_jiffies_lo_t start, now;
	unsigned passes = 0;

	/* Two untimed passes over the block before the clock is started. */
	do_read(p, len);
	do_read(p, len);

	/*
	 * Spin until the jiffy counter changes, so that timing starts on a
	 * tick edge. Can't sleep because we have mapped memory.
	 */
	now = KERNEL$GET_JIFFIES_LO();
	do {
		start = KERNEL$GET_JIFFIES_LO();
	} while (start == now);

	for (;;) {
		do_read(p, len);
		passes++;
		now = KERNEL$GET_JIFFIES_LO();
		/* At least one tick must elapse: it is the divisor below. */
		if (now == start)
			continue;
		/* Enough time or enough passes: either one ends the run. */
		if (now - start >= NEED_JIFFIES || passes >= NEED_READS)
			break;
	}

	return (__u64)len * passes * JIFFIES_PER_SECOND / (now - start);
}

/*
 * Run measure_once() twice on the same block and return the larger of the
 * two throughput figures.
 */
static __u64 measure(const volatile __u32 *p, unsigned len)
{
	__u64 first = measure_once(p, len);
	__u64 second = measure_once(p, len);

	return MAX(first, second);
}

/*
 * Map the physical bank at addr, measure read throughput over its first
 * len bytes with measure(), unmap it again and return the measurement.
 *
 * KERNEL$THREAD_MAY_BLOCK() is called before the mapping is established and
 * again after it has been released; nothing in between blocks.
 */
static __u64 map_and_measure(__p_addr addr, unsigned len)
{
	const volatile __u32 *window;
	__u64 speed;

	KERNEL$THREAD_MAY_BLOCK();

	window = KERNEL$MAP_PHYSICAL_BANK(addr);
	speed = measure(window, len);
	KERNEL$UNMAP_PHYSICAL_BANK((void *)window);

	KERNEL$THREAD_MAY_BLOCK();

	return speed;
}

/*
 * Three-way comparison of two throughput figures with a tolerance band.
 *
 * Returns
 *    1  if val1 is the larger value and val2 <= val1 * TOLERANCE,
 *   -1  if val2 is the larger value and val1 <= val2 * TOLERANCE,
 *    0  if the two values are equal or lie within the tolerance band.
 *
 * The result is antisymmetric: speed_diff(a, b) == -speed_diff(b, a).
 * Equal inputs yield 0 (testing `val1 * TOLERANCE < val2` first would report
 * -1 for them, because that condition also holds when val1 == val2).
 *
 * The condition for a positive result is exactly the one callers in this
 * file rely on when they test `speed_diff(...) > 0`.
 *
 * TOLERANCE expands to an unparenthesized `4 / 5`, so it must stay the
 * right-hand operand of the multiplication.
 */
static int speed_diff(__u64 val1, __u64 val2)
{
	if (val1 > val2)
		return (val1 * TOLERANCE >= val2) ? 1 : 0;
	if (val2 > val1)
		return (val2 * TOLERANCE >= val1) ? -1 : 0;
	return 0;
}

/*
 * Check whether the block [addr, addr + len) is unusable, either because it
 * extends past VM_ARCH->MEMSIZE or because it overlaps one of the
 * VM_ARCH->VM_HOLES entries (stored as start/end pairs and treated as
 * half-open ranges by the overlap test).
 *
 * low  - optional in/out: lowered to a candidate address below the conflict
 *        if that candidate is smaller than the current *low.
 * high - optional in/out: raised to a candidate address above the conflict
 *        if that candidate is larger than the current *high; set to -1 when
 *        the block runs past the end of memory.
 *
 * Candidates next to a hole are masked with -len, which yields a multiple
 * of len only when len is a power of two.
 *
 * Returns nonzero on conflict, 0 if the block is usable.
 */
static int test_conflict(__s64 addr, int len, __s64 *low, __s64 *high)
{
	const __s64 align_mask = -(__s64)len;
	int hit = 0;
	int n;

	if (addr + len > VM_ARCH->MEMSIZE) {
		__s64 last_fit = (__s64)VM_ARCH->MEMSIZE - len;

		if (low && last_fit < *low)
			*low = last_fit;
		/* Nothing usable above this point. */
		if (high)
			*high = -1;
		return 1;
	}

	for (n = 0; n < VM_ARCH->VM_N_HOLES; n++) {
		__s64 hole_start = VM_ARCH->VM_HOLES[2 * n];
		__s64 hole_end = VM_ARCH->VM_HOLES[2 * n + 1];
		__s64 below, above;

		/* No overlap with this hole. */
		if (addr >= hole_end || addr + len <= hole_start)
			continue;

		hit = 1;
		below = (hole_start - len) & align_mask;
		above = (hole_end + len - 1) & align_mask;
		if (low && below < *low)
			*low = below;
		if (high && above > *high)
			*high = above;
	}

	return hit;
}

/*
 * Starting from address 0, move upward past conflicts reported by
 * test_conflict() until a usable block of len bytes is found.
 *
 * Returns the block's address, or -1 if test_conflict() pushed the
 * candidate to a negative value (no usable block).
 */
static __s64 FIND_LOWEST(int len)
{
	__s64 candidate = 0;

	for (;;) {
		if (!test_conflict(candidate, len, NULL, &candidate))
			return candidate;
		if (candidate < 0)
			return -1;
	}
}

/*
 * Starting from the last len-masked address that fits below
 * VM_ARCH->MEMSIZE, move downward past conflicts reported by
 * test_conflict() until a usable block of len bytes is found.
 *
 * Returns the block's address, or -1 if the candidate became negative
 * while stepping over conflicts. Note that the initial candidate is
 * returned as-is when it has no conflict.
 */
static __s64 FIND_HIGHEST(int len)
{
	__s64 candidate = ((__s64)VM_ARCH->MEMSIZE - len) & -(__s64)len;

	for (;;) {
		if (!test_conflict(candidate, len, &candidate, NULL))
			return candidate;
		if (candidate < 0)
			return -1;
	}
}

/*
 * Pick a usable len-byte block near the midpoint of [low, high].
 *
 * The midpoint is masked with -len, then one search runs downward and one
 * upward from it, each stepping over conflicts via test_conflict(). A search
 * whose candidate goes negative falls back to low (downward) or high
 * (upward). The result is whichever of the two candidates is closer to the
 * midpoint; on a tie the upper one is returned.
 */
static __s64 FIND_MIDDLE(__s64 low, __s64 high, int len)
{
	const __s64 center = ((low + high) / 2) & -(__s64)len;
	__s64 below = center;
	__s64 above = center;

	/* Search downward from the midpoint. */
	for (;;) {
		if (!test_conflict(below, len, &below, NULL))
			break;
		if (below < 0) {
			below = low;
			break;
		}
	}

	/* Search upward from the midpoint. */
	for (;;) {
		if (!test_conflict(above, len, NULL, &above))
			break;
		if (above < 0) {
			above = high;
			break;
		}
	}

	return (center - below < above - center) ? below : above;
}

/*
 * Probe for an address above which memory reads are markedly slower.
 *
 * For block sizes from MIN_TEST_SIZE up to MAX_TEST_SIZE (doubling each
 * round), read throughput is measured on the lowest and the highest usable
 * block. As soon as speed_diff() reports the low block as faster, a binary
 * search between the two addresses narrows down the boundary, always
 * comparing against the throughput of the original low block.
 *
 * Returns the boundary address found (the lowest address measured as slow),
 * or 0 if no block size showed a difference.
 */
__p_addr CACHE_LIMIT_PROBE(void)
{
	int len;

#ifdef DBG
	__debug_printf("cache probe...\n");
#endif
	for (len = MIN_TEST_SIZE; len <= MAX_TEST_SIZE; len <<= 1) {
		__u64 lo_speed, hi_speed;
		__s64 lo = FIND_LOWEST(len);
		__s64 hi = FIND_HIGHEST(len);

		/* Need two distinct, non-overlapping usable blocks. */
		if (lo < 0 || hi < 0 || hi - lo < len)
			continue;

		lo_speed = map_and_measure(lo, len);
		hi_speed = map_and_measure(hi, len);
#ifdef DBG
		__debug_printf("speed for %Lx-%Lx, %x: %Lu, %Lu\n", lo, hi, len, lo_speed, hi_speed);
#endif
		/* No noticeable slowdown at this size: try a larger block. */
		if (speed_diff(lo_speed, hi_speed) <= 0)
			continue;

		/* Narrow [lo, hi] until no usable block lies strictly between. */
		for (;;) {
			__u64 mid_speed;
			__s64 mid = FIND_MIDDLE(lo, hi, len);

			if (mid <= lo || mid >= hi)
				break;

			mid_speed = map_and_measure(mid, len);
#ifdef DBG
			__debug_printf("speed for %Lx: %Lu\n", mid, mid_speed);
#endif
			if (speed_diff(lo_speed, mid_speed) > 0)
				hi = mid;
			else
				lo = mid;
		}
#ifdef DBG
		__debug_printf("found limit: %Lx\n", hi);
#endif
		return hi;
	}
#ifdef DBG
	__debug_printf("nothing found...\n");
#endif

	return 0;
}
