#include <SPAD/SYNC.H>
#include <SPAD/AC.H>
#include <SPAD/LIST.H>
#include <SPAD/ALLOC.H>
#include <SPAD/LIBC.H>
#include <SPAD/DEV_KRNL.H>
#include <SPAD/SLAB.H>
#include <SPAD/IOCTL.H>
#include <SPAD/TIMER.H>
#include <STRING.H>
#include <UNISTD.H>
#include <VALUES.H>
#include <SYS/STAT.H>
#include <SPAD/BIO_KRNL.H>

#include <KERNEL/GETHSIZE.I>

/*
 * One member volume of the linear array.
 * While main() parses its arguments, the start of this structure is
 * temporarily overlaid with a char * holding the volume's path (see the
 * check_sizeof declaration that follows).
 */
typedef struct {
	int h;	/* handle of the opened member volume */
	int pad[sizeof(__sec_t) == 2 * sizeof(int)];	/* one int when __sec_t is twice as large as int, zero-length otherwise */
	__sec_t begin_sec;	/* first sector of this member within the array */
} RAID_DEVICE;

/*
 * Compile-time assertion: a RAID_DEVICE slot must be able to hold a char *,
 * because main() stores argument pointers in the slots before opening the
 * volumes. A negative array size breaks the build if that does not hold.
 */
extern char check_sizeof[sizeof(RAID_DEVICE) >= sizeof(char *) ? 1 : -1];

/*
 * Per-array driver state. Allocated in main() with room for n_devices
 * member entries plus one terminating entry.
 */
typedef struct {
	struct __slhead biorq;	/* slab of RAID_BIORQ items */
	WQ biorq_wait;	/* callers wait here when the slab has no free item */
	char ro;	/* request-flag bits that are refused; copied from the "RO" parameter in main() */
	int read_request_size;	/* smallest optimal read size reported by the members, 0 if none reported */
	int write_request_size;	/* smallest optimal write size reported by the members, 0 if none reported */
	int physical_block_size;	/* largest physical block size reported by the members, at least BIO_SECTOR_SIZE */
	void *lnte;	/* value returned by KERNEL$REGISTER_DEVICE, needed by KERNEL$DEVICE_UNLOAD */
	void *dlrq;	/* KERNEL$TSR_IMAGE() result, handed back through *release on unload */
	char dev_name[__MAX_STR_LEN];	/* name the array is registered under */
	void *dummy_biorq;	/* one slab item held for the whole lifetime of the driver */
	PARTITIONS *p;	/* partition table obtained from BIO$ALLOC_PARTITIONS */
	int n_devices;	/* number of member volumes */
	RAID_DEVICE devices[1];	/* n_devices members followed by one entry whose begin_sec is the total size */
} RAID;

/*
 * A request forwarded to one member volume on behalf of a caller's request.
 * Items come from the RAID.biorq slab.
 */
typedef struct {
	BIORQ b;	/* the request actually posted to the member volume */
	RAID *raid;	/* owning array; set once by the slab constructor raid_ctor */
	BIORQ *caller;	/* original request received by RAID_FN */
	int nsec;	/* sectors covered by this sub-request; differs from caller->nsec when split */
	unsigned idx;	/* index of the member volume this request was sent to */
	BIODESC desc;	/* private descriptor used by RAID_SPLIT for a shortened request */
} RAID_BIORQ;

/* Unload callback registered with KERNEL$REGISTER_DEVICE in main(). */
static int raid_unload(void *p, void **release, char *argv[]);

/* Block I/O entry point; installed in the BIO slot of raid_operations. */
extern IO_STUB RAID_FN;

/*
 * Handle operation table installed on the array's root handle by
 * raid_init_root(). The initializer is positional; the trailing comments
 * name the slot each entry fills. Generic BIO$ helpers serve everything
 * except the BIO slot, which is handled by RAID_FN.
 */
static __const__ HANDLE_OPERATIONS raid_operations = {
	SPL_X(SPL_RAID),
	KERNEL$NO_VSPACE_GET,
	KERNEL$NO_VSPACE_PUT,
	KERNEL$NO_VSPACE_MAP,
	KERNEL$NO_VSPACE_DMALOCK,
	KERNEL$NO_VSPACE_DMA64LOCK,
	KERNEL$NO_VSPACE_PHYSLOCK,
	KERNEL$NO_VSPACE_GET_PAGEIN_RQ,
	KERNEL$NO_VSPACE_GET_PAGE,
	KERNEL$NO_VSPACE_SWAP_OP,
	NULL,			/* clone */
	BIO$LOOKUP_PARTITION,	/* lookup */
	NULL,			/* create */
	NULL,			/* delete */
	NULL,			/* rename */
	NULL,			/* lookup_io */
	BIO$INSTANTIATE_PARTITION,	/* instantiate */
	NULL,			/* leave */
	BIO$DETACH,		/* detach */
	NULL,			/* open */
	NULL,			/* close */
	BIO$READ,		/* READ */
	BIO$WRITE,		/* WRITE */
	BIO$AREAD,		/* AREAD */
	BIO$AWRITE,		/* AWRITE */
	BIO$IOCTL,		/* IOCTL */
	RAID_FN,		/* BIO */
	KERNEL$NO_OPERATION,	/* PKTIO */
};

/* Handles requests that RAID_FN does not forward directly (flush, refused flags). */
static void RAID_SPECIAL(RAID_BIORQ *rq);
/* Posts flush requests to the member volumes. */
static void RAID_SYNC(RAID_BIORQ *rq);
/* Shortens a request that would run past the end of its member volume. */
static void RAID_SPLIT(RAID_BIORQ *rq);
/* Advances the caller past a completed partial request and resubmits it. */
static void RAID_SPLIT_DONE(RAID_BIORQ *rq);
/* Completion handler for data requests posted by RAID_FN. */
extern AST_STUB RAID_DONE;
/* Completion handler for flush requests posted by RAID_SYNC. */
extern AST_STUB RAID_SYNC_DONE;

/*
 * Block I/O entry point of the array.
 *
 * Allocates a RAID_BIORQ, finds the member volume that contains the
 * request's first sector and forwards the request to it. A request that
 * extends past the end of that member is shortened by RAID_SPLIT; the
 * remainder is resubmitted from RAID_DONE/RAID_SPLIT_DONE.
 * Flush requests and requests carrying refused or unknown flags are handed
 * to RAID_SPECIAL.
 * When no RAID_BIORQ is available the caller is parked on biorq_wait.
 */
DECL_IOCALL(RAID_FN, SPL_RAID, BIORQ)
{
	unsigned idx, idx_last;
	RAID *raid;
	RAID_BIORQ *rq;
	HANDLE *h = RQ->handle;
	/* The handle no longer belongs to this driver: let the request be re-dispatched. */
	if (__unlikely(h->op != &raid_operations)) RETURN_IORQ_LSTAT(RQ, KERNEL$WAKE_BIO);
	SWITCH_PROC_ACCOUNT(RQ->proc, SPL_X(SPL_RAID));
	/* NOTE(review): presumably translates RQ->sec by the partition offset and
	   jumps to the "out" label below on a range error - confirm in BIO headers. */
	BIO_TRANSLATE_PARTITION;
	raid = ((PARTITION *)h->fnode)->dev;
	rq = __slalloc(&raid->biorq);
	if (__unlikely(!rq)) {
		WQ_WAIT(&raid->biorq_wait, RQ, KERNEL$WAKE_BIO);
		RETURN;
	}
	rq->b.fn = RAID_DONE;
	rq->caller = RQ;
	rq->b.flags = RQ->flags;
	if (__unlikely(RQ->flags & (raid->ro | BIO_FLUSH | ~BIO_FLAG_MASK))) {
		RAID_SPECIAL(rq);
		RETURN;
	}
	rq->b.nsec = rq->nsec = RQ->nsec;
	rq->b.desc = RQ->desc;
	rq->b.proc = RQ->proc;
	rq->b.fault_sec = -1;
	/* Binary search for the last member whose begin_sec is <= RQ->sec. */
	idx = 0;
	idx_last = raid->n_devices;
	while (idx_last != idx + 1) {
		unsigned idx_half = (idx + idx_last) >> 1;
		if (raid->devices[idx_half].begin_sec <= RQ->sec) idx = idx_half;
		else idx_last = idx_half;
	}
	rq->idx = idx;
	rq->b.h = raid->devices[idx].h;
	/* Sector number relative to the start of the selected member. */
	rq->b.sec = RQ->sec - raid->devices[idx].begin_sec;
	/*
	 * rq->b.sec is member-relative, so the end of the request must be
	 * compared with the member's size (difference of consecutive begin_sec
	 * values), not with the absolute start of the next member.
	 */
	if (__unlikely(rq->b.sec + rq->b.nsec > raid->devices[idx + 1].begin_sec - raid->devices[idx].begin_sec)) RAID_SPLIT(rq);
	RETURN_IORQ_CANCELABLE(&rq->b, KERNEL$BIO, RQ);

	out:
	RQ->status = -ERANGE;
	RETURN_AST(RQ);
}

/*
 * Completion handler for a data request forwarded by RAID_FN.
 *
 * On error the member-relative fault sector is converted back to an array
 * sector and the status is returned to the caller. On success, a request
 * that covered only part of the caller's sectors is continued through
 * RAID_SPLIT_DONE; otherwise the caller is completed with the member's
 * status. The RAID_BIORQ is returned to the slab and waiters are woken.
 */
DECL_AST(RAID_DONE, SPL_RAID, BIORQ)
{
	RAID_BIORQ *rq = GET_STRUCT(RQ, RAID_BIORQ, b);
	RAID *raid = rq->raid;
	BIORQ *caller = rq->caller;
	IO_DISABLE_CHAIN_CANCEL(SPL_RAID, caller);
	if (__unlikely(rq->b.status < 0)) {
		/* -1 means "no fault sector known"; pass it through unchanged. */
		if (rq->b.fault_sec != -1) caller->fault_sec = rq->b.fault_sec + raid->devices[rq->idx].begin_sec;
		else caller->fault_sec = -1;
		goto ret_status;
	}
	/* Only the head of the caller's request was processed: continue with the rest. */
	if (__unlikely(caller->nsec != rq->nsec)) {
		RAID_SPLIT_DONE(rq);
		RETURN;
	}
	ret_status:
	caller->status = rq->b.status;
	__slfree(rq);
	WQ_WAKE_ALL(&raid->biorq_wait);
	RETURN_AST(caller);
}

/*
 * Handles requests that RAID_FN does not forward directly.
 *
 * - Flags outside BIO_FLAG_MASK are a caller bug and halt the kernel.
 * - BIO_FLUSH starts a flush of every member volume through RAID_SYNC
 *   (refused with -EACCES when the array is read-only).
 * - Anything else reached here matched a refused flag in raid->ro and is
 *   completed with -EACCES.
 *
 * rq is consumed: it is either passed on to RAID_SYNC or freed here.
 */
static void RAID_SPECIAL(RAID_BIORQ *rq)
{
	RAID *raid = rq->raid;
	BIORQ *caller = rq->caller;
	if (__unlikely(caller->flags & ~BIO_FLAG_MASK))
		KERNEL$SUICIDE("RAID_SPECIAL: BAD REQUEST, FLAGS %08X", caller->flags);
	if (__likely(caller->flags & BIO_FLUSH)) {
		if (__unlikely(raid->ro)) goto eacces;
		caller->status = 0;
		caller->tmp1 = 0;	/* index of next volume to sync */
		caller->tmp2 = 0;	/* number of pending ios */
		/*
		 * Index of the volume with the first error. RAID_SYNC_DONE records
		 * an error only when the failing index is <= this value, so it must
		 * start above every valid index; starting at 0 would discard errors
		 * from all volumes except the first one.
		 */
		caller->tmp3 = raid->n_devices;
		RAID_SYNC(rq);
		return;
	}
	/*if (__unlikely(caller->flags & BIO_WRITE))*/ {
		eacces:
		caller->status = -EACCES;
		CALL_AST(caller);
		__slow_slfree(rq);
		WQ_WAKE_ALL(&raid->biorq_wait);
		return;
	}
}

/*
 * Posts flush requests to the member volumes on behalf of caller.
 *
 * Bookkeeping lives in the caller's request: tmp1 is the index of the next
 * member to flush, tmp2 the number of flushes still in flight.
 * rq is used for the next flush; further RAID_BIORQ items are taken from
 * the slab so that several members are flushed concurrently. If the slab
 * is exhausted the loop stops and is resumed from RAID_SYNC_DONE when an
 * outstanding flush completes.
 * Once every member has been posted, rq is released and the caller is
 * completed as soon as no flush is pending.
 */
static void RAID_SYNC(RAID_BIORQ *rq)
{
	RAID *raid = rq->raid;
	BIORQ *caller = rq->caller;
	if (caller->tmp1 == raid->n_devices) {
		/* Nothing left to post: give the item back. */
		__slfree(rq);
		WQ_WAKE_ALL(&raid->biorq_wait);
		/* The last completing flush finishes the caller. */
		if (!caller->tmp2) CALL_AST(caller);
		return;
	}
	again:
	caller->tmp2++;
	rq->b.h = raid->devices[rq->idx = caller->tmp1++].h;
	rq->b.fn = RAID_SYNC_DONE;
	rq->b.sec = 0;
	rq->b.nsec = 1;
	rq->b.flags = BIO_FLUSH;
	rq->b.proc = caller->proc;
	rq->b.fault_sec = -1;
	CALL_IORQ(&rq->b, KERNEL$BIO);
	if (caller->tmp1 == raid->n_devices) return;
	/* Try to flush the next member in parallel with a fresh item. */
	rq = __slalloc(&raid->biorq);
	if (__unlikely(!rq)) return;
	rq->caller = caller;
	goto again;
}

/*
 * Completion handler for one member flush posted by RAID_SYNC.
 *
 * A failure is recorded in the caller only when the failing member's index
 * is not greater than caller->tmp3, which then becomes that index; the
 * fault sector is converted from member-relative to array-relative.
 * The pending counter is decremented and RAID_SYNC is re-entered with the
 * same item to post the next flush or to finish the caller.
 */
DECL_AST(RAID_SYNC_DONE, SPL_RAID, BIORQ)
{
	RAID_BIORQ *rq = GET_STRUCT(RQ, RAID_BIORQ, b);
	BIORQ *caller = rq->caller;
	if (__unlikely(rq->b.status < 0)) {
		if (rq->idx <= caller->tmp3) {
			caller->tmp3 = rq->idx;
			caller->status = rq->b.status;
			/* -1 means "no fault sector known"; pass it through unchanged. */
			if (rq->b.fault_sec != -1) caller->fault_sec = rq->b.fault_sec + rq->raid->devices[rq->idx].begin_sec;
			else caller->fault_sec = -1;
		}
	}
	caller->tmp2--;
	RAID_SYNC(rq);
	RETURN;
}

/*
 * Shortens rq so that it ends at the boundary of its member volume.
 *
 * Called by RAID_FN when the request runs past the end of member rq->idx.
 * The request is cut to the sectors that still lie on this member, further
 * limited to the first data descriptor, and given a private single-element
 * descriptor (rq->desc). rq->nsec records how many sectors are covered so
 * that RAID_DONE can detect the partial completion and RAID_SPLIT_DONE can
 * advance the caller by that amount.
 * A request on the last member is left untouched.
 */
static void RAID_SPLIT(RAID_BIORQ *rq)
{
	RAID *raid = rq->raid;
	unsigned lsec;
	if (__unlikely(rq->idx == raid->n_devices - 1)) return;
	/*
	 * Sectors from the (member-relative) start of the request up to the end
	 * of this member: member size minus start offset.
	 */
	lsec = raid->devices[rq->idx + 1].begin_sec - raid->devices[rq->idx].begin_sec - rq->b.sec;
	/* The private descriptor can describe the first buffer only. */
	if (lsec > rq->b.desc->v.len >> BIO_SECTOR_SIZE_BITS) {
		lsec = rq->b.desc->v.len >> BIO_SECTOR_SIZE_BITS;
	}
	rq->b.nsec = rq->nsec = lsec;
	rq->desc.v.vspace = rq->b.desc->v.vspace;
	rq->desc.v.ptr = rq->b.desc->v.ptr;
	rq->desc.v.len = lsec << BIO_SECTOR_SIZE_BITS;
	rq->desc.next = NULL;
	rq->b.desc = &rq->desc;
}

/*
 * Continues a caller's request after a partial sub-request has succeeded.
 *
 * Advances the caller's start sector, remaining sector count and first data
 * descriptor by the rq->nsec sectors just processed (dropping the
 * descriptor once it is used up), releases rq and resubmits the caller
 * through KERNEL$WAKE_BIO.
 */
static void RAID_SPLIT_DONE(RAID_BIORQ *rq)
{
	RAID *raid;
	BIORQ *caller = rq->caller;
	caller->sec += rq->nsec;
	caller->nsec -= rq->nsec;
	caller->desc->v.ptr += rq->nsec << BIO_SECTOR_SIZE_BITS;
	/* Descriptor fully consumed: move on to the next one. */
	if (!(caller->desc->v.len -= rq->nsec << BIO_SECTOR_SIZE_BITS))
		caller->desc = caller->desc->next;
	/* Fetch raid before rq goes back to the slab. */
	raid = rq->raid;
	__slfree(rq);
	WQ_WAKE_ALL(&raid->biorq_wait);
	CALL_IORQ_LSTAT(caller, KERNEL$WAKE_BIO);
}

/*
 * Driver-specific ioctl handler passed to BIO$ALLOC_PARTITIONS.
 *
 * Answers queries for the optimal read/write request size and the physical
 * block size using the values collected from the member volumes in main().
 *
 * Returns 0 when the ioctl was recognised (the result or -ENOOP is stored
 * in rq->status) and -1 when it was not.
 */
static int raid_ioctl(IOCTLRQ *rq, PARTITION *pa, IORQ *rq_to_queue)
{
	RAID *raid = pa->dev;
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_DEV)))
		KERNEL$SUICIDE("raid_ioctl AT SPL %08X", KERNEL$SPL);
#endif
	switch (rq->ioctl) {
		case IOCTL_BIO_GET_OPTIMAL_REQUEST_SIZE:
			/* Each case must leave the switch; falling through would
			   overwrite a valid answer with -ENOOP. */
			switch (rq->param) {
				case PARAM_BIO_GET_OPTIMAL_REQUEST_SIZE_READ:
					rq->status = raid->read_request_size;
					break;
				case PARAM_BIO_GET_OPTIMAL_REQUEST_SIZE_WRITE:
					rq->status = raid->write_request_size;
					break;
				default:
					rq->status = -ENOOP;
					break;
			}
			return 0;
		case IOCTL_BIO_PHYSICAL_BLOCKSIZE:
			rq->status = raid->physical_block_size;
			return 0;
		default:
			return -1;
	}
}

static void raid_ctor(void *g, void *o)
{
	RAID *raid = g;
	RAID_BIORQ *rq = o;
	rq->raid = raid;
}

/*
 * Root-handle initialiser registered as drq.init_root_handle.
 * f_ is the RAID passed as drq.dev_ptr. The handle is set up as the root
 * partition of the array and then switched to this driver's operations.
 */
static void raid_init_root(HANDLE *h, void *f_)
{
	PARTITIONS *parts = ((RAID *)f_)->p;

	BIO$ROOT_PARTITION(h, parts);
	h->op = &raid_operations;
}

/*
 * Driver entry point.
 *
 * Parses the command line (options RO and BLOCKSIZE; every other argument
 * is a member volume path, the last one being the name of the device to
 * register), opens the member volumes, collects their sizes and request
 * size hints, builds the sector map and registers the array as a device.
 *
 * Returns 0 on success or a negative error code; on failure a message is
 * left in KERNEL$ERROR_MSG() (except for -EINTR) and everything acquired
 * so far is released.
 */
int main(int argc, char *argv[])
{
	struct __param_table params[] = {
		"RO", __PARAM_BOOL, ~0, BIO_WRITE, NULL,
		"BLOCKSIZE", __PARAM_UNSIGNED_INT, 0, MAXUINT, NULL,
		NULL, 0, 0, 0, NULL,
	};
	char **arg = argv;
	int state = 0;
	char *val;
	int r;
	int i, j;
	__sec_t pos;
	RAID *raid;
	int n_devices;
	char *dev_name;
	MALLOC_REQUEST mrq;
	DEVICE_REQUEST drq;
	unsigned blocksize = BIO_SECTOR_SIZE;
	int ro = 0;
	params[0].__p = &ro;
	params[1].__p = &blocksize;
	mrq.size = sizeof(RAID);
	SYNC_IO_CANCELABLE(&mrq, KERNEL$UNIVERSAL_MALLOC);
	if (__unlikely(mrq.status < 0)) {
		r = mrq.status;
		if (r != -EINTR) _snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "RAIDLN: OUT OF MEMORY");
		goto ret0;
	}
	raid = mrq.ptr;
	n_devices = 0;
	/*
	 * Collect the non-option arguments. Each argument pointer is parked in
	 * the start of a devices[] slot until the volume is opened below.
	 */
	while (__parse_params(&arg, &state, params, NULL, NULL, &val)) {
		if (__unlikely(!val)) goto bads;
		/*
		 * Grow by one slot beyond what is copied: the current block holds
		 * sizeof(RAID) + n_devices slots' worth of bytes, so the memcpy
		 * below never reads past the end of the old allocation.
		 */
		mrq.size = sizeof(RAID) + (n_devices + 1) * sizeof(RAID_DEVICE);
		SYNC_IO_CANCELABLE(&mrq, KERNEL$UNIVERSAL_MALLOC);
		if (__unlikely(mrq.status < 0)) {
			r = mrq.status;
			if (r != -EINTR) _snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "RAIDLN: OUT OF MEMORY");
			goto ret1;
		}
		memcpy(mrq.ptr, raid, sizeof(RAID) + n_devices * sizeof(RAID_DEVICE));
		KERNEL$UNIVERSAL_FREE(raid);
		raid = mrq.ptr;
		*(char **)&raid->devices[n_devices] = val;
		n_devices++;
	}
	/* At least one member plus the device name; blocksize must be a power of two >= sector size. */
	if (__unlikely(n_devices < 2) || __unlikely(blocksize < BIO_SECTOR_SIZE) || __unlikely(blocksize & (blocksize - 1))) {
		bads:
		_snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "RAIDLN: SYNTAX ERROR");
		r = -EBADSYN;
		goto ret1;
	}
	/* The last argument is the name of the device to create. */
	dev_name = *(char **)&raid->devices[--n_devices];
	if (__unlikely(strlen(dev_name) >= sizeof(raid->dev_name))) goto bads;
	strcpy(raid->dev_name, dev_name);
	raid->n_devices = n_devices;
	raid->ro = ro;
	pos = 0;
	raid->read_request_size = MAXINT;
	raid->write_request_size = MAXINT;
	raid->physical_block_size = BIO_SECTOR_SIZE;
	for (i = 0; i < n_devices; i++) {
		__u64 size;
		OPENRQ o;
		IOCTLRQ ioctlrq;
		CHHRQ ch;

		o.path = *(char **)&raid->devices[i];
		/* A read-only array must not open its members for writing. */
		o.flags = (ro ? O_RDONLY : O_RDWR) | O_DIRECT;
		o.cwd = KERNEL$CWD();
		SYNC_IO_CANCELABLE(&o, KERNEL$OPEN);
		if (__unlikely(o.status < 0)) {
			_snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "RAIDLN: CAN'T OPEN %s: %s", o.path, strerror(-o.status));
			r = o.status;
			cl_2:
			/* Close the members opened in earlier iterations. */
			for (j = 0; j < i; j++) close(raid->devices[j].h);
			goto ret1;
		}

		ch.h = o.status;
		ch.option = "DIRECT";
		ch.value = "";
		SYNC_IO(&ch, KERNEL$CHANGE_HANDLE);

		/* Optimal read size: keep the smallest positive value reported. */
		ioctlrq.h = o.status;
		ioctlrq.ioctl = IOCTL_BIO_GET_OPTIMAL_REQUEST_SIZE;
		ioctlrq.param = PARAM_BIO_GET_OPTIMAL_REQUEST_SIZE_READ;
		ioctlrq.v.ptr = 0;
		ioctlrq.v.len = 0;
		ioctlrq.v.vspace = &KERNEL$VIRTUAL;
		SYNC_IO(&ioctlrq, KERNEL$IOCTL);
		if (ioctlrq.status > 0 && ioctlrq.status < MAXINT && ioctlrq.status < raid->read_request_size) raid->read_request_size = ioctlrq.status;

		/* Optimal write size: keep the smallest positive value reported. */
		ioctlrq.h = o.status;
		ioctlrq.ioctl = IOCTL_BIO_GET_OPTIMAL_REQUEST_SIZE;
		ioctlrq.param = PARAM_BIO_GET_OPTIMAL_REQUEST_SIZE_WRITE;
		ioctlrq.v.ptr = 0;
		ioctlrq.v.len = 0;
		ioctlrq.v.vspace = &KERNEL$VIRTUAL;
		SYNC_IO(&ioctlrq, KERNEL$IOCTL);
		if (ioctlrq.status > 0 && ioctlrq.status < MAXINT && ioctlrq.status < raid->write_request_size) raid->write_request_size = ioctlrq.status;

		/* Physical block size: keep the largest value reported. */
		ioctlrq.h = o.status;
		ioctlrq.ioctl = IOCTL_BIO_PHYSICAL_BLOCKSIZE;
		ioctlrq.param = 0;
		ioctlrq.v.ptr = 0;
		ioctlrq.v.len = 0;
		ioctlrq.v.vspace = &KERNEL$VIRTUAL;
		SYNC_IO(&ioctlrq, KERNEL$IOCTL);
		if (ioctlrq.status > 0 && ioctlrq.status < MAXINT && ioctlrq.status > raid->physical_block_size) raid->physical_block_size = ioctlrq.status;

		/* From here on the slot holds the handle instead of the path pointer. */
		raid->devices[i].h = o.status;
		/* Usable size: rounded down to a multiple of blocksize, in sectors. */
		size = (gethsize(raid->devices[i].h) & ~(__u64)(blocksize - 1)) >> BIO_SECTOR_SIZE_BITS;
		if (__unlikely(!size)) {
			_snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "RAIDLN: DEVICE %s HAS NO SECTORS", o.path);
			r = -EINVAL;
			cl_1:
			close(raid->devices[i].h);
			goto cl_2;
		}
		if (__unlikely(size != (__sec_t)size)) {
			_snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "RAIDLN: DEVICE %s IS TOO LARGE (%"__64_format"d)", o.path, size);
			r = -EFBIG;
			goto cl_1;
		}
		if (__unlikely(pos + (__sec_t)size < pos)) {
			_snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "RAIDLN: THE WHOLE RAID IS TOO LARGE");
			r = -EFBIG;
			goto cl_1;
		}
		raid->devices[i].begin_sec = pos;
		pos += (__sec_t)size;
	}
	/* No member reported a hint: publish 0. */
	if (__unlikely(raid->read_request_size == MAXINT)) raid->read_request_size = 0;
	if (__unlikely(raid->write_request_size == MAXINT)) raid->write_request_size = 0;
	/* Terminating entry: begin_sec of the slot after the last member is the total size. */
	raid->devices[n_devices].begin_sec = pos;
	if (__unlikely((r = BIO$ALLOC_PARTITIONS(&raid->p, raid, pos, raid_ioctl, raid->dev_name)) < 0)) {
		if (r != -EINTR) _snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "RAIDLN: OUT OF MEMORY");
		goto ret2;
	}
	KERNEL$SLAB_INIT(&raid->biorq, sizeof(RAID_BIORQ), sizeof(__sec_t), VM_TYPE_WIRED_MAPPED, raid_ctor, raid, NULL, "RAIDLN$BIORQ");
	if (__unlikely(r = KERNEL$SLAB_RESERVE(&raid->biorq, 2))) {
		if (r != -EINTR) _snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "RAIDLN: OUT OF MEMORY");
		goto ret3;
	}
	/* One item stays allocated until unload. */
	raid->dummy_biorq = __slalloc(&raid->biorq);
	if (__unlikely(!raid->dummy_biorq))
		KERNEL$SUICIDE("RAIDLN: CAN'T ALLOCATE DUMMY BIORQ");
	WQ_INIT(&raid->biorq_wait, "RAIDLN$BIORQ_WAIT");
	drq.name = raid->dev_name;
	drq.driver_name = "RAIDLN.SYS";
	drq.flags = 0;
	drq.init_root_handle = raid_init_root;
	drq.dev_ptr = raid;
	drq.dcall = NULL;
	drq.dcall_type = NULL;
	drq.dctl = NULL;
	drq.unload = raid_unload;
	SYNC_IO_CANCELABLE(&drq, KERNEL$REGISTER_DEVICE);
	if (__unlikely(drq.status < 0)) {
		r = drq.status;
		if (drq.status != -EINTR) _snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "RAIDLN: CAN'T REGISTER DEVICE %s", drq.name);
		goto ret4;
	}
	raid->lnte = drq.lnte;
	raid->dlrq = KERNEL$TSR_IMAGE();
	return 0;
	/* Error unwinding, in reverse order of acquisition. */
	ret4:
	__slow_slfree(raid->dummy_biorq);
	ret3:
	KERNEL$SLAB_DESTROY(&raid->biorq);
	BIO$FREE_PARTITIONS(raid->p);
	ret2:
	for (i = 0; i < raid->n_devices; i++) close(raid->devices[i].h);
	ret1:
	KERNEL$UNIVERSAL_FREE(raid);
	ret0:
	return r;
}

static int raid_unload(void *p, void **release, char *argv[])
{
	int r;
	int i;
	RAID *raid = p;
	if (__unlikely(r = KERNEL$DEVICE_UNLOAD(raid->lnte, argv))) return r;
	RAISE_SPL(SPL_RAID);
	__slow_slfree(raid->dummy_biorq);
	while (!KERNEL$SLAB_EMPTY(&raid->biorq)) KERNEL$SLEEP(1);
	LOWER_SPL(SPL_ZERO);
	WQ_WAKE_ALL(&raid->biorq_wait);
	KERNEL$SLAB_DESTROY(&raid->biorq);
	for (i = 0; i < raid->n_devices; i++) close(raid->devices[i].h);
	*release = raid->dlrq;
	KERNEL$UNIVERSAL_FREE(raid);
	return 0;
}
