#include <SPAD/AC.H>
#include <SPAD/DEV_KRNL.H>
#include <SPAD/SYNC.H>
#include <SPAD/WQ.H>
#include <SPAD/IOCTL.H>
#include <SYS/PARAM.H>
#include <SYS/STAT.H>
#include <SYS/POLL.H>
#include <LIMITS.H>
#include <TIME.H>
#include <ARCH/BSF.H>

/*
 * lnte: registration cookie; main() passes its address to
 *       KERNEL$REGISTER_DEVICE() and unload() hands it to
 *       KERNEL$DEVICE_UNLOAD().
 * dlrq: result of KERNEL$TSR_IMAGE() saved by main(); unload() returns it to
 *       its caller through *release.
 */
static void *lnte, *dlrq;

static int unload(void *p, void **release, const char * const argv[]);

/* Handle operations implemented further down in this file. */
static void *pipe_lookup(HANDLE *h, char *str, int open_flags);
static int pipe_close(HANDLE *h, IORQ *rq);
static IO_STUB pipe_read;
static IO_STUB pipe_write;
static IO_STUB pipe_ioctl;

/*
 * Blocked requests are parked on one of these wait queues.  The bucket index
 * is computed in MAP_PIPE_STRUCT() from the address of the pipe structure and
 * masked with (PIPE_HASH_SIZE - 1), so PIPE_HASH_SIZE must stay a power of
 * two.  Different pipes can map to the same bucket.
 */
#define PIPE_HASH_SIZE		1024
static WQ read_wait[PIPE_HASH_SIZE];
static WQ write_wait[PIPE_HASH_SIZE];

/*
 * Operations of the device root handle (installed by init_root()).
 * The only real entry is `lookup`: pipe_lookup() converts the handle into a
 * read end or a write end.  No I/O is possible on the root handle itself;
 * every I/O slot is KERNEL$NO_OPERATION.
 * NOTE(review): the leading SPL_X(SPL_DEV) is presumably the SPL at which the
 * handlers run (it matches the SPL_DEV used in the DECL_IOCALL declarations)
 * -- confirm against the HANDLE_OPERATIONS definition.
 */
static const HANDLE_OPERATIONS pipe_operations = {
	SPL_X(SPL_DEV),
	KERNEL$NO_VSPACE_GET,
	KERNEL$NO_VSPACE_PUT,
	KERNEL$NO_VSPACE_MAP,
	KERNEL$NO_VSPACE_DMALOCK,
	KERNEL$NO_VSPACE_DMA64LOCK,
	KERNEL$NO_VSPACE_PHYSLOCK,
	KERNEL$NO_VSPACE_GET_PAGEIN_RQ,
	KERNEL$NO_VSPACE_GET_PAGE,
	KERNEL$NO_VSPACE_SWAP_OP,
	NULL,			/* clone */
	pipe_lookup,		/* lookup */
	NULL,			/* create */
	NULL,			/* delete */
	NULL,			/* rename */
	NULL,			/* lookup_io */
	NULL,			/* instantiate */
	NULL,			/* leave */
	NULL,			/* detach */
	NULL,			/* close */
	KERNEL$NO_OPERATION,	/* READ */
	KERNEL$NO_OPERATION,	/* WRITE */
	KERNEL$NO_OPERATION,	/* AREAD */
	KERNEL$NO_OPERATION,	/* AWRITE */
	KERNEL$NO_OPERATION,	/* IOCTL */
	KERNEL$NO_OPERATION,	/* BIO */
	KERNEL$NO_OPERATION,	/* PKTIO */
};

/*
 * Operations of the read end of a pipe (selected by pipe_lookup() for the
 * name "R").  Supports close, READ and IOCTL; WRITE and all other I/O slots
 * are KERNEL$NO_OPERATION.  The address of this table is also used as a tag:
 * pipe_read(), pipe_ioctl() and pipe_close() compare h->op against it to tell
 * which end they are serving.
 */
static const HANDLE_OPERATIONS read_operations = {
	SPL_X(SPL_DEV),
	KERNEL$NO_VSPACE_GET,
	KERNEL$NO_VSPACE_PUT,
	KERNEL$NO_VSPACE_MAP,
	KERNEL$NO_VSPACE_DMALOCK,
	KERNEL$NO_VSPACE_DMA64LOCK,
	KERNEL$NO_VSPACE_PHYSLOCK,
	KERNEL$NO_VSPACE_GET_PAGEIN_RQ,
	KERNEL$NO_VSPACE_GET_PAGE,
	KERNEL$NO_VSPACE_SWAP_OP,
	NULL,			/* clone */
	NULL,			/* lookup */
	NULL,			/* create */
	NULL,			/* delete */
	NULL,			/* rename */
	NULL,			/* lookup_io */
	NULL,			/* instantiate */
	NULL,			/* leave */
	NULL,			/* detach */
	pipe_close,		/* close */
	pipe_read,		/* READ */
	KERNEL$NO_OPERATION,	/* WRITE */
	KERNEL$NO_OPERATION,	/* AREAD */
	KERNEL$NO_OPERATION,	/* AWRITE */
	pipe_ioctl,		/* IOCTL */
	KERNEL$NO_OPERATION,	/* BIO */
	KERNEL$NO_OPERATION,	/* PKTIO */
};

/*
 * Operations of the write end of a pipe (selected by pipe_lookup() for the
 * name "W").  Supports close, WRITE and IOCTL; READ and all other I/O slots
 * are KERNEL$NO_OPERATION.  The address of this table is also used as a tag:
 * pipe_write() and pipe_ioctl() compare h->op against it to recognise a
 * write end.
 */
static const HANDLE_OPERATIONS write_operations = {
	SPL_X(SPL_DEV),
	KERNEL$NO_VSPACE_GET,
	KERNEL$NO_VSPACE_PUT,
	KERNEL$NO_VSPACE_MAP,
	KERNEL$NO_VSPACE_DMALOCK,
	KERNEL$NO_VSPACE_DMA64LOCK,
	KERNEL$NO_VSPACE_PHYSLOCK,
	KERNEL$NO_VSPACE_GET_PAGEIN_RQ,
	KERNEL$NO_VSPACE_GET_PAGE,
	KERNEL$NO_VSPACE_SWAP_OP,
	NULL,			/* clone */
	NULL,			/* lookup */
	NULL,			/* create */
	NULL,			/* delete */
	NULL,			/* rename */
	NULL,			/* lookup_io */
	NULL,			/* instantiate */
	NULL,			/* leave */
	NULL,			/* detach */
	pipe_close,		/* close */
	KERNEL$NO_OPERATION,	/* READ */
	pipe_write,		/* WRITE */
	KERNEL$NO_OPERATION,	/* AREAD */
	KERNEL$NO_OPERATION,	/* AWRITE */
	pipe_ioctl,		/* IOCTL */
	KERNEL$NO_OPERATION,	/* BIO */
	KERNEL$NO_OPERATION,	/* PKTIO */
};

/*
 * Lookup on the device root: the single-character name "R" selects the read
 * end and "W" the write end by switching the handle's operation table.
 * Returns NULL on success and an -ENOENT error pointer for any other name.
 * open_flags is not used.
 */
static void *pipe_lookup(HANDLE *h, char *str, int open_flags)
{
	switch (str[0]) {
		case 'R':
			/* the second character is inspected only after the first matched */
			if (__unlikely(str[1] != 0)) break;
			h->op = &read_operations;
			return NULL;
		case 'W':
			if (__unlikely(str[1] != 0)) break;
			h->op = &write_operations;
			return NULL;
		default:
			break;
	}
	return __ERR_PTR(-ENOENT);
}

/*
 * Map the pipe structure (FPIPE header followed by PIPE_SIZE bytes of ring
 * buffer) that backs the pipe handle h.
 *
 * The handle's file structure is mapped first.  It must carry the _O_PIPE
 * flag, and its `pos` field holds the address of the pipe structure inside
 * the address space h->file_addrspace.  That address is then mapped
 * read/write through the vspace of that process.
 *
 *   h      pipe handle
 *   rq     request on whose behalf the mapping is made
 *   unmap  receives the function that must be called on the returned pointer
 *          once the caller is done with it
 *   hash   receives the wait-queue bucket index derived from the pipe
 *          address; it is written as soon as the file structure has been
 *          validated, even if the call subsequently fails
 *
 * Returns the mapped structure, or NULL.  When NULL is returned the request
 * has already been dealt with and the caller must simply return: either
 * rq->status was set and CALL_AST(rq) issued, or a page-in was started with
 * DO_PAGEIN_NORET(), or KERNEL$MAP_FILE_STRUCT() itself returned NULL
 * (NOTE(review): presumably after disposing of rq -- confirm).
 */
static FPIPE *MAP_PIPE_STRUCT(HANDLE *h, IORQ *rq, vspace_unmap_t **unmap, unsigned *hash)
{
	FFILE *f;
	FPIPE *a;
	PROC *p;
	VDESC vd;
	vspace_unmap_t *u;
	void *s;
	/* the pipe address is carried in the file offset, so it has to fit */
	if (__unlikely(sizeof(void *) > sizeof(_u_off_t)))
		KERNEL$SUICIDE("MAP_PIPE_STRUCT: POINTER IS LARGER THAN OFFSET: %d > %d", (int)sizeof(void *), (int)sizeof(_u_off_t));
#if __DEBUG >= 1
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_DEV)))
		KERNEL$SUICIDE("MAP_PIPE_STRUCT AT SPL %08X", KERNEL$SPL);
	if (__unlikely(sizeof(FPIPE) + PIPE_SIZE > __PAGE_CLUSTER_SIZE))
		KERNEL$SUICIDE("MAP_PIPE_STRUCT: PIPE IS LARGER THAN PAGE: %d + %d", (int)sizeof(FPIPE), (int)PIPE_SIZE);
#endif
	f = KERNEL$MAP_FILE_STRUCT(h, rq, &u);
	if (__unlikely(!f)) return NULL;
	if (__unlikely(!(f->flags & _O_PIPE))) {
		u(f);
		/* plain negative errno, as everywhere else in this file; no
		   pointer-to-error conversion is involved here */
		rq->status = -ENOOP;
		goto rete;
	}
	s = (void *)(unsigned long)f->pos;
	*hash = ((unsigned long)s >> __PAGE_CLUSTER_BITS) & (PIPE_HASH_SIZE - 1);
	u(f);
	if (__unlikely(!s)) {
		rq->status = -EINVAL;
		goto rete;
	}
	p = h->file_addrspace;
	vd.ptr = (unsigned long)s;
	vd.len = sizeof(FPIPE) + PIPE_SIZE;
	vd.vspace = KERNEL$PROC_VSPACE(p);
	/* NOTE(review): SPL is raised for the vspace call, which is told
	   SPL_X(SPL_DEV) -- presumably the level it drops back to; confirm */
	RAISE_SPL(SPL_VSPACE);
	a = vd.vspace->op->vspace_map(&vd, PF_RW, SPL_X(SPL_DEV), unmap);
	if (__likely(a != NULL) && __likely(!__IS_ERR(a))) return a;
	if (__likely(!a)) {
		/* NULL (as opposed to an error pointer): page the structure in */
		DO_PAGEIN_NORET(rq, &vd, PF_RW);
		return NULL;
	}
	rq->status = __PTR_ERR(a);
	rete:
	CALL_AST(rq);
	return NULL;
}

/*
 * Statement macro working on the caller's locals: loads the ring indices of
 * pipe `p` into `s` (start) and `e` (end) and stores in `l` the number of
 * bytes that can still be written.  One slot is always kept unused, so the
 * result is at most PIPE_SIZE - 1 when both indices are in range.
 */
#define get_write_place				\
do {						\
	s = p->start;				\
	e = p->end;				\
	l = s - e - 1;				\
	if (s <= e) l += PIPE_SIZE;		\
} while (0)

/*
 * READ handler for the read end of a pipe.
 *
 * The pipe is a ring buffer stored right behind the FPIPE header; `start` is
 * the read index, `end` the write index, and the ring is empty when they are
 * equal.  Data is copied to the request buffer in at most two pieces per
 * pass (up to the end of the ring, then from its beginning).
 *
 * Completion status:
 *   >= 0          number of bytes read (RQ->progress); 0 when the ring is
 *                 empty and there are no writers, or for a zero-length read
 *   -EWOULDBLOCK  ring empty, writers present, O_NONBLOCK set in read_flags
 *   -EINVAL       ring indices found outside [0, PIPE_SIZE)
 *   -EOVERFLOW    RQ->progress became negative
 * If the ring is empty, writers exist and the pipe is blocking, the request
 * is queued on read_wait[hash] instead of completing.  If copying to the
 * request buffer faults, everything copied in this invocation is undone and
 * a page-in of the buffer is requested.
 */
static DECL_IOCALL(pipe_read, SPL_DEV, SIORQ)
{
	unsigned csf;		/* bytes copied so far in this invocation */
	long l, s, e;
	FPIPE *p;
	vspace_unmap_t *funmap;
	VBUF vbuf;
	HANDLE *h = RQ->handle;
	unsigned hash;
	/* the handle no longer is a read end: hand the request back */
	if (__unlikely(h->op != &read_operations))
		RETURN_IORQ_LSTAT(RQ, KERNEL$WAKE_READ);
	RQ->tmp1 = (unsigned long)KERNEL$WAKE_READ;
	TEST_LOCKUP_ENTRY(RQ, RETURN);
	SWITCH_PROC_ACCOUNT(h->name_addrspace, SPL_X(SPL_DEV));
	p = MAP_PIPE_STRUCT(h, (IORQ *)RQ, &funmap, &hash);
	if (__unlikely(!p)) RETURN;
	if (__unlikely(!RQ->v.len)) goto eof;
	csf = 0;
	next_part:
	s = p->start;
	e = p->end;
	/* The indices live in a structure mapped from process memory, so they
	   cannot be trusted.  Compare as unsigned so that negative values are
	   rejected too instead of being used as buffer offsets below. */
	if (__unlikely((unsigned long)s >= PIPE_SIZE) || __unlikely((unsigned long)e >= PIPE_SIZE)) goto inval;
	if (__unlikely(s == e)) {
		/* ring is empty: finish if anything was read or nobody can write */
		if (__likely(csf != 0) || __likely(RQ->progress != 0) || __unlikely(!p->writers) /*|| __unlikely(p->l_refcount < 2)*/) goto eof2;
		if (__unlikely(p->read_flags & O_NONBLOCK)) {
			funmap(p);
			RQ->status = -EWOULDBLOCK;
			RETURN_AST(RQ);
		}
		WQ_WAIT_F(&read_wait[hash], RQ);
		funmap(p);
		RETURN;
	}
	/* contiguous run of data: up to `end`, or up to the end of the ring */
	if (__likely(e > s)) l = e - s;
	else l = PIPE_SIZE - s;
	vbuf.ptr = (char *)(p + 1) + s;
	vbuf.len = l;
	vbuf.spl = SPL_X(SPL_DEV);
	RAISE_SPL(SPL_VSPACE);
	/* `s` is reused for the number of bytes transferred; 0 means fault */
	if (__unlikely(!(s = RQ->v.vspace->op->vspace_put(&RQ->v, &vbuf)))) {
		/* undo this invocation's copies so the read restarts cleanly */
		RQ->v.ptr -= csf;
		RQ->v.len += csf;
		if (__unlikely((int)(p->start -= csf) < 0)) p->start += PIPE_SIZE;
		goto pf;
	}
	csf += s;
	if (__unlikely((p->start += s) == PIPE_SIZE)) p->start = 0;
	SWITCH_PROC_ACCOUNT(RQ->handle->name_addrspace, SPL_X(SPL_DEV));
	/* note: we must not break here because pipe atomicity would be damaged.
		we copy at most one page anyway, so there's no need for it. */
	if (RQ->v.len) goto next_part;
	eof2:
	RQ->progress += csf;
	/* space was (possibly) freed: let blocked writers retry */
	WQ_WAKE_ALL_PL(&write_wait[hash]);
	eof:
	funmap(p);
	if (__likely(RQ->progress >= 0)) RQ->status = RQ->progress;
	else RQ->status = -EOVERFLOW;
	RETURN_AST(RQ);

	inval:
	RQ->progress += csf;
	funmap(p);
	RQ->status = -EINVAL;
	RETURN_AST(RQ);

	pf:
	funmap(p);
	DO_PAGEIN(RQ, &RQ->v, PF_WRITE);
}

/*
 * WRITE handler for the write end of a pipe.
 *
 * Data from the request buffer is appended to the ring buffer stored behind
 * the FPIPE header at index `end`; the free space is computed by
 * get_write_place.
 *
 * Completion status:
 *   >= 0          number of bytes written (RQ->progress)
 *   -EPIPE        the pipe has no readers
 *   -EWOULDBLOCK  nothing written yet, not enough room, O_NONBLOCK set in
 *                 write_flags
 *   -EINVAL       ring indices found outside [0, PIPE_SIZE)
 *   -EOVERFLOW    RQ->progress became negative
 * "Not enough room" means the free space is smaller than both PIPE_BUF and
 * the remaining length.  In that case a blocking pipe queues the request on
 * write_wait[hash]; once part of the data has gone out, whatever still fits
 * is written first, and a full ring either waits (blocking) or completes
 * with the partial count (O_NONBLOCK).  If reading the request buffer
 * faults, everything copied in this invocation is undone and a page-in of
 * the buffer is requested.
 */
static DECL_IOCALL(pipe_write, SPL_DEV, SIORQ)
{
	unsigned csf;		/* bytes copied so far in this invocation */
	long l, s, e;
	FPIPE *p;
	vspace_unmap_t *funmap;
	VBUF vbuf;
	HANDLE *h = RQ->handle;
	unsigned hash;
	/* the handle no longer is a write end: hand the request back */
	if (__unlikely(h->op != &write_operations))
		RETURN_IORQ_LSTAT(RQ, KERNEL$WAKE_WRITE);
	RQ->tmp1 = (unsigned long)KERNEL$WAKE_WRITE;
	TEST_LOCKUP_ENTRY(RQ, RETURN);
	SWITCH_PROC_ACCOUNT(h->name_addrspace, SPL_X(SPL_DEV));
	p = MAP_PIPE_STRUCT(h, (IORQ *)RQ, &funmap, &hash);
	if (__unlikely(!p)) RETURN;
	if (__unlikely(!RQ->v.len)) goto eof;
	if (__unlikely(!p->readers) /*|| __unlikely(p->l_refcount < 2)*/) goto epipe;
	csf = 0;
	next_part:
	get_write_place;
	/* The indices live in a structure mapped from process memory, so they
	   cannot be trusted.  Compare as unsigned so that negative values are
	   rejected too instead of being used as buffer offsets below. */
	if (__unlikely((unsigned long)s >= PIPE_SIZE) || __unlikely((unsigned long)e >= PIPE_SIZE)) goto inval;
	if (__unlikely(l < PIPE_BUF) && __unlikely(l < RQ->v.len)) {
		if (__likely(csf != 0) || __likely(RQ->progress != 0)) {
			/* part of the data is already out: use up what is left */
			if (__likely(l != 0)) goto w;
			if (__unlikely(!(p->write_flags & O_NONBLOCK))) {
	/* This is a hack incompatible with most other drivers but required by
	   a specification --- if the pipe is not O_NONBLOCK, do not return
	   partial number of bytes and wait until everything is written. */
				RQ->progress += csf;
				WQ_WAKE_ALL_PL(&read_wait[hash]);
				goto wai;
			}
			goto eof2;
		}
		if (__unlikely(p->write_flags & O_NONBLOCK)) {
			funmap(p);
			RQ->status = -EWOULDBLOCK;
			RETURN_AST(RQ);
		}
		wai:
		WQ_WAIT_F(&write_wait[hash], RQ);
		funmap(p);
		RETURN;
	}
	w:
	/* limit the copy to the contiguous part up to the end of the ring */
	if (e + l > PIPE_SIZE) l = PIPE_SIZE - e;
	vbuf.ptr = (char *)(p + 1) + e;
	vbuf.len = l;
	vbuf.spl = SPL_X(SPL_DEV);
	RAISE_SPL(SPL_VSPACE);
	/* `s` is reused for the number of bytes transferred; 0 means fault */
	if (__unlikely(!(s = RQ->v.vspace->op->vspace_get(&RQ->v, &vbuf)))) {
		/* undo this invocation's copies so the write restarts cleanly */
		RQ->v.ptr -= csf;
		RQ->v.len += csf;
		if (__unlikely((int)(p->end -= csf) < 0)) p->end += PIPE_SIZE;
		goto pf;
	}
	csf += s;
	if (__unlikely((p->end += s) == PIPE_SIZE)) p->end = 0;
	SWITCH_PROC_ACCOUNT(RQ->handle->name_addrspace, SPL_X(SPL_DEV));
	/* note: we must not break here because pipe atomicity would be damaged.
		we copy at most one page anyway, so there's no need for it. */
	if (RQ->v.len) goto next_part;
	eof2:
	RQ->progress += csf;
	/* data was (possibly) added: let blocked readers retry */
	WQ_WAKE_ALL_PL(&read_wait[hash]);
	eof:
	funmap(p);
	if (__likely(RQ->progress >= 0)) RQ->status = RQ->progress;
	else RQ->status = -EOVERFLOW;
	RETURN_AST(RQ);

	epipe:
	funmap(p);
	RQ->status = -EPIPE;
	RETURN_AST(RQ);

	inval:
	RQ->progress += csf;
	funmap(p);
	RQ->status = -EINVAL;
	RETURN_AST(RQ);

	pf:
	funmap(p);
	DO_PAGEIN(RQ, &RQ->v, PF_READ);
}

/*
 * Fill in the stat fields a pipe reports: FIFO type with mode 0600, a single
 * link, a block size of PIPE_SIZE rounded down to a power of two, and the
 * current time for all three timestamps.  Other fields are left untouched;
 * the caller is expected to have cleared the structure.
 */
static __finline__ void pipe_stat(struct stat *stat)
{
	time_t now = time(NULL);

	stat->st_mode = S_IFIFO | 0600;
	stat->st_nlink = 1;
	stat->st_blksize = 1 << __BSR(PIPE_SIZE);
	stat->st_ctime = now;
	stat->st_mtime = now;
	stat->st_atime = now;
}

/*
 * IOCTL handler shared by both pipe ends; `op` is 0 for the read end and 1
 * for the write end.
 *
 *   IOCTL_SELECT_READ   status 0 at once on a write end.  On a read end:
 *                       0 if data is buffered or there are no writers;
 *                       otherwise the request waits on read_wait[hash] when
 *                       RQ->param is non-zero, else -EWOULDBLOCK.
 *   IOCTL_SELECT_WRITE  status 0 at once on a read end.  On a write end:
 *                       0 if at least PIPE_BUF bytes are free or there are
 *                       no readers; otherwise wait on write_wait[hash] when
 *                       RQ->param is non-zero, else -EWOULDBLOCK.
 *   IOCTL_POLL          status is a poll mask.  Read end: always POLLOUT,
 *                       plus POLLIN if data is buffered, or POLLHUP if empty
 *                       with no writers.  Write end: always POLLIN, plus
 *                       POLLERR if there are no readers, or POLLOUT if at
 *                       least PIPE_BUF bytes are free.
 *   IOCTL_NREAD         number of buffered bytes (0 on a write end);
 *                       -EINVAL if the indices give an out-of-range count.
 *   IOCTL_DUP           increments the reader or writer count.
 *   IOCTL_STAT          copies out a struct stat built by pipe_stat().
 *   IOCTL_FCNTL_GETFL   RQ->param with its O_NONBLOCK bit replaced by the
 *                       bit stored for this end.
 *   IOCTL_FCNTL_SETFL   stores the O_NONBLOCK bit of RQ->param for this end.
 *   anything else       -ENOOP.
 */
static DECL_IOCALL(pipe_ioctl, SPL_DEV, IOCTLRQ)
{
	int u;
	int op;
	FPIPE *p;
	vspace_unmap_t *funmap;
	HANDLE *h = RQ->handle;
	unsigned hash;		/* bucket index; unused by some of the cases */
	long s, e, l;
	if (h->op == &read_operations) op = 0;
	else if (__likely(h->op == &write_operations)) op = 1;
	else RETURN_IORQ_LSTAT(RQ, KERNEL$WAKE_IOCTL);
	RQ->tmp1 = (unsigned long)KERNEL$WAKE_IOCTL;
	TEST_LOCKUP_ENTRY(RQ, RETURN);
	SWITCH_PROC_ACCOUNT(h->name_addrspace, SPL_X(SPL_DEV));
	switch (RQ->ioctl) {
		case IOCTL_SELECT_READ: {
			if (__unlikely(op)) {
				RQ->status = 0;
				RETURN_AST(RQ);
			}
			p = MAP_PIPE_STRUCT(h, (IORQ *)RQ, &funmap, &hash);
			if (__unlikely(!p)) RETURN;
			u = p->start != p->end || !p->writers /*|| __unlikely(p->l_refcount < 2)*/;
			funmap(p);
			if (!u) {
				if (RQ->param) {
					WQ_WAIT_F(&read_wait[hash], RQ);
					RETURN;
				}
				RQ->status = -EWOULDBLOCK;
			} else RQ->status = 0;
			RETURN_AST(RQ);
		}
		case IOCTL_SELECT_WRITE: {
			if (__unlikely(!op)) {
				RQ->status = 0;
				RETURN_AST(RQ);
			}
			p = MAP_PIPE_STRUCT(h, (IORQ *)RQ, &funmap, &hash);
			if (__unlikely(!p)) RETURN;
			get_write_place;
			u = l >= PIPE_BUF || !p->readers /*|| __unlikely(p->l_refcount < 2)*/;
			funmap(p);
			if (!u) {
				if (RQ->param) {
					WQ_WAIT_F(&write_wait[hash], RQ);
					RETURN;
				}
				RQ->status = -EWOULDBLOCK;
			} else RQ->status = 0;
			RETURN_AST(RQ);
		}
		case IOCTL_POLL: {
			p = MAP_PIPE_STRUCT(h, (IORQ *)RQ, &funmap, &hash);
			if (__unlikely(!p)) RETURN;
			if (!op) {
				if (__likely(p->start != p->end)) u = POLLOUT | POLLIN;
				else if (__unlikely(!p->writers)) u = POLLOUT | POLLHUP;
				else u = POLLOUT;
			} else {
				if (__unlikely(!p->readers)) u = POLLIN | POLLERR;
				else {
					get_write_place;
					if (l >= PIPE_BUF) u = POLLIN | POLLOUT;
					else u = POLLIN;
				}
			}
			funmap(p);
			RQ->status = u;
			RETURN_AST(RQ);
		}
		case IOCTL_NREAD: {
			if (__unlikely(op)) {
				RQ->status = 0;
				RETURN_AST(RQ);
			}
			/* the bucket index is not needed here; it is received in the
			   local `hash` and ignored */
			p = MAP_PIPE_STRUCT(h, (IORQ *)RQ, &funmap, &hash);
			if (__unlikely(!p)) RETURN;
			if (p->end >= p->start) l = p->end - p->start;
			else l = PIPE_SIZE + p->end - p->start;
			funmap(p);
			/* unsigned comparison also rejects a negative count */
			if ((unsigned long)l >= PIPE_SIZE) {
				RQ->status = -EINVAL;
				RETURN_AST(RQ);
			}
			RQ->status = l;
			RETURN_AST(RQ);
		}
		case IOCTL_DUP: {
			p = MAP_PIPE_STRUCT(h, (IORQ *)RQ, &funmap, &hash);
			if (__unlikely(!p)) RETURN;
			if (!op) {
				p->readers++;
			} else {
				p->writers++;
			}
			funmap(p);
			RQ->status = 0;
			RETURN_AST(RQ);
		}
		case IOCTL_STAT: {
			int r;
			static struct stat stat;
			memset(&stat, 0, sizeof stat);
			pipe_stat(&stat);
			if (__likely((r = KERNEL$PUT_IOCTL_STRUCT(RQ, &stat, sizeof stat)) <= 0)) {
				RQ->status = r;
				RETURN_AST(RQ);
			}
			/* positive result: the destination has to be paged in first */
			DO_PAGEIN(RQ, &RQ->v, PF_WRITE);
		}
		case IOCTL_FCNTL_GETFL: {
			p = MAP_PIPE_STRUCT(h, (IORQ *)RQ, &funmap, &hash);
			if (__unlikely(!p)) RETURN;
			RQ->status = (RQ->param & ~O_NONBLOCK) | ((!op ? p->read_flags : p->write_flags) & O_NONBLOCK);
			funmap(p);
			RETURN_AST(RQ);
		}
		case IOCTL_FCNTL_SETFL: {
			p = MAP_PIPE_STRUCT(h, (IORQ *)RQ, &funmap, &hash);
			if (__unlikely(!p)) RETURN;
			if (!op) p->read_flags = RQ->param & O_NONBLOCK;
			else p->write_flags = RQ->param & O_NONBLOCK;
			funmap(p);
			RQ->status = 0;
			RETURN_AST(RQ);
		}
		default: {
			RQ->status = -ENOOP;
			RETURN_AST(RQ);
		}
	}
}

/*
 * Close handler for both pipe ends: drops one reader or one writer from the
 * pipe structure (never going below zero).  When the last reader is gone the
 * blocked writers of this bucket are woken, and vice versa.
 * Returns 0 when done, 1 when the pipe structure could not be mapped (the
 * request has then been taken care of by MAP_PIPE_STRUCT()).
 */
static int pipe_close(HANDLE *h, IORQ *rq)
{
	vspace_unmap_t *unmap;
	unsigned bucket;
	FPIPE *pipe = MAP_PIPE_STRUCT(h, rq, &unmap, &bucket);

	if (__unlikely(pipe == NULL)) return 1;
	if (h->op != &read_operations) {
		/* write end */
		if (__likely(pipe->writers)) pipe->writers--;
		if (__likely(!pipe->writers)) WQ_WAKE_ALL_PL(&read_wait[bucket]);
	} else {
		/* read end */
		if (__likely(pipe->readers)) pipe->readers--;
		if (__likely(!pipe->readers)) WQ_WAKE_ALL_PL(&write_wait[bucket]);
	}
	unmap(pipe);
	return 0;
}

/*
 * Root-handle initialiser passed to KERNEL$REGISTER_DEVICE(): installs the
 * lookup-only operation table on the handle.  `data` is not used.
 */
static void init_root(HANDLE *h, void *data)
{
	(void)data;
	h->op = &pipe_operations;
}

#define DEVICE_NAME	"SYS$PIPE"

/*
 * Driver entry point.  Accepts no arguments, initialises every wait queue and
 * registers the device.  On success the device name is copied into the
 * kernel error-message buffer, the KERNEL$TSR_IMAGE() result is kept in dlrq
 * for unload(), and 0 is returned.  On failure a negative error code is
 * returned, with a message stored in the error-message buffer (except for
 * -EINTR from registration).
 */
int main(int argc, char *argv[])
{
	int bucket;
	int err;

	if (argc > 1) {
		_snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "PIPE: SYNTAX ERROR");
		return -EBADSYN;
	}

	bucket = 0;
	while (bucket < PIPE_HASH_SIZE) {
		WQ_INIT(&read_wait[bucket], "PIPE$READ_WAIT");
		WQ_INIT(&write_wait[bucket], "PIPE$WRITE_WAIT");
		bucket++;
	}

	err = KERNEL$REGISTER_DEVICE(DEVICE_NAME, "PIPE.SYS", LNTE_PUBLIC, NULL, init_root, NULL, NULL, NULL, unload, &lnte, NULL);
	if (err >= 0) {
		strcpy(KERNEL$ERROR_MSG(), DEVICE_NAME);
		dlrq = KERNEL$TSR_IMAGE();
		return 0;
	}

	/* registration failed; an interrupted attempt gets no message */
	if (err != -EINTR) _snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, DEVICE_NAME ": COULD NOT REGISTER DEVICE %s", strerror(-err));
	return err;
}

/*
 * Unload callback registered in main().  Unregisters the device; if that
 * fails its non-zero result is returned and nothing else happens.  Otherwise
 * every wait queue is woken and the value saved in dlrq is handed back
 * through *release.  `p` is not used.
 */
static int unload(void *p, void **release, const char * const argv[])
{
	int bucket;
	int status = KERNEL$DEVICE_UNLOAD(lnte, argv);

	if (__unlikely(status != 0)) return status;

	bucket = 0;
	while (bucket < PIPE_HASH_SIZE) {
		WQ_WAKE_ALL(&read_wait[bucket]);
		WQ_WAKE_ALL(&write_wait[bucket]);
		bucket++;
	}

	*release = dlrq;
	return 0;
}

