#include <SPAD/LIBC.H>
#include <STRING.H>
#include <STDLIB.H>
#include <SPAD/SYNC.H>
#include <SPAD/PCI.H>
#include <SPAD/DEV.H>
#include <SPAD/SYNC.H>
#include <SPAD/DEV_KRNL.H>
#include <SPAD/ALLOC.H>
#include <SPAD/SYSLOG.H>
#include <SPAD/RANDOM.H>
#include <ARCH/BSF.H>
#include <ARCH/MOV.H>
#include <SPAD/TIMER.H>
#include <ARCH/IO.H>
#include <SPAD/IRQ.H>
#include <VALUES.H>
#include <SPAD/BIO_KRNL.H>

#include <SPAD/ATA.H>
#include "K2REG.H"
#include "ACTRLDMA.H"

/*
 * Build-time switch: when USE_QDMA is defined the driver compiles the queued
 * DMA (QDMA) command-ring code and allows QUEUE_DEPTH outstanding requests
 * per port; otherwise only one request per port is tracked.
 */
#define USE_QDMA

#ifdef USE_QDMA
/* Number of K2_CMD / K2_STATUS slots in each port's ring. */
#define QDMA_ENTRIES		4
/* Maximum number of requests submitted to one port at a time. */
#define QUEUE_DEPTH		2
#else
#define QUEUE_DEPTH		1
#endif

/* Upper bound on how long K2_RESET_QDMA polls for the reset acknowledge. */
#define RESET_TIMEOUT		TIMEOUT_JIFFIES(JIFFIES_PER_SECOND / 50 + 1)

/* Per-chip capability bits; stored as driver data in pci_cards[] and copied to ACTRL.flags. */
#define FLAGS_8PORT		0x00000001
#define FLAGS_NO_ATAPI_DMA	0x00000002
#define FLAGS_BAR3		0x00000004
#define FLAGS_EXTENDED_REGS	0x00000080 /* unknown if valid for others */
#define FLAGS_QDMA		0x00000100 /* probably valid for previous but I can't test it */

/* Flag combinations used by the entries of pci_cards[]. */
#define SVW4	(FLAGS_NO_ATAPI_DMA)
#define SVW8	(FLAGS_8PORT | FLAGS_NO_ATAPI_DMA)
#define SVW4X	(FLAGS_NO_ATAPI_DMA | FLAGS_EXTENDED_REGS | FLAGS_QDMA)
#define SVW42	(FLAGS_BAR3 | FLAGS_EXTENDED_REGS | FLAGS_QDMA)
#define SVW43	(FLAGS_EXTENDED_REGS | FLAGS_QDMA)

/*
 * PCI IDs handled by this driver. The last two fields of each entry are the
 * chip name printed at load time and the FLAGS_* combination for that chip;
 * main() receives them through PCI$FIND_DEVICE. The table ends with a zero
 * entry.
 */
static const struct pci_id_s pci_cards[] = {
	{ 0x1166, 0x0240, PCI_ANY_ID, PCI_ANY_ID, 0, 0, "K2 SATA", SVW4 },
	{ 0x1166, 0x0241, PCI_ANY_ID, PCI_ANY_ID, 0, 0, "SERVERWORKS RC4000", SVW8 },
	{ 0x1166, 0x0242, PCI_ANY_ID, PCI_ANY_ID, 0, 0, "SERVERWORKS BC4000", SVW4 },
	{ 0x1166, 0x024a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, "SERVERWORKS HT1000 SATA", SVW4X },
	{ 0x1166, 0x024b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, "SERVERWORKS HT1000 SATA", SVW4X },
	{ 0x1166, 0x024c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, "SERVERWORKS HT1000 SATA", SVW4X },
	{ 0x1166, 0x0410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, "SERVERWORKS HT1100 SATA", SVW42 },
	{ 0x1166, 0x0411, PCI_ANY_ID, PCI_ANY_ID, 0, 0, "SERVERWORKS HT1100 SATA", SVW43 },
	{ 0, }
};

/*
 * Size in bytes of the contiguous DMA area shared by n ports: each port gets
 * QDMA_ENTRIES command descriptors followed by QDMA_ENTRIES status blocks.
 * Only meaningful when USE_QDMA is defined (QDMA_ENTRIES is undefined otherwise).
 */
#define K2_AREA_SIZE(n)	(QDMA_ENTRIES * (sizeof(K2_CMD) + sizeof(K2_STATUS)) * (n))
/* Alignment requested from KERNEL$ALLOC_CONTIG_AREA for that area. */
#define K2_AREA_ALIGN	0x40

typedef struct __actrl ACTRL;
typedef struct __adevice ADEVICE;

/* State of the single device slot behind one port. */
struct __adevice {
	/* Attach parameters of the driver using this device; non-NULL blocks K2_UNLOAD with -EBUSY. */
	ATA_ATTACH_PARAM *attached;
	/* DEV_F_* bits combined with the transfer-mode bits selected by IDE_XFER_MASK. */
	unsigned dev_flags;
};

/* Bits of APORT.aport_flags. */
#define APORT_DISABLED		0
#define APORT_QDMA_ENABLED	0x01	/* QDMA engine is currently enabled on the port */
#define APORT_LOCK		0x80	/* QDMA_POST refuses new requests with -EAGAIN while set */

/* Per-port driver state; one instance per port lives in ACTRL.port[]. */
struct __aport {
	/* MMIO base of this port's registers (controller base + port index * PORT_SPACE). */
	__u8 *bar;
	/* APORT_* bits. */
	unsigned char aport_flags;
	/* Outstanding request per queue slot; slot 0 doubles as current_rq. */
	ATARQ *rq_list[QUEUE_DEPTH];
	/* Scatter-gather list per queue slot, from ATA$ALLOC_SGLIST; slot 0 doubles as sglist. */
	ASGLIST *sg_list[QUEUE_DEPTH];
	/* Device-side address of each scatter-gather list, as filled in by ATA$ALLOC_SGLIST. */
	__u32 sg_list_dev[QUEUE_DEPTH];
#ifdef USE_QDMA
	/* Software copy of the ring producer index written to PORT_QPI. */
	__u8 qpi;
	/* Next rq_list/sg_list slot to use for a submitted request. */
	__u8 qpi_list;
	/* Number of requests submitted to the ring and not yet finished. */
	__u8 q_submitted;
	/* CPU pointer to this port's ring of QDMA_ENTRIES command descriptors; NULL when QDMA is unused. */
	K2_CMD *qdma;
	/* CPU pointer to this port's QDMA_ENTRIES status blocks. */
	K2_STATUS *status;
#endif
	/* Request timeout timer; its handler is QDMA_TIMEOUT while QDMA requests are pending. */
	TIMER timeout;
	/* Owning controller. */
	ACTRL *ctrl;

#ifdef USE_QDMA
	/* Address of the command ring as given to the device (written to PORT_QAL). */
	__u32 qdma_virtual;
	/* Address of the status blocks as given to the device (used in K2_CMD.prd_lo). */
	__u32 status_virtual;
#endif

	/* NOTE(review): the pio_* fields are not used in this file; presumably consumed by the included FN_*.I code. */
	unsigned pio_n_sect;
	unsigned pio_sg_pos;
	u_jiffies_lo_t pio_timeout;

	/* Device slots of this port; the array size matches APORT_DEVICE_ARRAY_SIZE. */
	ADEVICE device[1];
	/* Zero-based port index within the controller. */
	unsigned char n;

	/* NOTE(review): the remaining fields are ATAPI/SCSI state not touched directly in this file
	   (except atapi_placeholder, which K2_IRQ compares against the current request). */
	unsigned atapi_length;
	SCSIRQ *scsirq;
	ATARQ atapi_placeholder;
	__u8 error_register;
	SCSIRQ *orig_scsirq;
	DECL_SCSIRQ(6) sense_rq;
	BIODESC sense_desc;
};

/*
 * Per-controller driver state. Allocated in main() with room for n_ports
 * APORT entries in the trailing port[] array.
 */
struct __actrl {
	/* MMIO mapping of the controller register BAR (see K2_BAR / K2_REGSPACE). */
	__u8 *bar;
	/* AST run for the controller interrupt; its handler is K2_IRQ. */
	AST irq_ast;
	/* Handle returned by KERNEL$REQUEST_IRQ, used to mask/unmask/release the IRQ. */
	IRQ_HANDLE *irq_ctrl;
	/* Context passed to KERNEL$ADD_RANDOMNESS from the interrupt handler. */
	RANDOM_CTX random_ctx;

	/* FLAGS_* capability bits of the detected chip. */
	unsigned flags;
	/* Number of ports: 8 when FLAGS_8PORT is set, 4 otherwise. */
	unsigned char n_ports;

	/* K2_UNLOAD sleeps until this drops to zero; it is not modified in this file. */
	int dcall_users;

#ifdef USE_QDMA
	/* CPU pointer to the contiguous area holding all ports' command rings and status blocks; NULL when QDMA is unused. */
	__u8 *k2_area;
	/* Unlock callback returned by vspace_dmalock for k2_area. */
	vspace_dmaunlock_t *k2_area_dmaunlock;
	/* Device-side address of k2_area as returned by vspace_dmalock. */
	__u32 k2_area_virtual;
#endif

	/* Device name, "ATA$K2@" followed by the PCI id. */
	char dev_name[__MAX_STR_LEN];
	/* lnte: registration handle from KERNEL$REGISTER_DEVICE; dlrq: value from KERNEL$TSR_IMAGE handed back on unload. */
	void *lnte, *dlrq;
	/* PCI id of the controller. */
	pci_id_t id;
	/* Per-port state; really n_ports entries long. */
	APORT port[1];
};

/*
 * Per-device flag bits kept in ADEVICE.dev_flags next to the transfer-mode
 * bits (the latter are extracted with IDE_XFER_MASK in ACTRL_AUX_CMD).
 * DEV_F_IO32 is defined as 0, so testing it never succeeds in this driver.
 * NOTE(review): the individual meanings are used by the included FN_*.I
 * code, which is not visible here.
 */
#define DEV_F_MASKIRQ		0x01000000U
#define DEV_F_IO32		0
#define DEV_F_ATAPI		0x04000000U
#define DEV_F_ATAPI_DMADIR	0x08000000U
#define DEV_F_ATAPI_DRQ_INT	0x10000000U
#define DEV_F_ATAPI_LONG_DRQ	0x20000000U
#define DEV_F_ATAPI_CMD_16	0x40000000U
#define DEV_F_ATAPI_DMA	0x80000000U

/* Index of the PCI BAR holding the registers: 3 for FLAGS_BAR3 chips, 5 otherwise. */
#define K2_BAR_FLAGS(flags)		((flags) & FLAGS_BAR3 ? 3 : 5)
#define K2_BAR(a)			K2_BAR_FLAGS((a)->flags)
/* Number of bytes of register space to map for controller a. */
#define K2_REGSPACE(a)			((a)->flags & FLAGS_EXTENDED_REGS ? EXTENDED_SPACE : (a)->n_ports * PORT_SPACE)
/*
 * MMIO accessors relative to (a)->bar. They work on any structure with a
 * `bar` member, i.e. both APORT (port registers) and ACTRL (global registers).
 */
#define PORT_READ_8(a, idx)		mmio_inb((a)->bar + (idx))
#define PORT_WRITE_8(a, idx, val)	mmio_outb((a)->bar + (idx), (val))
#define PORT_READ_16(a, idx)		mmio_inw((a)->bar + (idx))
#define PORT_WRITE_16(a, idx, val)	mmio_outw((a)->bar + (idx), (val))
#define PORT_READ_32(a, idx)		mmio_inl((a)->bar + (idx))
#define PORT_WRITE_32(a, idx, val)	mmio_outl((a)->bar + (idx), (val))

/*
 * Taskfile register accessors. FEATURES, COUNT and LBA_* are written as
 * 16-bit values: CODE_LOAD_TASKFILE packs the low byte and the corresponding
 * high-order byte of the FIS into one write. Reads are 8-bit.
 */
#define ATA_OUT_DATA16(p, v)		PORT_WRITE_16(p, PORT_DATA, v)
#define ATA_OUT_FEATURES(p, v)		PORT_WRITE_16(p, PORT_FEATURES, v)
#define ATA_OUT_COUNT(p, v)		PORT_WRITE_16(p, PORT_COUNT, v)
#define ATA_OUT_LBA_L(p, v)		PORT_WRITE_16(p, PORT_LBA_L, v)
#define ATA_OUT_LBA_M(p, v)		PORT_WRITE_16(p, PORT_LBA_M, v)
#define ATA_OUT_LBA_H(p, v)		PORT_WRITE_16(p, PORT_LBA_H, v)
#define ATA_OUT_DRIVE(p, v)		PORT_WRITE_8(p, PORT_DRIVE, v)
#define ATA_OUT_CMD(p, v)		PORT_WRITE_8(p, PORT_CMD, v)
#define ATA_IN_DATA16(p)		PORT_READ_16(p, PORT_DATA)
#define ATA_IN_ERROR(p)			PORT_READ_8(p, PORT_ERROR)
#define ATA_IN_FEATURES(p)		PORT_READ_8(p, PORT_FEATURES)
#define ATA_IN_COUNT(p)			PORT_READ_8(p, PORT_COUNT)
#define ATA_IN_LBA_L(p)			PORT_READ_8(p, PORT_LBA_L)
#define ATA_IN_LBA_M(p)			PORT_READ_8(p, PORT_LBA_M)
#define ATA_IN_LBA_H(p)			PORT_READ_8(p, PORT_LBA_H)
#define ATA_IN_DRIVE(p)			PORT_READ_8(p, PORT_DRIVE)
#define ATA_IN_STATUS(p)		PORT_READ_8(p, PORT_STATUS)

/* Device control / alternate status register accessors. */
#define ATA_OUT_CTRL(p, v)		PORT_WRITE_8(p, PORT_CTRL, v)
#define ATA_IN_ALTSTATUS(p)		PORT_READ_8(p, PORT_ALTSTATUS)

/*
 * Read `len` 16-bit words from the port's data register into `data`,
 * converting each with __16CPU2LE. `bit32` is accepted but ignored: the
 * transfer is always done 16 bits at a time.
 */
static void ATA_INS(APORT *p, int bit32, void *data, unsigned len)
{
	__u16 *dst = data;
	unsigned word;

	for (word = 0; word < len; word++)
		dst[word] = __16CPU2LE(ATA_IN_DATA16(p));
}

/*
 * Write `len` 16-bit words from `data` to the port's data register,
 * converting each with __16LE2CPU. `bit32` is accepted but ignored: the
 * transfer is always done 16 bits at a time.
 */
static void ATA_OUTS(APORT *p, int bit32, void *data, unsigned len)
{
	__u16 *src = data;
	unsigned word;

	for (word = 0; word < len; word++)
		ATA_OUT_DATA16(p, __16LE2CPU(src[word]));
}

/*
 * PCI match callback handed to PCI$FIND_DEVICE. Accepts a device only if it
 * matches the id table AND its register BAR (chosen from the matched entry's
 * flags) reads back as a non-zero memory resource.
 * Returns the result of PCI$TEST_LIST, or NULL when the BAR is zero.
 */
__COLD_ATTR__ static char *K2_TEST_LIST(const void *table, pci_id_t id, unsigned long *driver_data)
{
	__u64 bar_addr;
	char *match = PCI$TEST_LIST(table, id, driver_data);

	if (!match)
		return match;

	bar_addr = PCI$READ_MEM_RESOURCE(id, K2_BAR_FLAGS(*driver_data), NULL);
	return bar_addr ? match : NULL;
}

#ifdef USE_QDMA

/* Interrupt mask written to PORT_QMR while the QDMA engine is enabled. */
#define QDMA_ACTIVE_INTS	(PORT_QMR_CMD_ERROR | PORT_QMR_CMD_DONE | PORT_QMR_DEV_CHANGE)
/* Interrupt mask written to PORT_QMR while the QDMA engine is disabled. */
#define QDMA_INACTIVE_INTS	(PORT_QMR_DEV_CHANGE)

/*
 * Stop the port's QDMA engine and reprogram its ring registers.
 *
 * p          - port to reset
 * port_reset - when non-zero, additionally issue a QDMA reset and poll
 *              PORT_QSR for PORT_QSR_RESET_ACK for up to RESET_TIMEOUT
 *
 * On return the engine is disabled, the ring base/size and both indices are
 * programmed, and APORT_QDMA_ENABLED is cleared.
 */
__COLD_ATTR__ static void K2_RESET_QDMA(APORT *p, int port_reset)
{
	/* Turn the engine off and fall back to the inactive interrupt set. */
	PORT_WRITE_32(p, PORT_QCR, 0);
	PORT_WRITE_32(p, PORT_QMR, QDMA_INACTIVE_INTS);
	PORT_WRITE_32(p, PORT_QSR, ~QDMA_INACTIVE_INTS);

	if (port_reset) {
		u_jiffies_lo_t start, now;

		PORT_WRITE_32(p, PORT_QCR, PORT_QCR_QDMA_ENABLE | PORT_QCR_QDMA_RESET);
		start = KERNEL$GET_JIFFIES_LO();
		do {
			now = KERNEL$GET_JIFFIES_LO();
			if (PORT_READ_32(p, PORT_QSR) & PORT_QSR_RESET_ACK)
				break;
			KERNEL$UDELAY(10);
		} while (now - start < RESET_TIMEOUT);
		/*
		 * Running out of time is not reported: it happens on a
		 * disconnected link, so the driver stays quiet about it.
		 */

		PORT_WRITE_32(p, PORT_QSR, ~QDMA_INACTIVE_INTS);
		PORT_WRITE_32(p, PORT_QCR, 0);
	}

	/* Program ring base, zero both indices, set the ring depth. */
	PORT_WRITE_32(p, PORT_QAL, p->qdma_virtual);
	PORT_WRITE_32(p, PORT_QAU, 0);
	PORT_WRITE_8(p, PORT_QPI, 0);
	PORT_WRITE_8(p, PORT_QCI, 0);
	PORT_WRITE_32(p, PORT_QDR, QDMA_ENTRIES - 1);
	PORT_WRITE_32(p, PORT_QSR, ~QDMA_INACTIVE_INTS);
	PORT_WRITE_32(p, PORT_QMR, QDMA_INACTIVE_INTS);

	p->aport_flags &= ~APORT_QDMA_ENABLED;
}

/*
 * Switch the port's QDMA engine on, unless it is already on. Zeroes the
 * hardware ring indices and the software queue bookkeeping, installs the
 * active interrupt mask and sets APORT_QDMA_ENABLED.
 */
__COLD_ATTR__ static void K2_ENABLE_QDMA(APORT *p)
{
	if (!(p->aport_flags & APORT_QDMA_ENABLED)) {
		PORT_WRITE_8(p, PORT_QPI, 0);
		PORT_WRITE_8(p, PORT_QCI, 0);
		PORT_WRITE_32(p, PORT_QSR, QDMA_ACTIVE_INTS & ~QDMA_INACTIVE_INTS);
		PORT_WRITE_32(p, PORT_QMR, QDMA_ACTIVE_INTS);
		PORT_WRITE_32(p, PORT_QCR, PORT_QCR_QDMA_ENABLE);

		p->q_submitted = 0;
		p->qpi_list = 0;
		p->qpi = 0;

		p->aport_flags |= APORT_QDMA_ENABLED;
	}
}

/*
 * Switch the port's QDMA engine off, unless it is already off. Zeroes the
 * hardware ring indices, restores the inactive interrupt mask and clears
 * APORT_QDMA_ENABLED.
 */
__COLD_ATTR__ static void K2_DISABLE_QDMA(APORT *p)
{
	if (p->aport_flags & APORT_QDMA_ENABLED) {
		PORT_WRITE_32(p, PORT_QCR, 0);
		PORT_WRITE_8(p, PORT_QPI, 0);
		PORT_WRITE_8(p, PORT_QCI, 0);
		PORT_WRITE_32(p, PORT_QMR, QDMA_INACTIVE_INTS);
		PORT_WRITE_32(p, PORT_QSR, QDMA_ACTIVE_INTS & ~QDMA_INACTIVE_INTS);

		p->aport_flags &= ~APORT_QDMA_ENABLED;
	}
}

static int AD_POST(ATARQ *rq);
static int QDMA_POST(ATARQ *rq);

#endif

#define APORT_DEVICE_ARRAY_SIZE		1

#define CODE_DISK_NAME(result, len, prefix, a, portn, devn)		\
	_snprintf(result, len, "%s%d@%s", prefix, portn, a->dev_name);

#define CODE_WAIT_FOR_OTHER_DEVICE(p)	0
#define CODE_INIT_CANT_RESET(p)		0
#ifdef USE_QDMA
#define CODE_POST			(p->qdma && !(p->device[dev].dev_flags & DEV_F_ATAPI) ? QDMA_POST : AD_POST)
#define CODE_QUEUE_DEPTH		(p->qdma ? QUEUE_DEPTH : 1)
#define CODE_INIT_XFER(p, dev)		if (p->qdma) K2_RESET_QDMA(p, 1);
#define CODE_AFTER_INIT(p, dev)		if (p->qdma && !(p->device[dev].dev_flags & DEV_F_ATAPI)) K2_ENABLE_QDMA(p);
#define CODE_DETACH_XFER(p, dev)	if (p->qdma) K2_RESET_QDMA(p, 1);
#endif

#define current_rq	rq_list[0]
#define sglist		sg_list[0]

#include "FN_ATTCH.I"

/*
 * Legacy bus-master DMA setup: program the descriptor table pointer with the
 * device address of scatter-gather list 0, write the direction bit to the DMA
 * command register, then write it again with DMACMD_BMEN set. The direction
 * bit is computed as (~atarq_flags & DMACMD_READ).
 */
#define CODE_SETUP_DMA(p, rq)						\
	PORT_WRITE_32(p, PORT_DMA_DTP, p->sg_list_dev[0]);		\
	PORT_WRITE_8(p, PORT_DMA_CMD, ~(rq)->atarq_flags & DMACMD_READ);\
	PORT_WRITE_8(p, PORT_DMA_CMD, (~(rq)->atarq_flags & DMACMD_READ) | DMACMD_BMEN);

/* Nothing to do here: CODE_SETUP_DMA has already set DMACMD_BMEN. */
#define CODE_START_DMA(p, rq)

/*
 * Load the taskfile from rq->fis and issue the command. Each 16-bit register
 * write carries the low byte in bits 0-7 and the matching high-order byte in
 * bits 8-15. FEATURES is written only when ATARQ_VALID_FEATURE is set.
 * Expects `p` and `rq` to be in scope at the expansion site.
 */
#define CODE_LOAD_TASKFILE						\
	ATA_OUT_DRIVE(p, rq->fis.device);				\
	if (__unlikely(rq->atarq_flags & ATARQ_VALID_FEATURE))		\
		ATA_OUT_FEATURES(p, rq->fis.feature0 | (rq->fis.feature8 << 8));									\
	ATA_OUT_COUNT(p, rq->fis.nsect0 | (rq->fis.nsect8 << 8));	\
	ATA_OUT_LBA_L(p, rq->fis.lba0 | (rq->fis.lba24 << 8));		\
	ATA_OUT_LBA_M(p, rq->fis.lba8 | (rq->fis.lba32 << 8));		\
	ATA_OUT_LBA_H(p, rq->fis.lba16 | (rq->fis.lba40 << 8));		\
	ATA_OUT_CMD(p, rq->fis.command);
	
#include "FN_CORE.I"

#include "FN_ATAPI.I"

#ifdef USE_QDMA

static void QDMA_ABORT(APORT *p);
static int QDMA_ERROR(APORT *p, __u32 qsr);
static void QDMA_TIMEOUT(TIMER *t);

/*
 * Submit one ATA request to the port's QDMA command ring.
 *
 * Returns 0 when the request was queued, -EAGAIN when the queue is full or
 * the port is locked (APORT_LOCK), a negative error from ATA$MAP_DMA, or
 * -EVSPACEFAULT when the mapping came out shorter than rq->len and the
 * request does not allow resizing (ATARQ_SET_SIZE clear).
 */
static int QDMA_POST(ATARQ *rq)
{
	APORT *p = rq->port;
	int r;
	K2_CMD *cmd;
	if (__unlikely(p->q_submitted >= QUEUE_DEPTH))
		return -EAGAIN;
	if (__unlikely(p->aport_flags & APORT_LOCK))
		return -EAGAIN;
	/* Fill the descriptor at the current producer index. */
	cmd = &p->qdma[p->qpi];
	/*
	 * Control flags are derived arithmetically from atarq_flags: the PIO flag
	 * is set when ATA_PROTOCOL_DMA is clear, the read-direction flag when
	 * ATARQ_TO_DEVICE is clear. The multiply/divide rescales one bit position
	 * to the other.
	 */
	cmd->ctrl_flags =
		K2_CMD_CTRL_FLAG_EIN |
		((~(unsigned)rq->atarq_flags & ATA_PROTOCOL_DMA) * (K2_CMD_CTRL_FLAG_PIO / ATA_PROTOCOL_DMA)) |
		((~(unsigned)rq->atarq_flags & ATARQ_TO_DEVICE) / (ATARQ_TO_DEVICE / K2_CMD_CTRL_FLAG_READ_DIR));
	if (__unlikely((rq->atarq_flags & ATARQ_PROTOCOL) == ATA_PROTOCOL_NODATA)) {
		/* No data phase: point the descriptor directly at this slot's status block. */
		cmd->ctrl_flags |= K2_CMD_CTRL_FLAG_READ_DIR;
		cmd->desc_type = K2_CMD_DESC_TYPE_PIO_NO_DATA;
		/* NOTE(review): unlike the data branch below, this value is stored without
		   __32CPU2LE — harmless on little-endian hosts; confirm whether
		   K2_CMD_PRD_LO_DIRECT is defined in CPU or device byte order. */
		cmd->prd_lo = (p->status_virtual + p->qpi * sizeof(K2_STATUS)) | K2_CMD_PRD_LO_DIRECT;
		cmd->prd_hi = __16CPU2LE(0);
		cmd->prd_count = __16CPU2LE(sizeof(K2_STATUS));
	} else {
		/* Data transfer: point the descriptor at this slot's scatter-gather list and map the buffer. */
		cmd->desc_type = K2_CMD_DESC_TYPE_PIO_DATA;
		cmd->prd_lo = __32CPU2LE(p->sg_list_dev[p->qpi_list]);
		cmd->prd_hi = __16CPU2LE(0);
		cmd->prd_count = __16CPU2LE(0);
		r = ATA$MAP_DMA(p->sg_list[p->qpi_list], &rq->desc, rq->len, PF_WRITE + ((rq->atarq_flags / ATARQ_TO_DEVICE) & 1), 0);
		if (__unlikely(r < 0)) return r;
		if (__likely(rq->atarq_flags & ATARQ_SET_SIZE)) {
			/* The request may be shortened: adopt the mapped length and sector count. */
			rq->len = r;
			rq->fis.nsect0 = r >> BIO_SECTOR_SIZE_BITS;
			rq->fis.nsect8 = r >> (BIO_SECTOR_SIZE_BITS + 8);
		} else if (__unlikely(r != rq->len)) {
			ATA$UNMAP_DMA(p->sg_list[p->qpi_list]);
			return -EVSPACEFAULT;
		}
	}
	/* Copy the taskfile from the request FIS into the descriptor. */
#define copy(x)	cmd->x = rq->fis.x
	copy(command);
	copy(device);
	copy(feature0);
	copy(feature8);
	copy(lba0);
	copy(lba8);
	copy(lba16);
	copy(lba24);
	copy(lba32);
	copy(lba40);
	copy(nsect0);
	copy(nsect8);
#undef copy
	/* Advance the producer index and tell the hardware about the new entry. */
	p->qpi = (p->qpi + 1) % QDMA_ENTRIES;
	PORT_WRITE_8(p, PORT_QPI, p->qpi);
	p->rq_list[p->qpi_list] = rq;
	p->qpi_list = (p->qpi_list + 1) % QUEUE_DEPTH;
	/* The timer is armed only for the first outstanding request; QDMA_FINISH re-arms it for the next one. */
	if (!p->q_submitted++) {
		p->timeout.fn = QDMA_TIMEOUT;
		KERNEL$SET_TIMER(rq->timeout, &p->timeout);
	}
	return 0;
}

/*
 * Complete the oldest outstanding QDMA request on port p with status `error`.
 *
 * Removes the request from its queue slot, stops the timeout timer and
 * re-arms it for the next outstanding request (if any), unmaps the DMA
 * buffer for requests with a data phase, and finally calls the request's
 * done() callback. The caller must ensure p->q_submitted is non-zero.
 */
static void QDMA_FINISH(APORT *p, int error)
{
	/* Oldest slot lies q_submitted entries behind the next free slot. */
	unsigned slot = (p->qpi_list - p->q_submitted + QUEUE_DEPTH) % QUEUE_DEPTH;
	ATARQ *finished = p->rq_list[slot];

	p->rq_list[slot] = NULL;
	finished->status = error;

	KERNEL$DEL_TIMER(&p->timeout);
	p->q_submitted--;
	if (p->q_submitted) {
		ATARQ *next = p->rq_list[(slot + 1) % QUEUE_DEPTH];
		KERNEL$SET_TIMER(next->timeout, &p->timeout);
	}

	if (__likely((finished->atarq_flags & ATARQ_PROTOCOL) != ATA_PROTOCOL_NODATA))
		ATA$UNMAP_DMA(p->sg_list[slot]);

	finished->done(finished);
}

#endif

/*
 * Interrupt AST for the controller. Walks the ports that may have raised the
 * interrupt; for each one it either services the QDMA engine (when enabled on
 * that port) or handles the legacy bus-master DMA / PIO / ATAPI completion.
 * Always unmasks the IRQ before returning.
 */
static DECL_IRQ_AST(K2_IRQ, SPL_ATA_SCSI, AST)
{
	ACTRL *a = GET_STRUCT(RQ, ACTRL, irq_ast);
	/* Start with every port as a candidate. */
	unsigned irq_mask = (1 << a->n_ports) - 1;
	if (__likely(a->flags & FLAGS_EXTENDED_REGS)) {
		/* Someone needs to check this experimentally for previous
		   chips. There are no docs. */
		/* Ports whose bit is set in K2_GLB_STS are dropped from the candidate mask. */
		irq_mask &= ~PORT_READ_32(a, K2_GLB_STS);
		if (__unlikely(!irq_mask)) goto eoi;
	}
	do {
		__u8 status, dmastatus;
		ATARQ *rq;
		/* Take the lowest-numbered candidate port and remove it from the mask. */
		int i = __BSF(irq_mask);
		APORT *p = &a->port[i];
		irq_mask &= ~(1 << i);

#ifdef USE_QDMA
		if (__likely(p->qdma != NULL)) {
			/* Read and acknowledge the QDMA status bits. */
			__u32 qsr = PORT_READ_32(p, PORT_QSR);
			if (__unlikely(qsr != 0))
				PORT_WRITE_32(p, PORT_QSR, qsr);
			if (__likely(p->aport_flags & APORT_QDMA_ENABLED)) {
				int err = 0;
				if (__unlikely(qsr & (PORT_QSR_CMD_ERROR | PORT_QSR_BUS_MASTER_ERROR | PORT_QSR_ERROR | PORT_QSR_OVERFLOW | PORT_QSR_UNDERFLOW | PORT_QSR_DATA_CRC_ERROR | PORT_QSR_PCI_MASTER_ABORT | PORT_QSR_ATAPI_UNDERRUN))) {
					/* A positive return means QDMA_ERROR already aborted the whole queue. */
					if ((err = QDMA_ERROR(p, qsr)) > 0)
						continue;
/*
 * If there was a command error then the engine is stopped and the error
 * belongs to the last completed command. Writing PORT_QSR once again
 * will restart the engine.
 */
				}
				if (__likely(qsr & PORT_QSR_CMD_DONE)) {
					/* `left` = ring entries between the hardware consumer index and our producer index. */
					__u8 qci = PORT_READ_8(p, PORT_QCI);
					__u8 left = (__u8)(p->qpi - qci + QDMA_ENTRIES) % QDMA_ENTRIES;
					if (__unlikely(left > p->q_submitted)) {
						KERNEL$SYSLOG(__SYSLOG_HW_BUG, a->dev_name, "COMPLETED MORE COMMANDS THAN SUBMITTED: QPI %02X, Q_SUBMITTED %02X, QCI %02X", p->qpi, p->q_submitted, qci);
						QDMA_ABORT(p);
						continue;
					}
					/* Finish every completed request; only the last one completed carries `err`. */
					while (p->q_submitted > left) {
						QDMA_FINISH(p, p->q_submitted > left + 1 ? 0 : err);
					}
					ATA_DEQUEUE(p);
				}
				if (__unlikely(qsr & PORT_QSR_CMD_ERROR)) {
					/* Second write of the error bit restarts the stopped engine (see comment above). */
					PORT_WRITE_32(p, PORT_QSR, PORT_QSR_CMD_ERROR);
				}
				continue;
			}
		}
#endif

		/* Legacy path: check and acknowledge the bus-master DMA status. */
		dmastatus = PORT_READ_8(p, PORT_DMA_STATUS);
		if (__likely(!(dmastatus & (DMASTATUS_IRQ | DMASTATUS_ERROR))))
			continue;
		PORT_WRITE_8(p, PORT_DMA_CMD, 0);
		PORT_WRITE_8(p, PORT_DMA_STATUS, dmastatus | DMASTATUS_IRQ | DMASTATUS_ERROR);
		status = ATA_IN_STATUS(p);
		rq = p->current_rq;
		if (__unlikely(!rq))
			continue;
		if (__unlikely(status & ATA_STATUS_BSY)) {
			/* For PIO it actually happens ... */
			if (__unlikely(rq->atarq_flags & ATA_PROTOCOL_DMA)) {
				status = AD_POLL_SLOW_DISK(p, dmastatus, status);
				if (__unlikely(status & ATA_STATUS_BSY)) goto ad_error;
			}
			continue;
		}
		if (__unlikely(rq == &p->atapi_placeholder)) {
			ATAPI_IRQ(p, dmastatus, status);
		} else if (__likely(rq->atarq_flags & ATA_PROTOCOL_DMA)) {
			if (__unlikely(dmastatus & DMASTATUS_ERROR)) {
				ad_error:
				AD_ERROR(p, dmastatus, status, 0);
				continue;
			}
			/* Success requires DRDY set and ERROR, DRQ and DF all clear. */
			if (__unlikely((status & (ATA_STATUS_ERROR | ATA_STATUS_DRQ | ATA_STATUS_DF | ATA_STATUS_DRDY)) != ATA_STATUS_DRDY))
			        goto ad_error;
			ATA$UNMAP_DMA(p->sglist);
			ATA_END_REQUEST(p, 0);
		} else {
			ATA_NONDMA_IRQ(p, status);
		}
	} while (irq_mask);

	KERNEL$ADD_RANDOMNESS(&a->random_ctx, NULL, 0);
	eoi:
	KERNEL$UNMASK_IRQ(a->irq_ctrl);
	RETURN;
}

/*
 * Timer callback for a request on the legacy (non-QDMA) path that did not
 * complete in time. Stops the bus-master DMA engine, acknowledges its status
 * bits, samples the ATA status register and hands everything to AD_ERROR
 * with its last argument set to 1 (the interrupt path passes 0).
 */
__COLD_ATTR__ static void ATA_TIMEOUT(TIMER *t)
{
	__u8 status, dmastatus;
	APORT *p = GET_STRUCT(t, APORT, timeout);
	LOWER_SPL(SPL_ATA_SCSI);
	/* Mark the timer as not pending. */
	SET_TIMER_NEVER(&p->timeout);
	dmastatus = PORT_READ_8(p, PORT_DMA_STATUS);
	PORT_WRITE_8(p, PORT_DMA_CMD, 0);
	PORT_WRITE_8(p, PORT_DMA_STATUS, dmastatus | DMASTATUS_IRQ | DMASTATUS_ERROR);
	status = ATA_IN_STATUS(p);
	AD_ERROR(p, dmastatus, status, 1);
}

#ifdef USE_QDMA

/*
 * Abort everything queued on port p: reset the QDMA engine (including the
 * port reset), fail all outstanding requests with -ETIMEDOUT, re-enable the
 * engine and let ATA_DEQUEUE continue with waiting requests.
 */
__COLD_ATTR__ static void QDMA_ABORT(APORT *p)
{
	K2_RESET_QDMA(p, 1);
	/* While APORT_LOCK is set QDMA_POST returns -EAGAIN, so done() callbacks
	   cannot queue new work into the ring that is being drained. */
	p->aport_flags |= APORT_LOCK;
	while (p->q_submitted)
		QDMA_FINISH(p, -ETIMEDOUT);
	p->aport_flags &= ~APORT_LOCK;
	K2_ENABLE_QDMA(p);
	ATA_DEQUEUE(p);
}

/*
 * Return the printable name of the command the QDMA engine is believed to be
 * working on, for use in log messages, or NULL when nothing is outstanding.
 * The request is located from the hardware consumer index (PORT_QCI),
 * clamped to the number of requests actually submitted.
 */
__COLD_ATTR__ static const char *QDMA_CURRENT_COMMAND(APORT *p)
{
	__u8 hw_consumer, behind;
	unsigned slot;
	ATARQ *active;

	if (p->q_submitted == 0)
		return NULL;

	hw_consumer = PORT_READ_8(p, PORT_QCI);
	/* Entries not yet consumed, plus one for the entry in progress. */
	behind = (__u8)(p->qpi - hw_consumer + QDMA_ENTRIES) % QDMA_ENTRIES;
	behind++;
	if (behind > p->q_submitted)
		behind = p->q_submitted;

	slot = (p->qpi_list - behind + QUEUE_DEPTH) % QUEUE_DEPTH;
	active = p->rq_list[slot];
	return ATA$COMMAND_NAME(active->fis.command, active->fis.feature0);
}

/*
 * Log a QDMA error reported in `qsr` for port p and decide how to recover.
 *
 * Returns -EIO when the error is a plain command error (the caller then
 * fails only the affected request), or 1 after aborting the whole queue,
 * which is done for overflow or for any error without PORT_QSR_CMD_ERROR.
 */
__COLD_ATTR__ static int QDMA_ERROR(APORT *p, __u32 qsr)
{
	const char *cmd_name = QDMA_CURRENT_COMMAND(p);
	int recoverable;

	KERNEL$SYSLOG(__SYSLOG_HW_ERROR, p->ctrl->dev_name, "QDMA ERROR, PORT %d%s%s: %08X%s%s%s%s%s%s%s%s",
		p->n,
		cmd_name ? ", COMMAND " : "",
		cmd_name ? cmd_name : "",
		qsr,
		(qsr & PORT_QSR_CMD_ERROR) ? ", CMD_ERROR" : "",
		(qsr & PORT_QSR_BUS_MASTER_ERROR) ? ", BUS_MASTER_ERROR" : "",
		(qsr & PORT_QSR_ERROR) ? ", ERROR" : "",
		(qsr & PORT_QSR_OVERFLOW) ? ", OVERFLOW" : "",
		(qsr & PORT_QSR_UNDERFLOW) ? ", UNDERFLOW" : "",
		(qsr & PORT_QSR_DATA_CRC_ERROR) ? ", DATA_CRC_ERROR" : "",
		(qsr & PORT_QSR_PCI_MASTER_ABORT) ? ", PCI_MASTER_ABORT" : "",
		(qsr & PORT_QSR_ATAPI_UNDERRUN) ? ", ATAPI_UNDERRUN" : ""
	);

	/* Only a command error without overflow can be pinned on one request. */
	recoverable = !(qsr & PORT_QSR_OVERFLOW) && (qsr & PORT_QSR_CMD_ERROR);
	if (recoverable)
		return -EIO;

	QDMA_ABORT(p);
	return 1;
}

/*
 * Timer callback for a QDMA request that did not complete in time. Logs the
 * timeout (with the name of the command believed to be in progress, when
 * known) and aborts every request queued on the port.
 */
__COLD_ATTR__ static void QDMA_TIMEOUT(TIMER *t)
{
	APORT *p = GET_STRUCT(t, APORT, timeout);
	const char *command;
	LOWER_SPL(SPL_ATA_SCSI);
	/* Mark the timer as not pending. */
	SET_TIMER_NEVER(&p->timeout);
	command = QDMA_CURRENT_COMMAND(p);
	/*
	 * QDMA_CURRENT_COMMAND returns NULL when nothing is outstanding; never
	 * hand a NULL pointer to "%s" (same guard as in QDMA_ERROR).
	 */
	KERNEL$SYSLOG(__SYSLOG_HW_ERROR, p->ctrl->dev_name, "QDMA TIMEOUT, PORT %d%s%s",
		p->n,
		command ? ", COMMAND " : "",
		command ? command : ""
		);
	QDMA_ABORT(p);
}

#endif

/*
 * Auxiliary command entry point called by attached ATA drivers.
 *
 * ap  - attach parameters identifying the port and device
 * cmd - ATA_AUX_* command code; further arguments depend on the command
 *
 * Must be called at SPL_ATA_SCSI; any other SPL kills the kernel.
 * Returns a command-specific value, -EOPNOTSUPP when DMA is requested for an
 * ATAPI device on a chip flagged FLAGS_NO_ATAPI_DMA, or -ENOOP for an
 * unknown command.
 */
__COLD_ATTR__ static int ACTRL_AUX_CMD(ATA_ATTACH_PARAM *ap, int cmd, ...)
{
	APORT *p;
	int r;
	va_list args;
	if (__unlikely(KERNEL$SPL != SPL_X(SPL_ATA_SCSI)))
		KERNEL$SUICIDE("ACTRL_AUX_CMD AT SPL %08X", KERNEL$SPL);
	p = ap->port;
	va_start(args, cmd);
	switch (cmd) {
		case ATA_AUX_GET_PROTOCOL_MASK: {
			/* Bitmask of the protocols this controller handles. */
			r = (1 << ATA_PROTOCOL_NODATA) | (1 << ATA_PROTOCOL_PIO) | (1 << ATA_PROTOCOL_DMA);
			break;
		}
		case ATA_AUX_SETUP_PIO_XFER:
		case ATA_AUX_SETUP_DMA_XFER: {
			/* Variadic arguments: identify data, transfer mode mask, timeout. */
			unsigned avail, avail_unsupp;
			__u16 *ident = va_arg(args, __u16 *);
			unsigned xfer_mask = va_arg(args, unsigned);
			u_jiffies_lo_t timeout = va_arg(args, u_jiffies_lo_t);
			if (cmd == ATA_AUX_SETUP_DMA_XFER && ATA$IS_ATAPI(ident) && p->ctrl->flags & FLAGS_NO_ATAPI_DMA) {
				r = -EOPNOTSUPP;
				break;
			}
			/* All PIO, multiword DMA and UDMA modes are offered; ATA$SET_XFER_EMPTY is
			   passed as the mode-setting callback. */
			avail = IDE_XFER_PIO_0 | IDE_XFER_PIO_1 | IDE_XFER_PIO_2 | IDE_XFER_PIO_3 | IDE_XFER_PIO_4 | IDE_XFER_WDMA_0 | IDE_XFER_WDMA_1 | IDE_XFER_WDMA_2 | IDE_XFER_MASK_UDMA;
			avail_unsupp = 0;
			r = ATA$SETUP_XFER(ap, cmd == ATA_AUX_SETUP_DMA_XFER, ident, p->device[ap->device].dev_flags & IDE_XFER_MASK, xfer_mask, avail, avail_unsupp, ATA$SET_XFER_EMPTY, timeout);
			break;
		}
		case ATA_AUX_SET_ATAPI_FLAGS: {
			/* NOTE(review): the body comes from a shared include and presumably uses the
			   locals of this function (p, ap, args, r) — do not rename them. */
#include "FN_AFLAG.I"
			break;
		}
		default: {
			r = -ENOOP;
			break;
		}
	}
	va_end(args);
	return r;
}

/*
 * Quiesce one port before the driver goes away: on QDMA-capable ports the
 * engine is disabled, all QDMA interrupts are masked and all pending status
 * bits are written back. Ports without QDMA need no action here.
 */
__COLD_ATTR__ static void DISABLE_PORT(APORT *p)
{
#ifdef USE_QDMA
	if (!p->qdma)
		return;

	K2_DISABLE_QDMA(p);
	PORT_WRITE_32(p, PORT_QMR, 0);
	/* Read back the mask register before touching the status register. */
	PORT_READ_32(p, PORT_QMR);
	PORT_WRITE_32(p, PORT_QSR, ~0);
#endif
}

/*
 * Release every scatter-gather list that was allocated for port p.
 * Slots that were never allocated (NULL) are skipped.
 */
__COLD_ATTR__ static void FREE_PORT(ACTRL *a, APORT *p)
{
	int slot = 0;

	while (slot < QUEUE_DEPTH) {
		ASGLIST *list = p->sg_list[slot];
		if (list)
			ATA$FREE_SGLIST(a->dev_name, list);
		slot++;
	}
}

static int K2_UNLOAD(void *p, void **dlrq, const char * const argv[]);

__COLD_ATTR__ int main(int argc, const char * const argv[])
{
	int r, i;
	__u32 reg;
	union {
		MALLOC_REQUEST mrq;
		char pci_id_str[__MAX_STR_LEN];
		struct {
			VDESC desc;
			VDMA dma;
		} s;
	} u;
	char *chip_name;
	unsigned long flags;
	ACTRL *a;
	int irq;
	pci_id_t id = 0, id_mask = 0;
	int order = 0;
	int use_qdma = 1;
	int n_ports;
#ifdef USE_QDMA
	unsigned area_offset;
#endif
	const char *opt, *optend, *str;
	static const struct __param_table params[2] = {
		"NO_QDMA", __PARAM_BOOL, ~0, 0,
		NULL, 0, 0, 0,
	};
	const char * const *arg = argv;
	int state = 0;
	void *vars[2];
	vars[0] = &use_qdma;
	vars[1] = NULL;

	while (__parse_params(&arg, &state, params, vars, &opt, &optend, &str)) {
		if (PCI$PARSE_PARAMS(opt, optend, str, &id, &id_mask, &order)) {
		        _snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "K2: SYNTAX ERROR");
			r = -EBADSYN;
			goto ret0;
		}
	}
	if (PCI$FIND_DEVICE(pci_cards, id, id_mask, order, K2_TEST_LIST, &id, &chip_name, &flags, 0)) {
		_snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "K2: NO PCI DEVICE FOUND");
		r = -ENODEV;
		goto ret0;
	}

	n_ports = 4;
	if (flags & FLAGS_8PORT) n_ports = 8;
#ifdef USE_QDMA
	if (!(flags & FLAGS_QDMA))
#endif
		use_qdma = 0;

	u.mrq.size = sizeof(ACTRL) + (n_ports - 1) * sizeof(APORT);
	SYNC_IO_CANCELABLE(&u.mrq, KERNEL$UNIVERSAL_MALLOC);
	if (u.mrq.status < 0) {
		if (u.mrq.status != -EINTR) _snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "K2: CAN'T ALLOCATE MEMORY");
		r = u.mrq.status;
		goto ret1;
	}
	memset(u.mrq.ptr, 0, u.mrq.size);
	a = u.mrq.ptr;
	a->id = id;
	a->flags = flags;
	a->n_ports = n_ports;
	_snprintf(a->dev_name, sizeof a->dev_name, "ATA$K2@" PCI_ID_FORMAT, id);

	PCI$ENABLE_DEVICE(a->id, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);

	a->bar = PCI$MAP_MEM_RESOURCE(id, K2_BAR(a), K2_REGSPACE(a));
	if (__unlikely(!a->bar)) {
		KERNEL$SYSLOG(__SYSLOG_HW_BUG, a->dev_name, "NO MEM RESOURCE");
		_snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "%s: NO MEM RESOURCE", a->dev_name);
		r = -ENXIO;
		goto ret2;
	}
	if (__unlikely(__IS_ERR(a->bar))) {
		_snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "%s: COULD NOT MAP MEM RESOURCE: %s", a->dev_name, strerror(-__PTR_ERR(a->bar)));
		r = __PTR_ERR(a->bar);
		goto ret2;
	}

#ifdef USE_QDMA
	if (use_qdma) {
		a->k2_area = KERNEL$ALLOC_CONTIG_AREA(K2_AREA_SIZE(a->n_ports), AREA_DATA | AREA_PCIDMA | AREA_PHYSCONTIG | AREA_ALIGN, (unsigned long)K2_AREA_ALIGN);
		if (__unlikely(__IS_ERR(a->k2_area))) {
			r = __PTR_ERR(a->k2_area);
			goto ret25;
		}
		u.s.desc.ptr = (unsigned long)a->k2_area;
		u.s.desc.len = K2_AREA_SIZE(a->n_ports);
		u.s.desc.vspace = &KERNEL$VIRTUAL;
		u.s.dma.spl = SPL_X(SPL_ZERO);
		RAISE_SPL(SPL_VSPACE);
		a->k2_area_dmaunlock = KERNEL$VIRTUAL.op->vspace_dmalock(&u.s.desc, PF_RW, &u.s.dma);
		if (__unlikely(u.s.dma.len != u.s.desc.len))
			KERNEL$SUICIDE("K2: CAN'T DMALOCK K2 AREA");
		a->k2_area_virtual = u.s.dma.ptr;
	}
	area_offset = 0;
#endif

	for (i = 0; i < a->n_ports; i++) {
		int j;
		ASGLIST *sg;
		a->port[i].ctrl = a;
		a->port[i].n = i;
		a->port[i].bar = a->bar + i * PORT_SPACE;
		INIT_TIMER(&a->port[i].timeout);
		for (j = 0; j < (!use_qdma ? 1 : QUEUE_DEPTH); j++) {
			sg = ATA$ALLOC_SGLIST(a->dev_name, &a->port[i].sg_list_dev[j]);
			if (__unlikely(__IS_ERR(sg))) {
				r = __PTR_ERR(sg);
				if (r != -EINTR) _snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "%s: CAN'T ALLOCATE: %s", a->dev_name, strerror(-r));
				goto ret3;
			}
			a->port[i].sg_list[j] = sg;
		}
#ifdef USE_QDMA
		if (use_qdma) {
			a->port[i].qdma = (K2_CMD *)(a->k2_area + area_offset);
			a->port[i].qdma_virtual = a->k2_area_virtual + area_offset;
			memset(a->port[i].qdma, 0, sizeof(K2_CMD) * QDMA_ENTRIES);
			area_offset += sizeof(K2_CMD) * QDMA_ENTRIES;
			a->port[i].status = (K2_STATUS *)(a->k2_area + area_offset);
			a->port[i].status_virtual = a->k2_area_virtual + area_offset;
			area_offset += sizeof(K2_STATUS) * QDMA_ENTRIES;
		}
#endif
	}
#ifdef USE_QDMA
	if (use_qdma) {
		if (area_offset != K2_AREA_SIZE(a->n_ports))
			KERNEL$SUICIDE("K2: AREA SIZE MISMATCH: %u != %u", area_offset, (unsigned)K2_AREA_SIZE(a->n_ports));
	}
#endif
	
	reg = PORT_READ_32(&a->port[0], PORT_SICR1);
	reg &= ~0x00040000;
	PORT_WRITE_32(&a->port[0], PORT_SICR1, reg);
	PORT_WRITE_32(&a->port[0], PORT_SCR_ERROR, 0xffffffff);
	PORT_WRITE_32(&a->port[0], PORT_SIM, 0x00000000);

	irq = PCI$READ_INTERRUPT_LINE(a->id);

	_printf("%s: SATA CONTROLLER ON PCI: %s\n", a->dev_name, PCI$ID(u.pci_id_str, a->id));
	_printf("%s: %s\n", a->dev_name, chip_name);
	_printf("%s: %d PORTS, MEM %"__64_format"X, IRQ %d\n", a->dev_name, a->n_ports, PCI$READ_MEM_RESOURCE(a->id, K2_BAR(a), NULL), irq);

	a->irq_ast.fn = K2_IRQ;

	if ((r = KERNEL$REQUEST_IRQ(irq, &a->irq_ctrl, IRQ_REQUEST_AST_HANDLER | IRQ_REQUEST_SHARED | IRQ_REQUEST_MASKED, NULL, &a->irq_ast, a->dev_name)) < 0) {
		KERNEL$SYSLOG(__SYSLOG_SYS_CONFLICT, a->dev_name, "COULD NOT GET IRQ %d: %s", irq, strerror(-r));
		_snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "%s: COULD NOT GET IRQ", a->dev_name);
		goto ret3;
	}

#ifdef USE_QDMA
	for (i = 0; i < a->n_ports; i++) if (a->port[i].qdma) {
		K2_RESET_QDMA(&a->port[i], 0);
	}
#endif
	KERNEL$UNMASK_IRQ(a->irq_ctrl);

	r = KERNEL$REGISTER_DEVICE(a->dev_name, "K2.SYS", 0, a, NULL, ATA_DCALL, "ATA,SCSI", NULL, K2_UNLOAD, &a->lnte, NULL);
	if (r < 0) {
		if (r != -EINTR) _snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "%s: COULD NOT REGISTER DEVICE: %s", a->dev_name, strerror(-r));
		goto ret4;
	}
	a->dlrq = KERNEL$TSR_IMAGE();
	_snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "%s", a->dev_name);
	return 0;

	ret4:
	KERNEL$MASK_IRQ(a->irq_ctrl);
	for (i = 0; i < a->n_ports; i++) DISABLE_PORT(&a->port[i]);
	KERNEL$RELEASE_IRQ(a->irq_ctrl, IRQ_REQUEST_AST_HANDLER | IRQ_REQUEST_SHARED | IRQ_REQUEST_MASKED, NULL, &a->irq_ast);
	ret3:
	for (i = 0; i < a->n_ports; i++) FREE_PORT(a, &a->port[i]);
#ifdef USE_QDMA
	if (a->k2_area) {
		a->k2_area_dmaunlock(a->k2_area_virtual);
		KERNEL$FREE_CONTIG_AREA(a->k2_area, K2_AREA_SIZE(a->n_ports));
	}
	ret25:
#endif
	PCI$UNMAP_MEM_RESOURCE(a->id, a->bar, K2_REGSPACE(a));
	ret2:
	free(a);
	ret1:
	PCI$FREE_DEVICE(id);
	ret0:
	return r;
}

__COLD_ATTR__ static int K2_UNLOAD(void *p, void **dlrq, const char * const argv[])
{
	ACTRL *a = p;
	int i, r;
	RAISE_SPL(SPL_DEV);
	while (a->dcall_users) KERNEL$SLEEP(1);
	for (i = 0; i < a->n_ports; i++) if (a->port[i].device[0].attached) {
		LOWER_SPL(SPL_ZERO);
		_snprintf(KERNEL$ERROR_MSG(), __MAX_STR_LEN, "%s: SOME DRIVERS ARE USING THIS CONTROLLER", a->dev_name);
		return -EBUSY;
	}
	if ((r = KERNEL$DEVICE_UNLOAD(a->lnte, argv))) {
		LOWER_SPL(SPL_ZERO);
		return r;
	}
	LOWER_SPL(SPL_ZERO);
	KERNEL$MASK_IRQ(a->irq_ctrl);
	for (i = 0; i < a->n_ports; i++) DISABLE_PORT(&a->port[i]);
	KERNEL$RELEASE_IRQ(a->irq_ctrl, IRQ_REQUEST_AST_HANDLER | IRQ_REQUEST_SHARED | IRQ_REQUEST_MASKED, NULL, &a->irq_ast);
	for (i = 0; i < a->n_ports; i++) FREE_PORT(a, &a->port[i]);
#ifdef USE_QDMA
	if (a->k2_area) {
		a->k2_area_dmaunlock(a->k2_area_virtual);
		KERNEL$FREE_CONTIG_AREA(a->k2_area, K2_AREA_SIZE(a->n_ports));
	}
#endif
	PCI$UNMAP_MEM_RESOURCE(a->id, a->bar, K2_REGSPACE(a));
	PCI$FREE_DEVICE(a->id);
	*dlrq = a->dlrq;
	free(a);
	return 0;
}
