target/user: Introduce data_bitmap, replace data_length/data_head/data_tail

The data_bitmap was introduced to support asynchornization accessing of
data area.

We divide mailbox data area into blocks, and use data_bitmap to track the
usage of data area. All the new command's data would start with a new block,
and may left unusable space after it end. But it's easy to track using
data_bitmap.

Now we can allocate data area for asynchronization accessing from userspace,
since we can track the allocation using data_bitmap. The userspace part would
be the same as Maxim's previous asynchronized implementation.

Signed-off-by: Sheng Yang <sheng@yasker.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index baa8720..b1539f3 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -26,6 +26,7 @@
 #include <linux/vmalloc.h>
 #include <linux/uio_driver.h>
 #include <linux/stringify.h>
+#include <linux/bitops.h>
 #include <net/genetlink.h>
 #include <scsi/scsi_common.h>
 #include <scsi/scsi_proto.h>
@@ -63,8 +64,11 @@
 
 #define TCMU_TIME_OUT (30 * MSEC_PER_SEC)
 
+#define DATA_BLOCK_BITS 256
+#define DATA_BLOCK_SIZE 4096
+
 #define CMDR_SIZE (16 * 4096)
-#define DATA_SIZE (257 * 4096)
+#define DATA_SIZE (DATA_BLOCK_BITS * DATA_BLOCK_SIZE)
 
 #define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE)
 
@@ -93,12 +97,11 @@
 	u32 cmdr_size;
 	u32 cmdr_last_cleaned;
 	/* Offset of data ring from start of mb */
+	/* Must add data_off and mb_addr to get the address */
 	size_t data_off;
 	size_t data_size;
-	/* Ring head + tail values. */
-	/* Must add data_off and mb_addr to get the address */
-	size_t data_head;
-	size_t data_tail;
+
+	DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS);
 
 	wait_queue_head_t wait_cmdr;
 	/* TODO should this be a mutex? */
@@ -122,9 +125,9 @@
 
 	uint16_t cmd_id;
 
-	/* Can't use se_cmd->data_length when cleaning up expired cmds, because if
+	/* Can't use se_cmd when cleaning up expired cmds, because if
 	   cmd has been completed then accessing se_cmd is off limits */
-	size_t data_length;
+	DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS);
 
 	unsigned long deadline;
 
@@ -168,13 +171,6 @@
 
 	tcmu_cmd->se_cmd = se_cmd;
 	tcmu_cmd->tcmu_dev = udev;
-	tcmu_cmd->data_length = se_cmd->data_length;
-
-	if (se_cmd->se_cmd_flags & SCF_BIDI) {
-		BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents));
-		tcmu_cmd->data_length += se_cmd->t_bidi_data_sg->length;
-	}
-
 	tcmu_cmd->deadline = jiffies + msecs_to_jiffies(TCMU_TIME_OUT);
 
 	idr_preload(GFP_KERNEL);
@@ -242,111 +238,115 @@
 
 	iovec = *iov;
 	memset(iovec, 0, sizeof(struct iovec));
-
-	/* Even iov_base is relative to mb_addr */
-	iovec->iov_base = (void __user *) udev->data_off +
-		udev->data_head;
 }
 
 #define UPDATE_HEAD(head, used, size) smp_store_release(&head, ((head % size) + used) % size)
 
+/* offset is relative to mb_addr */
+static inline size_t get_block_offset(struct tcmu_dev *dev,
+		int block, int remaining)
+{
+	return dev->data_off + block * DATA_BLOCK_SIZE +
+		DATA_BLOCK_SIZE - remaining;
+}
+
+static inline size_t iov_tail(struct tcmu_dev *udev, struct iovec *iov)
+{
+	return (size_t)iov->iov_base + iov->iov_len;
+}
+
 static void alloc_and_scatter_data_area(struct tcmu_dev *udev,
 	struct scatterlist *data_sg, unsigned int data_nents,
 	struct iovec **iov, int *iov_cnt, bool copy_data)
 {
-	int i;
+	int i, block;
+	int block_remaining = 0;
 	void *from, *to;
-	size_t copy_bytes;
+	size_t copy_bytes, to_offset;
 	struct scatterlist *sg;
 
-	if (data_nents == 0)
-		return;
-
-	new_iov(iov, iov_cnt, udev);
 	for_each_sg(data_sg, sg, data_nents, i) {
-		copy_bytes = min_t(size_t, sg->length,
-				 head_to_end(udev->data_head, udev->data_size));
+		int sg_remaining = sg->length;
 		from = kmap_atomic(sg_page(sg)) + sg->offset;
-		to = (void *) udev->mb_addr + udev->data_off + udev->data_head;
-
-		if (copy_data) {
-			memcpy(to, from, copy_bytes);
-			tcmu_flush_dcache_range(to, copy_bytes);
-		}
-
-		(*iov)->iov_len += copy_bytes;
-
-		UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size);
-
-		/* Uh oh, we wrapped the buffer. Must split sg across 2 iovs. */
-		if (sg->length != copy_bytes) {
-			void *from_skip = from + copy_bytes;
-
-			copy_bytes = sg->length - copy_bytes;
-
-			new_iov(iov, iov_cnt, udev);
-			(*iov)->iov_len = copy_bytes;
-
+		while (sg_remaining > 0) {
+			if (block_remaining == 0) {
+				block = find_first_zero_bit(udev->data_bitmap,
+						DATA_BLOCK_BITS);
+				block_remaining = DATA_BLOCK_SIZE;
+				set_bit(block, udev->data_bitmap);
+			}
+			copy_bytes = min_t(size_t, sg_remaining,
+					block_remaining);
+			to_offset = get_block_offset(udev, block,
+					block_remaining);
+			to = (void *)udev->mb_addr + to_offset;
+			if (*iov_cnt != 0 &&
+			    to_offset == iov_tail(udev, *iov)) {
+				(*iov)->iov_len += copy_bytes;
+			} else {
+				new_iov(iov, iov_cnt, udev);
+				(*iov)->iov_base = (void __user *) to_offset;
+				(*iov)->iov_len = copy_bytes;
+			}
 			if (copy_data) {
-				to = (void *) udev->mb_addr +
-					udev->data_off + udev->data_head;
-				memcpy(to, from_skip, copy_bytes);
+				memcpy(to, from + sg->length - sg_remaining,
+					copy_bytes);
 				tcmu_flush_dcache_range(to, copy_bytes);
 			}
-
-
-			UPDATE_HEAD(udev->data_head,
-				copy_bytes, udev->data_size);
+			sg_remaining -= copy_bytes;
+			block_remaining -= copy_bytes;
 		}
-
 		kunmap_atomic(from - sg->offset);
 	}
 }
 
-static void free_data_area(struct tcmu_dev *udev, size_t length)
+static void free_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd)
 {
-	UPDATE_HEAD(udev->data_tail, length, udev->data_size);
+	bitmap_xor(udev->data_bitmap, udev->data_bitmap, cmd->data_bitmap,
+		   DATA_BLOCK_BITS);
 }
 
-static void gather_and_free_data_area(struct tcmu_dev *udev,
-	struct scatterlist *data_sg, unsigned int data_nents)
+static void gather_data_area(struct tcmu_dev *udev, unsigned long *cmd_bitmap,
+		struct scatterlist *data_sg, unsigned int data_nents)
 {
-	int i;
+	int i, block;
+	int block_remaining = 0;
 	void *from, *to;
-	size_t copy_bytes;
+	size_t copy_bytes, from_offset;
 	struct scatterlist *sg;
 
-	/* It'd be easier to look at entry's iovec again, but UAM */
 	for_each_sg(data_sg, sg, data_nents, i) {
-		copy_bytes = min_t(size_t, sg->length,
-				 head_to_end(udev->data_tail, udev->data_size));
-
+		int sg_remaining = sg->length;
 		to = kmap_atomic(sg_page(sg)) + sg->offset;
-		WARN_ON(sg->length + sg->offset > PAGE_SIZE);
-		from = (void *) udev->mb_addr +
-			udev->data_off + udev->data_tail;
-		tcmu_flush_dcache_range(from, copy_bytes);
-		memcpy(to, from, copy_bytes);
-
-		free_data_area(udev, copy_bytes);
-
-		/* Uh oh, wrapped the data buffer for this sg's data */
-		if (sg->length != copy_bytes) {
-			void *to_skip = to + copy_bytes;
-
-			from = (void *) udev->mb_addr +
-				udev->data_off + udev->data_tail;
-			WARN_ON(udev->data_tail);
-			copy_bytes = sg->length - copy_bytes;
+		while (sg_remaining > 0) {
+			if (block_remaining == 0) {
+				block = find_first_bit(cmd_bitmap,
+						DATA_BLOCK_BITS);
+				block_remaining = DATA_BLOCK_SIZE;
+				clear_bit(block, cmd_bitmap);
+			}
+			copy_bytes = min_t(size_t, sg_remaining,
+					block_remaining);
+			from_offset = get_block_offset(udev, block,
+					block_remaining);
+			from = (void *) udev->mb_addr + from_offset;
 			tcmu_flush_dcache_range(from, copy_bytes);
-			memcpy(to_skip, from, copy_bytes);
+			memcpy(to + sg->length - sg_remaining, from,
+					copy_bytes);
 
-			free_data_area(udev, copy_bytes);
+			sg_remaining -= copy_bytes;
+			block_remaining -= copy_bytes;
 		}
 		kunmap_atomic(to - sg->offset);
 	}
 }
 
+static inline size_t spc_bitmap_free(unsigned long *bitmap)
+{
+	return DATA_BLOCK_SIZE * (DATA_BLOCK_BITS -
+			bitmap_weight(bitmap, DATA_BLOCK_BITS));
+}
+
 /*
  * We can't queue a command until we have space available on the cmd ring *and*
  * space available on the data ring.
@@ -380,10 +380,10 @@
 		return false;
 	}
 
-	space = spc_free(udev->data_head, udev->data_tail, udev->data_size);
+	space = spc_bitmap_free(udev->data_bitmap);
 	if (space < data_needed) {
-		pr_debug("no data space: %zu %zu %zu\n", udev->data_head,
-		       udev->data_tail, udev->data_size);
+		pr_debug("no data space: only %lu available, but ask for %lu\n",
+				space, data_needed);
 		return false;
 	}
 
@@ -402,6 +402,8 @@
 	uint32_t cmd_head;
 	uint64_t cdb_off;
 	bool copy_to_data_area;
+	size_t data_length;
+	DECLARE_BITMAP(old_bitmap, DATA_BLOCK_BITS);
 
 	if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags))
 		return -EINVAL;
@@ -410,10 +412,12 @@
 	 * Must be a certain minimum size for response sense info, but
 	 * also may be larger if the iov array is large.
 	 *
-	 * 3 iovs since we can describe the whole continuous are using one
-	 * for data, one for bidi and one more in the case of wrap.
+	 * We prepare way too many iovs for potential uses here, because it's
+	 * expensive to tell how many regions are freed in the bitmap
 	*/
-	base_command_size = max(offsetof(struct tcmu_cmd_entry, req.iov[3]),
+	base_command_size = max(offsetof(struct tcmu_cmd_entry,
+				req.iov[se_cmd->t_bidi_data_nents +
+					se_cmd->t_data_nents]),
 				sizeof(struct tcmu_cmd_entry));
 	command_size = base_command_size
 		+ round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE);
@@ -424,13 +428,18 @@
 
 	mb = udev->mb_addr;
 	cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
+	data_length = se_cmd->data_length;
+	if (se_cmd->se_cmd_flags & SCF_BIDI) {
+		BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents));
+		data_length += se_cmd->t_bidi_data_sg->length;
+	}
 	if ((command_size > (udev->cmdr_size / 2))
-	    || tcmu_cmd->data_length > (udev->data_size - 1))
+	    || data_length > udev->data_size)
 		pr_warn("TCMU: Request of size %zu/%zu may be too big for %u/%zu "
-			"cmd/data ring buffers\n", command_size, tcmu_cmd->data_length,
+			"cmd/data ring buffers\n", command_size, data_length,
 			udev->cmdr_size, udev->data_size);
 
-	while (!is_ring_space_avail(udev, command_size, tcmu_cmd->data_length)) {
+	while (!is_ring_space_avail(udev, command_size, data_length)) {
 		int ret;
 		DEFINE_WAIT(__wait);
 
@@ -477,6 +486,8 @@
 	entry->hdr.kflags = 0;
 	entry->hdr.uflags = 0;
 
+	bitmap_copy(old_bitmap, udev->data_bitmap, DATA_BLOCK_BITS);
+
 	/*
 	 * Fix up iovecs, and handle if allocation in data ring wrapped.
 	 */
@@ -495,6 +506,10 @@
 		se_cmd->t_bidi_data_nents, &iov, &iov_cnt, false);
 	entry->req.iov_bidi_cnt = iov_cnt;
 
+	/* cmd's data_bitmap is what changed in process */
+	bitmap_xor(tcmu_cmd->data_bitmap, old_bitmap, udev->data_bitmap,
+			DATA_BLOCK_BITS);
+
 	/* All offsets relative to mb_addr, not start of entry! */
 	cdb_off = CMDR_OFF + cmd_head + base_command_size;
 	memcpy((void *) mb + cdb_off, se_cmd->t_task_cdb, scsi_command_size(se_cmd->t_task_cdb));
@@ -547,31 +562,36 @@
 	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
 		/* cmd has been completed already from timeout, just reclaim data
 		   ring space */
-		free_data_area(udev, cmd->data_length);
+		free_data_area(udev, cmd);
 		return;
 	}
 
 	if (entry->hdr.uflags & TCMU_UFLAG_UNKNOWN_OP) {
-		free_data_area(udev, cmd->data_length);
+		free_data_area(udev, cmd);
 		pr_warn("TCMU: Userspace set UNKNOWN_OP flag on se_cmd %p\n",
 			cmd->se_cmd);
 		entry->rsp.scsi_status = SAM_STAT_CHECK_CONDITION;
 	} else if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
 		memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer,
 			       se_cmd->scsi_sense_length);
-		free_data_area(udev, cmd->data_length);
+		free_data_area(udev, cmd);
 	} else if (se_cmd->se_cmd_flags & SCF_BIDI) {
-		/* Discard data_out buffer */
-		free_data_area(udev, (size_t)se_cmd->t_data_sg->length);
+		DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS);
 
-		/* Get Data-In buffer */
-		gather_and_free_data_area(udev,
+		/* Get Data-In buffer before clean up */
+		bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS);
+		gather_data_area(udev, bitmap,
 			se_cmd->t_bidi_data_sg, se_cmd->t_bidi_data_nents);
+		free_data_area(udev, cmd);
 	} else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
-		gather_and_free_data_area(udev,
+		DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS);
+
+		bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS);
+		gather_data_area(udev, bitmap,
 			se_cmd->t_data_sg, se_cmd->t_data_nents);
+		free_data_area(udev, cmd);
 	} else if (se_cmd->data_direction == DMA_TO_DEVICE) {
-		free_data_area(udev, cmd->data_length);
+		free_data_area(udev, cmd);
 	} else if (se_cmd->data_direction != DMA_NONE) {
 		pr_warn("TCMU: data direction was %d!\n",
 			se_cmd->data_direction);
@@ -912,6 +932,7 @@
 
 	WARN_ON(!PAGE_ALIGNED(udev->data_off));
 	WARN_ON(udev->data_size % PAGE_SIZE);
+	WARN_ON(udev->data_size % DATA_BLOCK_SIZE);
 
 	info->version = __stringify(TCMU_MAILBOX_VERSION);