瀏覽代碼

x86, mem: Don't implement forward memmove() as memcpy()

memmove() allows the source and destination regions to overlap, whereas
memcpy() makes no such guarantee.  Therefore, explicitly implement
memmove() in both the forward and backward directions, which gives us
the freedom to optimize memcpy() independently.

Signed-off-by: Ma Ling <ling.ma@intel.com>
LKML-Reference: <C10D3FB0CD45994C8A51FEC1227CE22F0E483AD86A@shsmsx502.ccr.corp.intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Ma, Ling 15 年之前
父節點
當前提交
fdf4289679
共有 2 個文件被更改,包括 68 次插入16 次删除
  1. 27 11
      arch/x86/lib/memcpy_32.c
  2. 41 5
      arch/x86/lib/memmove_64.c

+ 27 - 11
arch/x86/lib/memcpy_32.c

@@ -25,19 +25,35 @@ void *memmove(void *dest, const void *src, size_t n)
 	int d0, d1, d2;
 
 	if (dest < src) {
-		memcpy(dest, src, n);
+		if ((dest + n) < src)
+			 return memcpy(dest, src, n);
+		else
+			__asm__ __volatile__(
+				"rep\n\t"
+				"movsb\n\t"
+				: "=&c" (d0), "=&S" (d1), "=&D" (d2)
+				:"0" (n),
+				 "1" (src),
+				 "2" (dest)
+				:"memory");
+
 	} else {
-		__asm__ __volatile__(
-			"std\n\t"
-			"rep\n\t"
-			"movsb\n\t"
-			"cld"
-			: "=&c" (d0), "=&S" (d1), "=&D" (d2)
-			:"0" (n),
-			 "1" (n-1+src),
-			 "2" (n-1+dest)
-			:"memory");
+
+		if((src + count) < dest)
+			return memcpy(dest, src, count);
+		else
+			__asm__ __volatile__(
+				"std\n\t"
+				"rep\n\t"
+				"movsb\n\t"
+				"cld"
+				: "=&c" (d0), "=&S" (d1), "=&D" (d2)
+				:"0" (n),
+				 "1" (n-1+src),
+				 "2" (n-1+dest)
+				:"memory");
 	}
+
 	return dest;
 }
 EXPORT_SYMBOL(memmove);

+ 41 - 5
arch/x86/lib/memmove_64.c

@@ -8,13 +8,49 @@
 #undef memmove
 void *memmove(void *dest, const void *src, size_t count)
 {
+	unsigned long d0, d1, d2, d3;
 	if (dest < src) {
-		return memcpy(dest, src, count);
+		if ((dest + count) < src)
+			 return memcpy(dest, src, count);
+		else
+			__asm__ __volatile__(
+				"movq %0, %3\n\t"
+				"shr $3, %0\n\t"
+				"andq $7, %3\n\t"
+				"rep\n\t"
+				"movsq\n\t"
+				"movq %3, %0\n\t"
+				"rep\n\t"
+				"movsb"
+				: "=&c" (d0), "=&S" (d1), "=&D" (d2), "=r" (d3)
+				:"0" (count),
+				 "1" (src),
+				 "2" (dest)
+				:"memory");
 	} else {
-		char *p = dest + count;
-		const char *s = src + count;
-		while (count--)
-			*--p = *--s;
+		if((src + count) < dest)
+			return memcpy(dest, src, count);
+		else
+			__asm__ __volatile__(
+				"movq %0, %3\n\t"
+				"lea -8(%1, %0), %1\n\t"
+				"lea -8(%2, %0), %2\n\t"
+				"shr $3, %0\n\t"
+				"andq $7, %3\n\t"
+				"std\n\t"
+				"rep\n\t"
+				"movsq\n\t"
+				"lea 7(%1), %1\n\t"
+				"lea 7(%2), %2\n\t"
+				"movq %3, %0\n\t"
+				"rep\n\t"
+				"movsb\n\t"
+				"cld"
+				: "=&c" (d0), "=&S" (d1), "=&D" (d2), "=r" (d3)
+				:"0" (count),
+				 "1" (src),
+				 "2" (dest)
+				:"memory");
 	}
 	return dest;
 }