memcpy.c 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. /*
  2. * arch/openrisc/lib/memcpy.c
  3. *
  4. * Optimized memory copy routines for openrisc. These are mostly copied
  5. * from other sources but slightly extended based on ideas discussed in
  6. * #openrisc.
  7. *
  8. * The word unroll implementation is an extension to the arm byte
  9. * unrolled implementation, but using word copies (if things are
  10. * properly aligned)
  11. *
  12. * The great arm loop unroll algorithm can be found at:
  13. * arch/arm/boot/compressed/string.c
  14. */
  15. #include <linux/export.h>
  16. #include <linux/string.h>
  17. #ifdef CONFIG_OR1200
  /*
   * Do memcpy with word copies and loop unrolling. This gives the
   * best performance on the OR1200 and MOR1KX architectures.
   *
   * @dest: start of the destination buffer
   * @src:  start of the source buffer
   * @n:    number of bytes to copy
   *
   * Bytes are copied in ascending address order and nothing is done to
   * handle overlapping buffers. Returns @dest.
   */
  void *memcpy(void *dest, __const void *src, __kernel_size_t n)
  {
      unsigned char *d;
      const unsigned char *s;
      __kernel_size_t blocks;

      /* If both source and dest are word aligned copy words */
      if ((((unsigned long)dest | (unsigned long)src) & 3) == 0) {
          uint32_t *dest_w = dest;
          const uint32_t *src_w = src;

          /* Copy 32 bytes per loop */
          for (blocks = n >> 5; blocks > 0; blocks--) {
              *dest_w++ = *src_w++;
              *dest_w++ = *src_w++;
              *dest_w++ = *src_w++;
              *dest_w++ = *src_w++;
              *dest_w++ = *src_w++;
              *dest_w++ = *src_w++;
              *dest_w++ = *src_w++;
              *dest_w++ = *src_w++;
          }

          /* Bits 4, 3 and 2 of n select the 16, 8 and 4 byte leftovers */
          if (n & (1 << 4)) {
              *dest_w++ = *src_w++;
              *dest_w++ = *src_w++;
              *dest_w++ = *src_w++;
              *dest_w++ = *src_w++;
          }

          if (n & (1 << 3)) {
              *dest_w++ = *src_w++;
              *dest_w++ = *src_w++;
          }

          if (n & (1 << 2))
              *dest_w++ = *src_w++;

          /* Hand over to the byte pointers for the last 0-3 bytes */
          d = (unsigned char *)dest_w;
          s = (const unsigned char *)src_w;
      } else {
          d = dest;
          s = src;

          /* Unaligned: copy 8 bytes per loop, one byte at a time */
          for (blocks = n >> 3; blocks > 0; blocks--) {
              *d++ = *s++;
              *d++ = *s++;
              *d++ = *s++;
              *d++ = *s++;
              *d++ = *s++;
              *d++ = *s++;
              *d++ = *s++;
              *d++ = *s++;
          }

          if (n & (1 << 2)) {
              *d++ = *s++;
              *d++ = *s++;
              *d++ = *s++;
              *d++ = *s++;
          }
      }

      /* Both paths finish with the low two bits of n: 2 bytes, then 1 */
      if (n & (1 << 1)) {
          *d++ = *s++;
          *d++ = *s++;
      }

      if (n & 1)
          *d = *s;

      return dest;
  }
  82. #else
  /*
   * Use word copies but no loop unrolling as we cannot assume there
   * will be benefits on the architecture.
   *
   * @dest: start of the destination buffer
   * @src:  start of the source buffer
   * @n:    number of bytes to copy
   *
   * Bytes are copied in ascending address order and nothing is done to
   * handle overlapping buffers. Returns @dest.
   */
  void *memcpy(void *dest, __const void *src, __kernel_size_t n)
  {
      unsigned char *d = dest;
      const unsigned char *s = src;

      /* If both source and dest are word aligned copy words */
      if ((((unsigned long)dest | (unsigned long)src) & 3) == 0) {
          uint32_t *dest_w = dest;
          const uint32_t *src_w = src;

          for (; n >= 4; n -= 4)
              *dest_w++ = *src_w++;

          /* Continue byte-wise from where the word copy stopped */
          d = (unsigned char *)dest_w;
          s = (const unsigned char *)src_w;
      }

      /* For remaining or if not aligned, copy bytes */
      for (; n > 0; n--)
          *d++ = *s++;

      return dest;
  }
  103. #endif
  104. EXPORT_SYMBOL(memcpy);