Blame view

kernel/linux-rt-4.4.41/arch/x86/lib/memmove_64.S 3.39 KB
5113f6f70   김현기   kernel add
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
  /*
   * Normally compiler builtins are used, but sometimes the compiler calls out
   * of line code. Based on asm-i386/string.h.
   *
   * This assembly file is re-written from memmove_64.c file.
   *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
   */
  #include <linux/linkage.h>
  #include <asm/cpufeature.h>
  #include <asm/alternative-asm.h>
  
  #undef memmove
  
  /*
   * Implement memmove(). This can handle overlap between src and dst.
   *
   * Input:
   * rdi: dest
   * rsi: src
   * rdx: count
   *
   * Output:
   * rax: dest
   *
   * Registers written by this routine: rax, rcx, rdx, rsi, rdi, r8, r9,
   * r10, r11 and the flags. The backward movsq path sets DF with std and
   * clears it again with cld before returning.
   *
   * Outline:
   *  - count < 32: go straight to the tail code at 1, which copies 0..31
   *    bytes by loading everything it needs before storing anything.
   *  - src >= dest, or src + count <= dest: copy forward.
   *  - otherwise: copy backward (label 2), starting from the end.
   *  - Within each direction, counts >= 680 whose src and dest share the
   *    same low address byte use rep movsq; everything else uses a loop
   *    moving 32 bytes per iteration through r8-r11, and then falls into
   *    the tail code at 1 for the remaining 0..31 bytes.
   */
  .weak memmove
  
  /*
   * Two entry symbols, memmove and __memmove, are opened back to back with
   * no instructions between them; memmove is the one declared weak above.
   */
  ENTRY(memmove)
  ENTRY(__memmove)
  
  	/*
  	 * rax = dest is the return value. Counts below 32 bytes skip the
  	 * copy loops and are handled entirely by the tail code at 1.
  	 */
  	mov %rdi, %rax
  	cmp $0x20, %rdx
  	jb	1f
  
  	/*
  	 * Decide forward/backward copy mode:
  	 * forward if src >= dest, or if r8 = src + count <= dest;
  	 * otherwise backward (2).
  	 * NOTE(review): jge/jg compare the addresses as signed values;
  	 * unsigned jae/ja is the usual choice for pointers - confirm that
  	 * the signed form is intentional.
  	 */
  	cmp %rdi, %rsi
  	jge .Lmemmove_begin_forward
  	mov %rsi, %r8
  	add %rdx, %r8
  	cmp %rdi, %r8
  	jg 2f
  
  .Lmemmove_begin_forward:
  	/*
  	 * ALTERNATIVE comes from asm/alternative-asm.h (not visible here).
  	 * The default sequence is empty; the replacement keyed on
  	 * X86_FEATURE_ERMS performs the whole forward copy with one
  	 * rep movsb (rcx = count) and returns, so nothing below runs when
  	 * that replacement is in effect.
  	 */
  	ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
  
  	/*
  	 * movsq has a high startup latency, so counts below 680 bytes
  	 * are copied through general registers instead (3).
  	 */
  	cmp  $680, %rdx
  	jb	3f
  	/*
  	 * Use movsq (4) only when the low address bytes of dest and src
  	 * are equal, i.e. both pointers have the same alignment.
  	 */
  
  	cmpb %dil, %sil
  	je 4f
  3:
  	/*
  	 * Bias the counter by -32. Together with the sub at the top of
  	 * the loop this makes CF = 1 exactly when fewer than 32 bytes
  	 * will remain after the chunk being copied.
  	 */
  	sub $0x20, %rdx
  	/*
  	 * Copy 32 bytes forward per iteration. All four quadwords are
  	 * loaded before any is stored. mov and lea leave the flags
  	 * untouched, so the jae at the bottom still tests the carry
  	 * produced by the sub at the top.
  	 */
  5:
  	sub $0x20, %rdx
  	movq 0*8(%rsi), %r11
  	movq 1*8(%rsi), %r10
  	movq 2*8(%rsi), %r9
  	movq 3*8(%rsi), %r8
  	leaq 4*8(%rsi), %rsi
  
  	movq %r11, 0*8(%rdi)
  	movq %r10, 1*8(%rdi)
  	movq %r9, 2*8(%rdi)
  	movq %r8, 3*8(%rdi)
  	leaq 4*8(%rdi), %rdi
  	jae 5b
  	/*
  	 * Undo the bias: rdx = bytes still to copy (0..31), with rsi/rdi
  	 * already pointing at them. Finish in the tail code.
  	 */
  	addq $0x20, %rdx
  	jmp 1f
  	/*
  	 * Forward copy with movsq.
  	 * r11 = last 8 source bytes, read before the string copy runs;
  	 * r10 = address of the last 8 destination bytes.
  	 * rep movsq moves count / 8 quadwords; storing r11 at r10
  	 * afterwards fills in the up to 7 trailing bytes it did not reach.
  	 */
  	.p2align 4
  4:
  	movq %rdx, %rcx
  	movq -8(%rsi, %rdx), %r11
  	lea -8(%rdi, %rdx), %r10
  	shrq $3, %rcx
  	rep movsq
  	movq %r11, (%r10)
  	jmp 13f
  	/* This label is not referenced by any code shown in this file. */
  .Lmemmove_end_forward:
  
  	/*
  	 * Backward copy with movsq.
  	 * r11 = first 8 source bytes, read before the string copy runs;
  	 * r10 = dest. rsi/rdi are moved to the last quadword of each
  	 * buffer and DF is set so that rep movsq walks downwards; DF is
  	 * cleared again right after. Storing r11 at r10 then fills in the
  	 * up to 7 leading bytes the quadword copy did not reach.
  	 */
  	.p2align 4
  7:
  	movq %rdx, %rcx
  	movq (%rsi), %r11
  	movq %rdi, %r10
  	leaq -8(%rsi, %rdx), %rsi
  	leaq -8(%rdi, %rdx), %rdi
  	shrq $3, %rcx
  	std
  	rep movsq
  	cld
  	movq %r11, (%r10)
  	jmp 13f
  
  	/*
  	 * Backward copy entry. Same selection as the forward case:
  	 * movsq (7) only for counts >= 680 with equal low address bytes,
  	 * otherwise the register loop below.
  	 */
  	.p2align 4
  2:
  	cmp $680, %rdx
  	jb 6f
  	cmp %dil, %sil
  	je 7b
  6:
  	/*
  	 * Point rsi/rdi one past the end of each buffer and bias the
  	 * counter by -32, as in the forward loop.
  	 */
  	addq %rdx, %rsi
  	addq %rdx, %rdi
  	subq $0x20, %rdx
  	/*
  	 * Copy 32 bytes backward per iteration. As in the forward loop,
  	 * jae tests the carry from the subq at the top because mov and
  	 * lea do not change the flags.
  	 */
  8:
  	subq $0x20, %rdx
  	movq -1*8(%rsi), %r11
  	movq -2*8(%rsi), %r10
  	movq -3*8(%rsi), %r9
  	movq -4*8(%rsi), %r8
  	leaq -4*8(%rsi), %rsi
  
  	movq %r11, -1*8(%rdi)
  	movq %r10, -2*8(%rdi)
  	movq %r9, -3*8(%rdi)
  	movq %r8, -4*8(%rdi)
  	leaq -4*8(%rdi), %rdi
  	jae 8b
  	/*
  	 * Undo the bias so rdx = bytes still to copy (0..31). Those bytes
  	 * sit at the head of the buffers, so step rsi/rdi back by rdx to
  	 * the original src/dest before entering the tail code.
  	 */
  	addq $0x20, %rdx
  	subq %rdx, %rsi
  	subq %rdx, %rdi
  1:
  	/*
  	 * Tail: copy rdx (0..31) bytes from rsi to rdi. Each case loads
  	 * a block from the start and a block from the end of the range
  	 * (the two may overlap) before storing either one.
  	 */
  	cmpq $16, %rdx
  	jb 9f
  	/*
  	 * Move data from 16 bytes to 31 bytes.
  	 */
  	movq 0*8(%rsi), %r11
  	movq 1*8(%rsi), %r10
  	movq -2*8(%rsi, %rdx), %r9
  	movq -1*8(%rsi, %rdx), %r8
  	movq %r11, 0*8(%rdi)
  	movq %r10, 1*8(%rdi)
  	movq %r9, -2*8(%rdi, %rdx)
  	movq %r8, -1*8(%rdi, %rdx)
  	jmp 13f
  	.p2align 4
  9:
  	cmpq $8, %rdx
  	jb 10f
  	/*
  	 * Move data from 8 bytes to 15 bytes.
  	 */
  	movq 0*8(%rsi), %r11
  	movq -1*8(%rsi, %rdx), %r10
  	movq %r11, 0*8(%rdi)
  	movq %r10, -1*8(%rdi, %rdx)
  	jmp 13f
  10:
  	cmpq $4, %rdx
  	jb 11f
  	/*
  	 * Move data from 4 bytes to 7 bytes.
  	 */
  	movl (%rsi), %r11d
  	movl -4(%rsi, %rdx), %r10d
  	movl %r11d, (%rdi)
  	movl %r10d, -4(%rdi, %rdx)
  	jmp 13f
  11:
  	cmp $2, %rdx
  	jb 12f
  	/*
  	 * Move data from 2 bytes to 3 bytes.
  	 */
  	movw (%rsi), %r11w
  	movw -2(%rsi, %rdx), %r10w
  	movw %r11w, (%rdi)
  	movw %r10w, -2(%rdi, %rdx)
  	jmp 13f
  12:
  	/* A count of 0 copies nothing and just returns. */
  	cmp $1, %rdx
  	jb 13f
  	/*
  	 * Move data for 1 byte.
  	 */
  	movb (%rsi), %r11b
  	movb %r11b, (%rdi)
  13:
  	/* Common exit; rax still holds the original dest. */
  	retq
  ENDPROC(__memmove)
  ENDPROC(memmove)