kernel/linux-rt-4.4.41/arch/x86/lib/memset_64.S

  /* Copyright 2002 Andi Kleen, SuSE Labs */
  
  #include <linux/linkage.h>
  #include <asm/cpufeature.h>
  #include <asm/alternative-asm.h>
  
  .weak memset
  
  /*
   * ISO C memset - set a memory block to a byte value. This function uses fast
   * string instructions to get better performance than the original function. The
   * code is simpler and shorter than the original function as well.
   *
   * rdi   destination
   * rsi   value (char)
   * rdx   count (bytes)
   *
   * rax   original destination
   */
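  /*
   * For orientation, a rough C-level sketch of the fast-string path below;
   * an illustrative sketch only, not part of the kernel build, and it
   * ignores alignment and aliasing details. The first loop corresponds to
   * the rep stosq, the second to the rep stosb:
   *
   *	void *memset_sketch(void *dst, int c, unsigned long n)
   *	{
   *		unsigned long v = (unsigned char)c * 0x0101010101010101UL;
   *		unsigned char *p = dst;
   *
   *		for (; n >= 8; n -= 8, p += 8)
   *			*(unsigned long *)p = v;
   *		while (n--)
   *			*p++ = (unsigned char)c;
   *		return dst;
   *	}
   */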
  ENTRY(memset)
  ENTRY(__memset)
  	/*
  	 * Some CPUs support the enhanced REP MOVSB/STOSB (ERMS) feature; it is
  	 * recommended to use it when available, so jump to memset_erms in that
  	 * case. If ERMS is absent but fast string operations are supported
  	 * (REP_GOOD), fall through to the rep stosq code below. Otherwise,
  	 * jump to the original memset function.
  	 */
  	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
  		      "jmp memset_erms", X86_FEATURE_ERMS
  
  	movq %rdi,%r9
  	movq %rdx,%rcx
  	andl $7,%edx
  	shrq $3,%rcx
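  	/* %rcx now holds the qword count, %edx the 0..7 trailing bytes */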
  	/* expand byte value  */
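  	/* (the multiply by 0x0101010101010101 below replicates the byte
  	   into all eight bytes of %rax) */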
  	movzbl %sil,%esi
  	movabs $0x0101010101010101,%rax
  	imulq %rsi,%rax
  	rep stosq
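  	/* store the remaining 0..7 bytes counted in %edx */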
  	movl %edx,%ecx
  	rep stosb
  	movq %r9,%rax
  	ret
  ENDPROC(memset)
  ENDPROC(__memset)
  
  /*
   * ISO C memset - set a memory block to a byte value. This function uses
   * enhanced rep stosb to override the fast string function.
   * The code is simpler and shorter than the fast string function as well.
   *
   * rdi   destination
   * rsi   value (char)
   * rdx   count (bytes)
   *
   * rax   original destination
   */
  ENTRY(memset_erms)
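  	/* with ERMS, a single rep stosb is fast for any length, so no
  	   qword/byte split is needed */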
  	movq %rdi,%r9
  	movb %sil,%al
  	movq %rdx,%rcx
  	rep stosb
  	movq %r9,%rax
  	ret
  ENDPROC(memset_erms)
  
  ENTRY(memset_orig)
  	movq %rdi,%r10
  
  	/* expand byte value  */
  	movzbl %sil,%ecx
  	movabs $0x0101010101010101,%rax
  	imulq  %rcx,%rax
  
  	/* align dst */
  	movl  %edi,%r9d
  	andl  $7,%r9d
  	jnz  .Lbad_alignment
  .Lafter_bad_alignment:
  
  	movq  %rdx,%rcx
  	shrq  $6,%rcx
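  	/* %rcx = number of 64-byte blocks */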
  	jz	 .Lhandle_tail
  
  	.p2align 4
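  	/* unrolled main loop: eight qword stores (64 bytes) per iteration */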
  .Lloop_64:
  	decq  %rcx
  	movq  %rax,(%rdi)
  	movq  %rax,8(%rdi)
  	movq  %rax,16(%rdi)
  	movq  %rax,24(%rdi)
  	movq  %rax,32(%rdi)
  	movq  %rax,40(%rdi)
  	movq  %rax,48(%rdi)
  	movq  %rax,56(%rdi)
  	leaq  64(%rdi),%rdi
  	jnz    .Lloop_64
  
  	/* Handle the tail in loops. The loops should be faster than
  	   hard-to-predict jump tables. */
  	.p2align 4
  .Lhandle_tail:
  	movl	%edx,%ecx
  	andl    $63&(~7),%ecx
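  	/* bytes left below 64 that still form whole qwords (count & 56) */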
  	jz 		.Lhandle_7
  	shrl	$3,%ecx
  	.p2align 4
  .Lloop_8:
  	decl   %ecx
  	movq  %rax,(%rdi)
  	leaq  8(%rdi),%rdi
  	jnz    .Lloop_8
  
  .Lhandle_7:
  	andl	$7,%edx
  	jz      .Lende
  	.p2align 4
  .Lloop_1:
  	decl    %edx
  	movb 	%al,(%rdi)
  	leaq	1(%rdi),%rdi
  	jnz     .Lloop_1
  
  .Lende:
  	movq	%r10,%rax
  	ret
  
  .Lbad_alignment:
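  	/* destination is not 8-byte aligned: store one unaligned qword,
  	   then advance %rdi to the next 8-byte boundary and continue */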
  	cmpq $7,%rdx
  	jbe	.Lhandle_7
  	movq %rax,(%rdi)	/* unaligned store */
  	movq $8,%r8
  	subq %r9,%r8
  	addq %r8,%rdi
  	subq %r8,%rdx
  	jmp .Lafter_bad_alignment
  .Lfinal:
  ENDPROC(memset_orig)