memset.S
3.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
/*
* linux/arch/m32r/lib/memset.S
*
* Copyright (C) 2001,2002 Hiroyuki Kondo, and Hirokazu Takata
* Copyright (C) 2004 Hirokazu Takata
*
* void *memset(void *dst, int val, int len);
*
* dst: r0
* val: r1
* len: r2
* ret: r0
*
*/
.text
.global memset
#ifdef CONFIG_ISA_DUAL_ISSUE
.align 4
/*
 * memset -- dual-issue build (CONFIG_ISA_DUAL_ISSUE).
 *
 * Fills len bytes starting at dst with the low byte of val.
 * "a || b" issues the two instructions as one parallel pair.
 *
 * Register use inside this routine:
 *   r0  dst; never written here, so it is still dst on return
 *   r1  fill value; replicated into all four bytes by word_set/qword_set
 *   r2  byte counter (biased by -4 while inside word_set_loop, see below)
 *   r3  scratch: alignment count, shift temporary, discarded load result
 *   r4  write cursor (biased by -4 while the "@+r4" stores are in use)
 *   r5  constant 16 for the qword loop compare
 *   r14 return address (every exit is "jmp r14" / "jc r14")
 *
 * Strategy:
 *   len == 0                    -> return immediately
 *   len <  4                    -> byte loop
 *   len >= 16, dst 16-aligned   -> 16 bytes per iteration, then remainder
 *   otherwise                   -> byte-align to 4, word loop, byte tail
 */
memset:
/* r4 = working copy of dst; condition bit = (len == 0) */
mv r4, r0 || cmpz r2
/* len == 0: nothing to do */
jc r14
/* len >= 16: see whether the 16-byte path can be used */
cmpui r2, #16
bnc qword_align_check
/* len < 4: too short for a word store, go straight to the byte loop */
cmpui r2, #4
bc byte_set
word_align_check: /* len >= 4 */
/* r3 = dst & 3; already word aligned when zero */
and3 r3, r4, #3
beqz r3, word_set
/* r3 = 4 - (dst & 3): number of leading bytes (1..3) up to alignment */
addi r3, #-4
neg r3, r3 /* r3 = -(r3 - 4) */
align_word:
/* store one leading byte and advance the cursor */
stb r1, @r4 || addi r4, #1
/* one byte fewer remaining, one byte fewer to alignment */
addi r2, #-1 || addi r3, #-1
bnez r3, align_word
/*
 * At most 3 of the >= 4 bytes were consumed above, so r2 >= 1 here.
 * With fewer than 4 bytes left, finish in the byte loop.
 */
cmpui r2, #4
bc byte_set
word_set:
/* replicate the low byte of val into all four byte lanes of r1 */
and3 r1, r1, #0x00ff /* r1: abababab <-- ??????ab */
sll3 r3, r1, #8
/* r4 is biased by -4 because the loop stores with pre-increment */
or r1, r3 || addi r4, #-4
sll3 r3, r1, #16
/* r2 is biased by -4: from here r2 = (bytes remaining) - 4 */
or r1, r3 || addi r2, #-4
/*
 * word_set_loop invariant on entry:
 *   r4 = (address of next word to write) - 4
 *   r2 = (bytes remaining) - 4
 */
word_set_loop:
/* advance r4 by 4, store one word, account for it in r2 */
st r1, @+r4 || addi r2, #-4
/* more than 4 bytes still remaining: keep going */
bgtz r2, word_set_loop
/* r2 < 0: 0..3 bytes remain, let byte_set_wrap sort it out */
bnez r2, byte_set_wrap
/* r2 == 0: exactly one word remains; store it and return */
st r1, @+r4
jmp r14
qword_align_check: /* len >= 16 */
/* the 16-byte loop is only used when dst is 16-byte aligned;     */
/* otherwise the whole request is handled by the word path        */
and3 r3, r4, #15
bnez r3, word_align_check
qword_set:
/* replicate the low byte of val into all four byte lanes of r1 */
and3 r1, r1, #0x00ff /* r1: abababab <-- ??????ab */
sll3 r3, r1, #8
/* bias r4 by -4 for the pre-increment stores below */
or r1, r3 || addi r4, #-4
sll3 r3, r1, #16
/* r5 = 16, compared against r2 inside the loop */
or r1, r3 || ldi r5, #16
qword_set_loop:
/*
 * Read the first word of the 16-byte chunk about to be written; the
 * value in r3 is not used afterwards. Presumably this pulls the line
 * into the cache before the stores, as the original comment says.
 */
ld r3, @(4,r4) /* cache line allocate */
/* four word stores = 16 bytes; r2 is decremented once per chunk */
st r1, @+r4 || addi r2, #-16
/* condition bit = (remaining < 16), consumed by the bnc below */
st r1, @+r4 || cmpu r2, r5
st r1, @+r4
st r1, @+r4
/*
 * Loop while at least 16 bytes remain. The paired cmpz leaves
 * condition bit = (r2 == 0) for the jc that follows on fall-through.
 * NOTE(review): this depends on bnc seeing the condition bit from the
 * cmpu above rather than from the cmpz paired with it -- confirm
 * against the parallel-execution rules.
 */
bnc qword_set_loop || cmpz r2
/* nothing left after the last full chunk */
jc r14
set_remainder:
/* 1..15 bytes remain; r4 points at the last word written */
cmpui r2, #4
/* fewer than 4: go to the byte loop via the cursor fix-up */
bc byte_set_wrap1
/* 4..15: apply the -4 bias and reuse the word loop (r4 already fits) */
addi r2, #-4
bra word_set_loop
byte_set_wrap:
/* undo the -4 bias from the word loop: r2 = bytes remaining (0..3) */
addi r2, #4
cmpz r2
/* no tail bytes: done */
jc r14
byte_set_wrap1:
/* r4 points at the last word written; step past it to the first tail byte */
addi r4, #4
/*
 * byte_set: store r2 bytes one at a time. Every path that reaches this
 * label has r2 >= 1, so the decrement-then-test loop terminates.
 */
#if defined(CONFIG_ISA_M32R2)
byte_set:
/* this variant stores with the post-increment form "@r4+" */
addi r2, #-1 || stb r1, @r4+
bnez r2, byte_set
#elif defined(CONFIG_ISA_M32R)
byte_set:
/* this variant advances r4 with a separate addi */
addi r2, #-1 || stb r1, @r4
addi r4, #1
bnez r2, byte_set
#else
#error unknown isa configuration
#endif
end_memset:
/* return; r0 still holds dst */
jmp r14
#else /* not CONFIG_ISA_DUAL_ISSUE */
.align 4
/*
 * memset -- single-issue build (CONFIG_ISA_DUAL_ISSUE not defined).
 *
 * Fills len bytes starting at dst with the low byte of val.
 *
 * Register use inside this routine:
 *   r0  dst; never written here, so it is still dst on return
 *   r1  fill value; replicated into all four bytes by word_set/qword_set
 *   r2  byte counter (biased by -4 while inside word_set_loop, see below)
 *   r3  scratch: alignment count, shift temporary, discarded load result
 *   r4  write cursor (biased by -4 while the "@+r4" stores are in use)
 *   r14 return address (every exit is "jmp r14")
 *
 * Strategy:
 *   len == 0                    -> return immediately
 *   len <  4                    -> byte loop
 *   len >= 16, dst 16-aligned   -> 16 bytes per iteration, then remainder
 *   otherwise                   -> byte-align to 4, word loop, byte tail
 */
memset:
/* r4 = working copy of dst */
mv r4, r0
/* len == 0: nothing to do */
beqz r2, end_memset
/* len >= 16: see whether the 16-byte path can be used */
cmpui r2, #16
bnc qword_align_check
/* len < 4: too short for a word store, go straight to the byte loop */
cmpui r2, #4
bc byte_set
word_align_check: /* len >= 4 */
/* r3 = dst & 3; already word aligned when zero */
and3 r3, r4, #3
beqz r3, word_set
/* r3 = 4 - (dst & 3): number of leading bytes (1..3) up to alignment */
addi r3, #-4
neg r3, r3 /* r3 = -(r3 - 4) */
align_word:
/* store one leading byte, advance the cursor, update both counters */
stb r1, @r4
addi r4, #1
addi r2, #-1
addi r3, #-1
bnez r3, align_word
/*
 * At most 3 of the >= 4 bytes were consumed above, so r2 >= 1 here.
 * With fewer than 4 bytes left, finish in the byte loop.
 */
cmpui r2, #4
bc byte_set
word_set:
/* replicate the low byte of val into all four byte lanes of r1 */
and3 r1, r1, #0x00ff /* r1: abababab <-- ??????ab */
sll3 r3, r1, #8
or r1, r3
sll3 r3, r1, #16
or r1, r3
/* bias both registers by -4: r2 = (bytes remaining) - 4, and r4 is */
/* set up for the pre-increment stores in the loop                  */
addi r2, #-4
addi r4, #-4
/*
 * word_set_loop invariant on entry:
 *   r4 = (address of next word to write) - 4
 *   r2 = (bytes remaining) - 4
 */
word_set_loop:
/* advance r4 by 4, store one word, account for it in r2 */
st r1, @+r4
addi r2, #-4
/* more than 4 bytes still remaining: keep going */
bgtz r2, word_set_loop
/* r2 < 0: 0..3 bytes remain, let byte_set_wrap sort it out */
bnez r2, byte_set_wrap
/* r2 == 0: exactly one word remains; store it and return */
st r1, @+r4
jmp r14
qword_align_check: /* len >= 16 */
/* the 16-byte loop is only used when dst is 16-byte aligned;     */
/* otherwise the whole request is handled by the word path        */
and3 r3, r4, #15
bnez r3, word_align_check
qword_set:
/* replicate the low byte of val into all four byte lanes of r1 */
and3 r1, r1, #0x00ff /* r1: abababab <-- ??????ab */
sll3 r3, r1, #8
or r1, r3
sll3 r3, r1, #16
or r1, r3
/* bias r4 by -4 for the pre-increment stores below */
addi r4, #-4
qword_set_loop:
/*
 * Read the first word of the 16-byte chunk about to be written; the
 * value in r3 is not used afterwards. Presumably this pulls the line
 * into the cache before the stores, as the original comment says.
 */
ld r3, @(4,r4) /* cache line allocate */
/* account for the 16 bytes written by the four stores below */
addi r2, #-16
st r1, @+r4
st r1, @+r4
/* condition bit = (remaining < 16), consumed by the bnc below */
cmpui r2, #16
st r1, @+r4
st r1, @+r4
/* loop while at least 16 bytes remain */
bnc qword_set_loop
/* 1..15 bytes left: finish with words and/or bytes */
bnez r2, set_remainder
/* nothing left after the last full chunk */
jmp r14
set_remainder:
/* 1..15 bytes remain; r4 points at the last word written */
cmpui r2, #4
/* fewer than 4: go to the byte loop via the cursor fix-up */
bc byte_set_wrap1
/* 4..15: apply the -4 bias and reuse the word loop (r4 already fits) */
addi r2, #-4
bra word_set_loop
byte_set_wrap:
/* undo the -4 bias from the word loop: r2 = bytes remaining (0..3) */
addi r2, #4
/* no tail bytes: done */
beqz r2, end_memset
byte_set_wrap1:
/* r4 points at the last word written; step past it to the first tail byte */
addi r4, #4
/*
 * byte_set: store r2 bytes one at a time. Every path that reaches this
 * label has r2 >= 1, so the decrement-then-test loop terminates.
 */
byte_set:
addi r2, #-1
stb r1, @r4
addi r4, #1
bnez r2, byte_set
end_memset:
/* return; r0 still holds dst */
jmp r14
#endif /* not CONFIG_ISA_DUAL_ISSUE */
.end