Blame view

kernel/linux-rt-4.4.41/arch/x86/um/checksum_32.S 4.74 KB
5113f6f70   김현기   kernel add
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
  /*
   * INET		An implementation of the TCP/IP protocol suite for the LINUX
   *		operating system.  INET is implemented using the  BSD Socket
   *		interface as the means of communication with the user level.
   *
   *		IP/TCP/UDP checksumming routines
   *
   * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
   *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
   *		Tom May, <ftom@netcom.com>
   *              Pentium Pro/II routines:
   *              Alexander Kjeldaas <astor@guardian.no>
   *              Finn Arne Gangstad <finnag@guardian.no>
   *		Lots of code moved from tcp.c and ip.c; see those files
   *		for more names.
   *
   * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
   *			     handling.
   *		Andi Kleen,  add zeroing on error
   *                   converted to pure assembler
   *
   *		This program is free software; you can redistribute it and/or
   *		modify it under the terms of the GNU General Public License
   *		as published by the Free Software Foundation; either version
   *		2 of the License, or (at your option) any later version.
   */
  
  #include <asm/errno.h>
  #include <asm/asm.h>
  				
  /*
   * computes a partial checksum, e.g. for TCP/UDP fragments
   */
  
  /*	
  unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
   */
  		
  .text
  .align 4
  .globl csum_partial
  		
  #ifndef CONFIG_X86_USE_PPRO_CHECKSUM
  
  	  /*		
  	   * Experiments with Ethernet and SLIP connections show that buff
  	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
  	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
  	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
  	   * alignment for the unrolled loop.
  	   */		
  /*
   * csum_partial, variant assembled when CONFIG_X86_USE_PPRO_CHECKSUM is
   * not defined.
   *
   * In:   buff, len, sum on the stack (read below at 12/16/20(%esp) once
   *       %esi and %ebx have been pushed).
   * Out:  %eax = sum plus the data, accumulated with end-around carry.
   * Regs: %esi and %ebx are saved and restored; %ecx and %edx are scratch.
   *
   * Layout: an optional leading 16-bit word to clear bit 1 of the address,
   * then 32-byte unrolled chunks, then the remaining whole dwords, then
   * the 1-3 trailing bytes.
   */
  csum_partial:
  	pushl %esi
  	pushl %ebx
  	movl 20(%esp),%eax	# Function arg: unsigned int sum
  	movl 16(%esp),%ecx	# Function arg: int len
  	movl 12(%esp),%esi	# Function arg: unsigned char *buff
  	testl $2, %esi		# Check alignment.
  	jz 2f			# Jump if alignment is ok.
  	subl $2, %ecx		# Alignment uses up two bytes.
  	jae 1f			# Jump if we had at least two bytes.
  	addl $2, %ecx		# ecx was < 2.  Deal with it.
  	jmp 4f
  1:	movw (%esi), %bx	# Fold the leading 16-bit word into the sum.
  	addl $2, %esi
  	addw %bx, %ax
  	adcl $0, %eax		# End-around carry from the 16-bit add.
  2:
  	movl %ecx, %edx		# edx keeps the remaining byte count.
  	shrl $5, %ecx		# ecx = number of 32-byte chunks.
  	jz 2f			# No full chunk: go to the dword remainder.
  	testl %esi, %esi	# Clears CF before the adcl chain.
  1:	movl (%esi), %ebx
  	adcl %ebx, %eax
  	movl 4(%esi), %ebx
  	adcl %ebx, %eax
  	movl 8(%esi), %ebx
  	adcl %ebx, %eax
  	movl 12(%esi), %ebx
  	adcl %ebx, %eax
  	movl 16(%esi), %ebx
  	adcl %ebx, %eax
  	movl 20(%esi), %ebx
  	adcl %ebx, %eax
  	movl 24(%esi), %ebx
  	adcl %ebx, %eax
  	movl 28(%esi), %ebx
  	adcl %ebx, %eax
  	lea 32(%esi), %esi	# lea and dec leave CF intact for the next pass.
  	dec %ecx
  	jne 1b
  	adcl $0, %eax		# Fold in the carry left by the last chunk.
  2:	movl %edx, %ecx		# ecx = remaining byte count again.
  	andl $0x1c, %edx	# edx = bytes held in whole dwords below 32.
  	je 4f
  	shrl $2, %edx		# This clears CF
  3:	adcl (%esi), %eax
  	lea 4(%esi), %esi
  	dec %edx
  	jne 3b
  	adcl $0, %eax		# Fold in the carry left by the dword loop.
  4:	andl $3, %ecx		# ecx = trailing byte count, 0 to 3.
  	jz 7f
  	cmpl $2, %ecx
  	jb 5f			# One byte left.
  	movw (%esi),%cx		# Two or three left: load a word, flags unchanged.
  	leal 2(%esi),%esi
  	je 6f			# Exactly two: still acting on the cmpl result.
  	shll $16,%ecx		# Three left: move the word up, last byte goes in cl.
  5:	movb (%esi),%cl
  6:	addl %ecx,%eax
  	adcl $0, %eax 
  7:	
  	popl %ebx
  	popl %esi
  	ret
  
  #else
  
  /* Version for PentiumII/PPro */
  
  /*
   * csum_partial, variant assembled when CONFIG_X86_USE_PPRO_CHECKSUM is
   * defined.
   *
   * In:   buff, len, sum on the stack (read below at 12/16/20(%esp) once
   *       %esi and %ebx have been pushed).
   * Out:  %eax = sum plus the data, accumulated with end-around carry.
   * Regs: %esi and %ebx are saved and restored; %ecx and %edx are scratch.
   *
   * Layout: if bit 1 of the address is set, one 16-bit word (or the whole
   * buffer when len <= 2) is handled at 20:/30:/32:.  The dword part is
   * summed by the 32-entry add chain at 40:.  The first pass enters the
   * chain part-way through via a computed jump so that only the
   * (len & 0x7c) / 4 leftover dwords are added; each later pass adds a
   * full 128 bytes.  The final 1-3 bytes are masked out of one dword load.
   *
   * The computed jump assumes every addl/adcl disp8(%esi),%eax in the
   * chain assembles to exactly 3 bytes; do not edit the chain without
   * revisiting the lea at 10:.
   */
  csum_partial:
  	pushl %esi
  	pushl %ebx
  	movl 20(%esp),%eax	# Function arg: unsigned int sum
  	movl 16(%esp),%ecx	# Function arg: int len
  	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf
  
  	testl $2, %esi		# Is bit 1 of the address set?
  	jnz 30f			# Yes: take the 2-byte-aligned path first.
  10:
  	movl %ecx, %edx		# edx keeps len for the tail-byte step.
  	movl %ecx, %ebx
  	andl $0x7c, %ebx	# ebx = bytes in leftover dwords (len mod 128).
  	shrl $7, %ecx		# ecx = number of full 128-byte chunks.
  	addl %ebx,%esi		# Chain uses negative offsets from esi.
  	shrl $2, %ebx		# ebx = leftover dword count.
  	negl %ebx
  	lea 45f(%ebx,%ebx,2), %ebx	# Entry = 45f - 3 * leftover dwords.
  	testl %esi, %esi	# Clears CF before entering the adcl chain.
  	jmp *%ebx
  
  	# Handle 2-byte-aligned regions
  20:	addw (%esi), %ax
  	lea 2(%esi), %esi
  	adcl $0, %eax		# End-around carry from the 16-bit add.
  	jmp 10b
  
  30:	subl $2, %ecx
  	ja 20b			# len > 2: consume one word, then the main path.
  	je 32f			# len == 2: exactly one word.
  	addl $2, %ecx		# len < 2: undo the subtraction.
  	jz 80f			# len == 0: nothing to sum, do not read (%esi).
  	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
  	addl %ebx, %eax
  	adcl $0, %eax
  	jmp 80f
  32:
  	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
  	adcl $0, %eax
  	jmp 80f
  
  40: 
  	addl -128(%esi), %eax
  	adcl -124(%esi), %eax
  	adcl -120(%esi), %eax
  	adcl -116(%esi), %eax   
  	adcl -112(%esi), %eax   
  	adcl -108(%esi), %eax
  	adcl -104(%esi), %eax
  	adcl -100(%esi), %eax
  	adcl -96(%esi), %eax
  	adcl -92(%esi), %eax
  	adcl -88(%esi), %eax
  	adcl -84(%esi), %eax
  	adcl -80(%esi), %eax
  	adcl -76(%esi), %eax
  	adcl -72(%esi), %eax
  	adcl -68(%esi), %eax
  	adcl -64(%esi), %eax     
  	adcl -60(%esi), %eax     
  	adcl -56(%esi), %eax     
  	adcl -52(%esi), %eax   
  	adcl -48(%esi), %eax   
  	adcl -44(%esi), %eax
  	adcl -40(%esi), %eax
  	adcl -36(%esi), %eax
  	adcl -32(%esi), %eax
  	adcl -28(%esi), %eax
  	adcl -24(%esi), %eax
  	adcl -20(%esi), %eax
  	adcl -16(%esi), %eax
  	adcl -12(%esi), %eax
  	adcl -8(%esi), %eax
  	adcl -4(%esi), %eax
  45:
  	lea 128(%esi), %esi	# Step to the next chunk; lea keeps CF.
  	adcl $0, %eax		# Fold in the carry left by the chain.
  	dec %ecx
  	jge 40b			# Runs once per full 128-byte chunk.
  	movl %edx, %ecx
  50:	andl $3, %ecx		# ecx = trailing byte count, 0 to 3.
  	jz 80f
  
  	# Handle the last 1-3 bytes without jumping
  	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
  	movl $0xffffff,%ebx	# by the shll and shrl instructions
  	shll $3,%ecx
  	shrl %cl,%ebx		# ebx = 0xff, 0xffff or 0xffffff.
  	/*
  	 * The load below fetches a whole dword and masks off the bytes past
  	 * len.  -128 compensates for the lea at 45:.
  	 * NOTE(review): only bit 1 of the address is tested on entry, so the
  	 * 4-alignment claimed below holds only for even buff - confirm callers.
  	 */
  	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
  	addl %ebx,%eax
  	adcl $0,%eax
  80: 
  	popl %ebx
  	popl %esi
  	ret
  				
  #endif