kernel/linux-rt-4.4.41/arch/arc/lib/strcmp.S

  /*
   * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License version 2 as
   * published by the Free Software Foundation.
   */
  
/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word
   (one cycle / byte, respectively) by forcing double source 1 alignment,
   unrolling by a factor of two, and speculatively loading the second
   word / byte of source 1; however, that would increase the overhead
   for loop setup / finish, and strcmp might often terminate early.  */
  
  #include <linux/linkage.h>
  
  ENTRY(strcmp)
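	; fall back to the byte loop unless both sources are word-aligned
	; (i.e. unless the low two bits of r0 | r1 are clear)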
  	or	r2,r0,r1
  	bmsk_s	r2,r2,1
  	brne	r2,0,.Lcharloop
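	; set up the word-at-a-time NUL-detection constants:
	; r12 = 0x01010101, r5 = ror(r12, 1) = 0x80808080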
  	mov_s	r12,0x01010101
  	ror	r5,r12
  .Lwordloop:
  	ld.ab	r2,[r0,4]
  	ld.ab	r3,[r1,4]
  	nop_s
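	; r4 := (r2 - 0x01010101) & ~r2 & 0x80808080,
	; nonzero iff the word loaded from source 1 contains a NUL byte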
  	sub	r4,r2,r12
  	bic	r4,r4,r2
  	and	r4,r4,r5
  	brne	r4,0,.Lfound0
  	breq	r2,r3,.Lwordloop
  #ifdef	__LITTLE_ENDIAN__
  	xor	r0,r2,r3	; mask for difference
  	sub_s	r1,r0,1
  	bic_s	r0,r0,r1	; mask for least significant difference bit
  	sub	r1,r5,r0
  	xor	r0,r5,r1	; mask for least significant difference byte
  	and_s	r2,r2,r0
  	and_s	r3,r3,r0
  #endif /* LITTLE ENDIAN */
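	; the words differ and contain no NUL: return 1 if r2 > r3
	; (unsigned), else 0x80000001 via the delay-slot bset.lo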
  	cmp_s	r2,r3
  	mov_s	r0,1
  	j_s.d	[blink]
  	bset.lo	r0,r0,31
  
  	.balign	4
  #ifdef __LITTLE_ENDIAN__
  .Lfound0:
  	xor	r0,r2,r3	; mask for difference
  	or	r0,r0,r4	; or in zero indicator
  	sub_s	r1,r0,1
  	bic_s	r0,r0,r1	; mask for least significant difference bit
  	sub	r1,r5,r0
  	xor	r0,r5,r1	; mask for least significant difference byte
  	and_s	r2,r2,r0
  	and_s	r3,r3,r0
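	; r0 = r2 - r3: forced to 1 if r2 is higher, made negative by the
	; delay-slot bset.lo if lower, left at 0 if the strings are equal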
  	sub.f	r0,r2,r3
  	mov.hi	r0,1
  	j_s.d	[blink]
  	bset.lo	r0,r0,31
  #else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry-propagation from a less significant zero byte.
	   We can compensate for this by checking that bit 0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
  	.balign	4
  .Lfound0:
  	lsr	r0,r4,8
  	lsr_s	r1,r2
  	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
  	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
  	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
  	cmp_s	r3,r2		; ... be independent of trailing garbage
  	or_s	r2,r2,r0	; likewise for r3 > r2
  	bic_s	r3,r3,r0
  	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
  	cmp_s	r2,r3
  	j_s.d	[blink]
  	bset.lo	r0,r0,31
  #endif /* ENDIAN */
  
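	/* Byte-at-a-time fallback, used when either source is not
	   word-aligned.  */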
  	.balign	4
  .Lcharloop:
  	ldb.ab	r2,[r0,1]
  	ldb.ab	r3,[r1,1]
  	nop_s
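	; stop on a NUL in source 1 or on the first mismatch; the result
	; r2 - r3 is computed in the branch delay slot at .Lcmpend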
  	breq	r2,0,.Lcmpend
  	breq	r2,r3,.Lcharloop
  .Lcmpend:
  	j_s.d	[blink]
  	sub	r0,r2,r3
  END(strcmp)
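
For reference, here is a minimal C sketch of the word-at-a-time NUL detection
that .Lwordloop implements (the function and variable names are illustrative,
not part of the kernel source, and the pointer casts gloss over the
strict-aliasing care a production C version would need):

	#include <stdint.h>

	/* Nonzero iff some byte of w is 0x00; this is the value the assembly
	   computes into r4.  Per-byte false positives can occur for 0x01
	   bytes above a real zero byte (the carry-propagation effect the
	   big-endian comment describes), but the answer to "is there any
	   zero byte at all" is always exact.  */
	static uint32_t has_zero_byte(uint32_t w)
	{
		return (w - 0x01010101u) & ~w & 0x80808080u;
	}

	/* Aligned fast path: compare a word at a time, then settle the result
	   bytewise.  Assumes both pointers are 4-byte aligned, as the entry
	   check guarantees before .Lwordloop is entered; aligned word loads
	   cannot cross a page boundary, so reading a few bytes past the
	   terminating NUL is safe.  */
	int strcmp_sketch(const char *s1, const char *s2)
	{
		const uint32_t *w1 = (const uint32_t *)s1;
		const uint32_t *w2 = (const uint32_t *)s2;
		uint32_t a, b;

		do {
			a = *w1++;
			b = *w2++;
		} while (!has_zero_byte(a) && a == b);

		/* Bytewise tie-break over the last word pair, equivalent to
		   what the .Lfound0 paths do with register bit tricks.
		   Comparing in memory order makes this endian-neutral.  */
		const unsigned char *p1 = (const unsigned char *)(w1 - 1);
		const unsigned char *p2 = (const unsigned char *)(w2 - 1);
		for (int i = 0; i < 4; i++) {
			if (p1[i] != p2[i] || p1[i] == 0)
				return (int)p1[i] - (int)p2[i];
		}
		return 0;	/* unreachable: the loop exits only on NUL or mismatch */
	}

The assembly avoids this bytewise re-read: the little-endian .Lfound0 path
isolates the least significant differing (or zero) byte with mask arithmetic,
while the big-endian path clamps r2 down and r3 up so that garbage beyond the
NUL cannot affect the word comparison.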