kernel/linux-rt-4.4.41/arch/x86/lib/memset_64.S

  /* Copyright 2002 Andi Kleen, SuSE Labs */
  
  #include <linux/linkage.h>
  #include <asm/cpufeature.h>
  #include <asm/alternative-asm.h>
  
  .weak memset
  
  /*
   * ISO C memset - set a memory block to a byte value. This function uses fast
   * string instructions to get better performance than the original function. The
   * code is simpler and shorter than the original function as well.
   *
   * rdi   destination
   * rsi   value (char)
   * rdx   count (bytes)
   *
   * rax   original destination
   */
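  /*
   * For orientation, a rough C-level sketch of the fast-string path below;
   * an illustrative sketch only, not part of the kernel build, and it
   * ignores alignment and aliasing details. The first loop corresponds to
   * the rep stosq, the second to the rep stosb:
   *
   *	void *memset_sketch(void *dst, int c, unsigned long n)
   *	{
   *		unsigned long v = (unsigned char)c * 0x0101010101010101UL;
   *		unsigned char *p = dst;
   *
   *		for (; n >= 8; n -= 8, p += 8)
   *			*(unsigned long *)p = v;
   *		while (n--)
   *			*p++ = (unsigned char)c;
   *		return dst;
   *	}
   */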
  ENTRY(memset)
  ENTRY(__memset)
  	/*
  	 * Some CPUs support the enhanced REP MOVSB/STOSB (ERMS) feature; it is
  	 * recommended to use it when available, so jump to memset_erms in that
  	 * case. If ERMS is absent but fast string operations are supported
  	 * (REP_GOOD), fall through to the rep stosq code below. Otherwise,
  	 * jump to the original memset function.
  	 */
  	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
  		      "jmp memset_erms", X86_FEATURE_ERMS
  
  	movq %rdi,%r9
  	movq %rdx,%rcx
  	andl $7,%edx
  	shrq $3,%rcx
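  	/* %rcx now holds the qword count, %edx the 0..7 trailing bytes */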
  	/* expand byte value  */
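  	/* (the multiply by 0x0101010101010101 below replicates the byte
  	   into all eight bytes of %rax) */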
  	movzbl %sil,%esi
  	movabs $0x0101010101010101,%rax
  	imulq %rsi,%rax
  	rep stosq
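  	/* store the remaining 0..7 bytes counted in %edx */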
  	movl %edx,%ecx
  	rep stosb
  	movq %r9,%rax
  	ret
  ENDPROC(memset)
  ENDPROC(__memset)
  
  /*
   * ISO C memset - set a memory block to a byte value. This function uses
   * enhanced rep stosb to override the fast string function.
   * The code is simpler and shorter than the fast string function as well.
   *
   * rdi   destination
   * rsi   value (char)
   * rdx   count (bytes)
   *
   * rax   original destination
   */
  ENTRY(memset_erms)
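  	/* with ERMS, a single rep stosb is fast for any length, so no
  	   qword/byte split is needed */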
  	movq %rdi,%r9
  	movb %sil,%al
  	movq %rdx,%rcx
  	rep stosb
  	movq %r9,%rax
  	ret
  ENDPROC(memset_erms)
  
  ENTRY(memset_orig)
  	movq %rdi,%r10
  
  	/* expand byte value  */
  	movzbl %sil,%ecx
  	movabs $0x0101010101010101,%rax
  	imulq  %rcx,%rax
  
  	/* align dst */
  	movl  %edi,%r9d
  	andl  $7,%r9d
  	jnz  .Lbad_alignment
  .Lafter_bad_alignment:
  
  	movq  %rdx,%rcx
  	shrq  $6,%rcx
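  	/* %rcx = number of 64-byte blocks */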
  	jz	 .Lhandle_tail
  
  	.p2align 4
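  	/* unrolled main loop: eight qword stores (64 bytes) per iteration */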
  .Lloop_64:
  	decq  %rcx
  	movq  %rax,(%rdi)
  	movq  %rax,8(%rdi)
  	movq  %rax,16(%rdi)
  	movq  %rax,24(%rdi)
  	movq  %rax,32(%rdi)
  	movq  %rax,40(%rdi)
  	movq  %rax,48(%rdi)
  	movq  %rax,56(%rdi)
  	leaq  64(%rdi),%rdi
  	jnz    .Lloop_64
  
  	/* Handle the tail in loops. The loops should be faster than
  	   hard-to-predict jump tables. */
  	.p2align 4
  .Lhandle_tail:
  	movl	%edx,%ecx
  	andl    $63&(~7),%ecx
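  	/* bytes left below 64 that still form whole qwords (count & 56) */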
  	jz 		.Lhandle_7
  	shrl	$3,%ecx
  	.p2align 4
  .Lloop_8:
  	decl   %ecx
  	movq  %rax,(%rdi)
  	leaq  8(%rdi),%rdi
  	jnz    .Lloop_8
  
  .Lhandle_7:
  	andl	$7,%edx
  	jz      .Lende
  	.p2align 4
  .Lloop_1:
  	decl    %edx
  	movb 	%al,(%rdi)
  	leaq	1(%rdi),%rdi
  	jnz     .Lloop_1
  
  .Lende:
  	movq	%r10,%rax
  	ret
  
  .Lbad_alignment:
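  	/* destination is not 8-byte aligned: store one unaligned qword,
  	   then advance %rdi to the next 8-byte boundary and continue */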
  	cmpq $7,%rdx
  	jbe	.Lhandle_7
  	movq %rax,(%rdi)	/* unaligned store */
  	movq $8,%r8
  	subq %r9,%r8
  	addq %r8,%rdi
  	subq %r8,%rdx
  	jmp .Lafter_bad_alignment
  .Lfinal:
  ENDPROC(memset_orig)