.text	

.globl	add_mod_384
.hidden	add_mod_384
.type	add_mod_384,@function
.align	32
add_mod_384:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa


	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$8,%rsp
.cfi_adjust_cfa_offset	8


	call	__add_mod_384

	movq	8(%rsp),%r15
.cfi_restore	%r15
	movq	16(%rsp),%r14
.cfi_restore	%r14
	movq	24(%rsp),%r13
.cfi_restore	%r13
	movq	32(%rsp),%r12
.cfi_restore	%r12
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	movq	48(%rsp),%rbp
.cfi_restore	%rbp
	leaq	56(%rsp),%rsp
.cfi_adjust_cfa_offset	-56

	.byte	0xf3,0xc3
.cfi_endproc	
.size	add_mod_384,.-add_mod_384

.type	__add_mod_384,@function
.align	32
__add_mod_384:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa

	movq	0(%rsi),%r8
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13

__add_mod_384_a_is_loaded:
	addq	0(%rdx),%r8
	adcq	8(%rdx),%r9
	adcq	16(%rdx),%r10
	movq	%r8,%r14
	adcq	24(%rdx),%r11
	movq	%r9,%r15
	adcq	32(%rdx),%r12
	movq	%r10,%rax
	adcq	40(%rdx),%r13
	movq	%r11,%rbx
	sbbq	%rdx,%rdx

	subq	0(%rcx),%r8
	sbbq	8(%rcx),%r9
	movq	%r12,%rbp
	sbbq	16(%rcx),%r10
	sbbq	24(%rcx),%r11
	sbbq	32(%rcx),%r12
	movq	%r13,%rsi
	sbbq	40(%rcx),%r13
	sbbq	$0,%rdx

	cmovcq	%r14,%r8
	cmovcq	%r15,%r9
	cmovcq	%rax,%r10
	movq	%r8,0(%rdi)
	cmovcq	%rbx,%r11
	movq	%r9,8(%rdi)
	cmovcq	%rbp,%r12
	movq	%r10,16(%rdi)
	cmovcq	%rsi,%r13
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__add_mod_384,.-__add_mod_384

.globl	add_mod_384x
.hidden	add_mod_384x
.type	add_mod_384x,@function
.align	32
add_mod_384x:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa


	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$24,%rsp
.cfi_adjust_cfa_offset	24


	movq	%rsi,0(%rsp)
	movq	%rdx,8(%rsp)
	leaq	48(%rsi),%rsi
	leaq	48(%rdx),%rdx
	leaq	48(%rdi),%rdi
	call	__add_mod_384

	movq	0(%rsp),%rsi
	movq	8(%rsp),%rdx
	leaq	-48(%rdi),%rdi
	call	__add_mod_384

	movq	24+0(%rsp),%r15
.cfi_restore	%r15
	movq	24+8(%rsp),%r14
.cfi_restore	%r14
	movq	24+16(%rsp),%r13
.cfi_restore	%r13
	movq	24+24(%rsp),%r12
.cfi_restore	%r12
	movq	24+32(%rsp),%rbx
.cfi_restore	%rbx
	movq	24+40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	24+48(%rsp),%rsp
.cfi_adjust_cfa_offset	-24-8*6

	.byte	0xf3,0xc3
.cfi_endproc	
.size	add_mod_384x,.-add_mod_384x


.globl	rshift_mod_384
.hidden	rshift_mod_384
.type	rshift_mod_384,@function
.align	32
rshift_mod_384:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa


	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	pushq	%rdi
.cfi_adjust_cfa_offset	8


	movq	0(%rsi),%r8
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13

.Loop_rshift_mod_384:
	call	__rshift_mod_384
	decl	%edx
	jnz	.Loop_rshift_mod_384

	movq	%r8,0(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)

	movq	8(%rsp),%r15
.cfi_restore	%r15
	movq	16(%rsp),%r14
.cfi_restore	%r14
	movq	24(%rsp),%r13
.cfi_restore	%r13
	movq	32(%rsp),%r12
.cfi_restore	%r12
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	movq	48(%rsp),%rbp
.cfi_restore	%rbp
	leaq	56(%rsp),%rsp
.cfi_adjust_cfa_offset	-56

	.byte	0xf3,0xc3
.cfi_endproc	
.size	rshift_mod_384,.-rshift_mod_384

.type	__rshift_mod_384,@function
.align	32
__rshift_mod_384:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa

	movq	$1,%rsi
	movq	0(%rcx),%r14
	andq	%r8,%rsi
	movq	8(%rcx),%r15
	negq	%rsi
	movq	16(%rcx),%rax
	andq	%rsi,%r14
	movq	24(%rcx),%rbx
	andq	%rsi,%r15
	movq	32(%rcx),%rbp
	andq	%rsi,%rax
	andq	%rsi,%rbx
	andq	%rsi,%rbp
	andq	40(%rcx),%rsi

	addq	%r8,%r14
	adcq	%r9,%r15
	adcq	%r10,%rax
	adcq	%r11,%rbx
	adcq	%r12,%rbp
	adcq	%r13,%rsi
	sbbq	%r13,%r13

	shrq	$1,%r14
	movq	%r15,%r8
	shrq	$1,%r15
	movq	%rax,%r9
	shrq	$1,%rax
	movq	%rbx,%r10
	shrq	$1,%rbx
	movq	%rbp,%r11
	shrq	$1,%rbp
	movq	%rsi,%r12
	shrq	$1,%rsi
	shlq	$63,%r8
	shlq	$63,%r9
	orq	%r14,%r8
	shlq	$63,%r10
	orq	%r15,%r9
	shlq	$63,%r11
	orq	%rax,%r10
	shlq	$63,%r12
	orq	%rbx,%r11
	shlq	$63,%r13
	orq	%rbp,%r12
	orq	%rsi,%r13

	.byte	0xf3,0xc3
.cfi_endproc
.size	__rshift_mod_384,.-__rshift_mod_384

.globl	div_by_2_mod_384
.hidden	div_by_2_mod_384
.type	div_by_2_mod_384,@function
.align	32
div_by_2_mod_384:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa


	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	pushq	%rdi
.cfi_adjust_cfa_offset	8


	movq	0(%rsi),%r8
	movq	%rdx,%rcx
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13

	call	__rshift_mod_384

	movq	%r8,0(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)

	movq	8(%rsp),%r15
.cfi_restore	%r15
	movq	16(%rsp),%r14
.cfi_restore	%r14
	movq	24(%rsp),%r13
.cfi_restore	%r13
	movq	32(%rsp),%r12
.cfi_restore	%r12
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	movq	48(%rsp),%rbp
.cfi_restore	%rbp
	leaq	56(%rsp),%rsp
.cfi_adjust_cfa_offset	-56

	.byte	0xf3,0xc3
.cfi_endproc	
.size	div_by_2_mod_384,.-div_by_2_mod_384


.globl	lshift_mod_384
.hidden	lshift_mod_384
.type	lshift_mod_384,@function
.align	32
lshift_mod_384:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa


	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	pushq	%rdi
.cfi_adjust_cfa_offset	8


	movq	0(%rsi),%r8
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13

.Loop_lshift_mod_384:
	addq	%r8,%r8
	adcq	%r9,%r9
	adcq	%r10,%r10
	movq	%r8,%r14
	adcq	%r11,%r11
	movq	%r9,%r15
	adcq	%r12,%r12
	movq	%r10,%rax
	adcq	%r13,%r13
	movq	%r11,%rbx
	sbbq	%rdi,%rdi

	subq	0(%rcx),%r8
	sbbq	8(%rcx),%r9
	movq	%r12,%rbp
	sbbq	16(%rcx),%r10
	sbbq	24(%rcx),%r11
	sbbq	32(%rcx),%r12
	movq	%r13,%rsi
	sbbq	40(%rcx),%r13
	sbbq	$0,%rdi

	movq	(%rsp),%rdi
	cmovcq	%r14,%r8
	cmovcq	%r15,%r9
	cmovcq	%rax,%r10
	cmovcq	%rbx,%r11
	cmovcq	%rbp,%r12
	cmovcq	%rsi,%r13

	decl	%edx
	jnz	.Loop_lshift_mod_384

	movq	%r8,0(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)

	movq	8(%rsp),%r15
.cfi_restore	%r15
	movq	16(%rsp),%r14
.cfi_restore	%r14
	movq	24(%rsp),%r13
.cfi_restore	%r13
	movq	32(%rsp),%r12
.cfi_restore	%r12
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	movq	48(%rsp),%rbp
.cfi_restore	%rbp
	leaq	56(%rsp),%rsp
.cfi_adjust_cfa_offset	-56

	.byte	0xf3,0xc3
.cfi_endproc	
.size	lshift_mod_384,.-lshift_mod_384

.type	__lshift_mod_384,@function
.align	32
__lshift_mod_384:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa

	addq	%r8,%r8
	adcq	%r9,%r9
	adcq	%r10,%r10
	movq	%r8,%r14
	adcq	%r11,%r11
	movq	%r9,%r15
	adcq	%r12,%r12
	movq	%r10,%rax
	adcq	%r13,%r13
	movq	%r11,%rbx
	sbbq	%rdx,%rdx

	subq	0(%rcx),%r8
	sbbq	8(%rcx),%r9
	movq	%r12,%rbp
	sbbq	16(%rcx),%r10
	sbbq	24(%rcx),%r11
	sbbq	32(%rcx),%r12
	movq	%r13,%rsi
	sbbq	40(%rcx),%r13
	sbbq	$0,%rdx

	cmovcq	%r14,%r8
	cmovcq	%r15,%r9
	cmovcq	%rax,%r10
	cmovcq	%rbx,%r11
	cmovcq	%rbp,%r12
	cmovcq	%rsi,%r13

	.byte	0xf3,0xc3
.cfi_endproc
.size	__lshift_mod_384,.-__lshift_mod_384


.globl	mul_by_3_mod_384
.hidden	mul_by_3_mod_384
.type	mul_by_3_mod_384,@function
.align	32
mul_by_3_mod_384:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa


	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	pushq	%rsi
.cfi_adjust_cfa_offset	8


	movq	0(%rsi),%r8
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13
	movq	%rdx,%rcx

	call	__lshift_mod_384

	movq	(%rsp),%rdx
	call	__add_mod_384_a_is_loaded

	movq	8(%rsp),%r15
.cfi_restore	%r15
	movq	16(%rsp),%r14
.cfi_restore	%r14
	movq	24(%rsp),%r13
.cfi_restore	%r13
	movq	32(%rsp),%r12
.cfi_restore	%r12
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	movq	48(%rsp),%rbp
.cfi_restore	%rbp
	leaq	56(%rsp),%rsp
.cfi_adjust_cfa_offset	-56

	.byte	0xf3,0xc3
.cfi_endproc	
.size	mul_by_3_mod_384,.-mul_by_3_mod_384

.globl	mul_by_8_mod_384
.hidden	mul_by_8_mod_384
.type	mul_by_8_mod_384,@function
.align	32
mul_by_8_mod_384:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa


	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$8,%rsp
.cfi_adjust_cfa_offset	8


	movq	0(%rsi),%r8
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13
	movq	%rdx,%rcx

	call	__lshift_mod_384
	call	__lshift_mod_384
	call	__lshift_mod_384

	movq	%r8,0(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)

	movq	8(%rsp),%r15
.cfi_restore	%r15
	movq	16(%rsp),%r14
.cfi_restore	%r14
	movq	24(%rsp),%r13
.cfi_restore	%r13
	movq	32(%rsp),%r12
.cfi_restore	%r12
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	movq	48(%rsp),%rbp
.cfi_restore	%rbp
	leaq	56(%rsp),%rsp
.cfi_adjust_cfa_offset	-56

	.byte	0xf3,0xc3
.cfi_endproc	
.size	mul_by_8_mod_384,.-mul_by_8_mod_384


.globl	mul_by_3_mod_384x
.hidden	mul_by_3_mod_384x
.type	mul_by_3_mod_384x,@function
.align	32
mul_by_3_mod_384x:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa


	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	pushq	%rsi
.cfi_adjust_cfa_offset	8


	movq	0(%rsi),%r8
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13
	movq	%rdx,%rcx

	call	__lshift_mod_384

	movq	(%rsp),%rdx
	call	__add_mod_384_a_is_loaded

	movq	(%rsp),%rsi
	leaq	48(%rdi),%rdi

	movq	48(%rsi),%r8
	movq	56(%rsi),%r9
	movq	64(%rsi),%r10
	movq	72(%rsi),%r11
	movq	80(%rsi),%r12
	movq	88(%rsi),%r13

	call	__lshift_mod_384

	movq	$48,%rdx
	addq	(%rsp),%rdx
	call	__add_mod_384_a_is_loaded

	movq	8(%rsp),%r15
.cfi_restore	%r15
	movq	16(%rsp),%r14
.cfi_restore	%r14
	movq	24(%rsp),%r13
.cfi_restore	%r13
	movq	32(%rsp),%r12
.cfi_restore	%r12
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	movq	48(%rsp),%rbp
.cfi_restore	%rbp
	leaq	56(%rsp),%rsp
.cfi_adjust_cfa_offset	-56

	.byte	0xf3,0xc3
.cfi_endproc	
.size	mul_by_3_mod_384x,.-mul_by_3_mod_384x

.globl	mul_by_8_mod_384x
.hidden	mul_by_8_mod_384x
.type	mul_by_8_mod_384x,@function
.align	32
mul_by_8_mod_384x:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa


	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	pushq	%rsi
.cfi_adjust_cfa_offset	8


	movq	0(%rsi),%r8
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13
	movq	%rdx,%rcx

	call	__lshift_mod_384
	call	__lshift_mod_384
	call	__lshift_mod_384

	movq	(%rsp),%rsi
	movq	%r8,0(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)

	movq	48+0(%rsi),%r8
	movq	48+8(%rsi),%r9
	movq	48+16(%rsi),%r10
	movq	48+24(%rsi),%r11
	movq	48+32(%rsi),%r12
	movq	48+40(%rsi),%r13

	call	__lshift_mod_384
	call	__lshift_mod_384
	call	__lshift_mod_384

	movq	%r8,48+0(%rdi)
	movq	%r9,48+8(%rdi)
	movq	%r10,48+16(%rdi)
	movq	%r11,48+24(%rdi)
	movq	%r12,48+32(%rdi)
	movq	%r13,48+40(%rdi)

	movq	8(%rsp),%r15
.cfi_restore	%r15
	movq	16(%rsp),%r14
.cfi_restore	%r14
	movq	24(%rsp),%r13
.cfi_restore	%r13
	movq	32(%rsp),%r12
.cfi_restore	%r12
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	movq	48(%rsp),%rbp
.cfi_restore	%rbp
	leaq	56(%rsp),%rsp
.cfi_adjust_cfa_offset	-56

	.byte	0xf3,0xc3
.cfi_endproc	
.size	mul_by_8_mod_384x,.-mul_by_8_mod_384x


.globl	cneg_mod_384
.hidden	cneg_mod_384
.type	cneg_mod_384,@function
.align	32
cneg_mod_384:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa


	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	pushq	%rdx
.cfi_adjust_cfa_offset	8


	movq	0(%rsi),%rdx
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	%rdx,%r8
	movq	24(%rsi),%r11
	orq	%r9,%rdx
	movq	32(%rsi),%r12
	orq	%r10,%rdx
	movq	40(%rsi),%r13
	orq	%r11,%rdx
	movq	$-1,%rsi
	orq	%r12,%rdx
	orq	%r13,%rdx

	movq	0(%rcx),%r14
	cmovnzq	%rsi,%rdx
	movq	8(%rcx),%r15
	movq	16(%rcx),%rax
	andq	%rdx,%r14
	movq	24(%rcx),%rbx
	andq	%rdx,%r15
	movq	32(%rcx),%rbp
	andq	%rdx,%rax
	movq	40(%rcx),%rsi
	andq	%rdx,%rbx
	movq	0(%rsp),%rcx
	andq	%rdx,%rbp
	andq	%rdx,%rsi

	subq	%r8,%r14
	sbbq	%r9,%r15
	sbbq	%r10,%rax
	sbbq	%r11,%rbx
	sbbq	%r12,%rbp
	sbbq	%r13,%rsi

	orq	%rcx,%rcx

	cmovzq	%r8,%r14
	cmovzq	%r9,%r15
	cmovzq	%r10,%rax
	movq	%r14,0(%rdi)
	cmovzq	%r11,%rbx
	movq	%r15,8(%rdi)
	cmovzq	%r12,%rbp
	movq	%rax,16(%rdi)
	cmovzq	%r13,%rsi
	movq	%rbx,24(%rdi)
	movq	%rbp,32(%rdi)
	movq	%rsi,40(%rdi)

	movq	8(%rsp),%r15
.cfi_restore	%r15
	movq	16(%rsp),%r14
.cfi_restore	%r14
	movq	24(%rsp),%r13
.cfi_restore	%r13
	movq	32(%rsp),%r12
.cfi_restore	%r12
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	movq	48(%rsp),%rbp
.cfi_restore	%rbp
	leaq	56(%rsp),%rsp
.cfi_adjust_cfa_offset	-56

	.byte	0xf3,0xc3
.cfi_endproc	
.size	cneg_mod_384,.-cneg_mod_384


.globl	sub_mod_384
.hidden	sub_mod_384
.type	sub_mod_384,@function
.align	32
sub_mod_384:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa


	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$8,%rsp
.cfi_adjust_cfa_offset	8


	call	__sub_mod_384

	movq	8(%rsp),%r15
.cfi_restore	%r15
	movq	16(%rsp),%r14
.cfi_restore	%r14
	movq	24(%rsp),%r13
.cfi_restore	%r13
	movq	32(%rsp),%r12
.cfi_restore	%r12
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	movq	48(%rsp),%rbp
.cfi_restore	%rbp
	leaq	56(%rsp),%rsp
.cfi_adjust_cfa_offset	-56

	.byte	0xf3,0xc3
.cfi_endproc	
.size	sub_mod_384,.-sub_mod_384

.type	__sub_mod_384,@function
.align	32
__sub_mod_384:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa

	movq	0(%rsi),%r8
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13

	subq	0(%rdx),%r8
	movq	0(%rcx),%r14
	sbbq	8(%rdx),%r9
	movq	8(%rcx),%r15
	sbbq	16(%rdx),%r10
	movq	16(%rcx),%rax
	sbbq	24(%rdx),%r11
	movq	24(%rcx),%rbx
	sbbq	32(%rdx),%r12
	movq	32(%rcx),%rbp
	sbbq	40(%rdx),%r13
	movq	40(%rcx),%rsi
	sbbq	%rdx,%rdx

	andq	%rdx,%r14
	andq	%rdx,%r15
	andq	%rdx,%rax
	andq	%rdx,%rbx
	andq	%rdx,%rbp
	andq	%rdx,%rsi

	addq	%r14,%r8
	adcq	%r15,%r9
	movq	%r8,0(%rdi)
	adcq	%rax,%r10
	movq	%r9,8(%rdi)
	adcq	%rbx,%r11
	movq	%r10,16(%rdi)
	adcq	%rbp,%r12
	movq	%r11,24(%rdi)
	adcq	%rsi,%r13
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__sub_mod_384,.-__sub_mod_384

.globl	sub_mod_384x
.hidden	sub_mod_384x
.type	sub_mod_384x,@function
.align	32
sub_mod_384x:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa


	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$24,%rsp
.cfi_adjust_cfa_offset	24


	movq	%rsi,0(%rsp)
	movq	%rdx,8(%rsp)
	leaq	48(%rsi),%rsi
	leaq	48(%rdx),%rdx
	leaq	48(%rdi),%rdi
	call	__sub_mod_384

	movq	0(%rsp),%rsi
	movq	8(%rsp),%rdx
	leaq	-48(%rdi),%rdi
	call	__sub_mod_384

	movq	24+0(%rsp),%r15
.cfi_restore	%r15
	movq	24+8(%rsp),%r14
.cfi_restore	%r14
	movq	24+16(%rsp),%r13
.cfi_restore	%r13
	movq	24+24(%rsp),%r12
.cfi_restore	%r12
	movq	24+32(%rsp),%rbx
.cfi_restore	%rbx
	movq	24+40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	24+48(%rsp),%rsp
.cfi_adjust_cfa_offset	-24-8*6

	.byte	0xf3,0xc3
.cfi_endproc	
.size	sub_mod_384x,.-sub_mod_384x
.globl	mul_by_1_plus_i_mod_384x
.hidden	mul_by_1_plus_i_mod_384x
.type	mul_by_1_plus_i_mod_384x,@function
.align	32
mul_by_1_plus_i_mod_384x:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa


	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$56,%rsp
.cfi_adjust_cfa_offset	56


	movq	0(%rsi),%r8
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13

	movq	%r8,%r14
	addq	48(%rsi),%r8
	movq	%r9,%r15
	adcq	56(%rsi),%r9
	movq	%r10,%rax
	adcq	64(%rsi),%r10
	movq	%r11,%rbx
	adcq	72(%rsi),%r11
	movq	%r12,%rcx
	adcq	80(%rsi),%r12
	movq	%r13,%rbp
	adcq	88(%rsi),%r13
	movq	%rdi,48(%rsp)
	sbbq	%rdi,%rdi

	subq	48(%rsi),%r14
	sbbq	56(%rsi),%r15
	sbbq	64(%rsi),%rax
	sbbq	72(%rsi),%rbx
	sbbq	80(%rsi),%rcx
	sbbq	88(%rsi),%rbp
	sbbq	%rsi,%rsi

	movq	%r8,0(%rsp)
	movq	0(%rdx),%r8
	movq	%r9,8(%rsp)
	movq	8(%rdx),%r9
	movq	%r10,16(%rsp)
	movq	16(%rdx),%r10
	movq	%r11,24(%rsp)
	movq	24(%rdx),%r11
	movq	%r12,32(%rsp)
	andq	%rsi,%r8
	movq	32(%rdx),%r12
	movq	%r13,40(%rsp)
	andq	%rsi,%r9
	movq	40(%rdx),%r13
	andq	%rsi,%r10
	andq	%rsi,%r11
	andq	%rsi,%r12
	andq	%rsi,%r13
	movq	48(%rsp),%rsi

	addq	%r8,%r14
	movq	0(%rsp),%r8
	adcq	%r9,%r15
	movq	8(%rsp),%r9
	adcq	%r10,%rax
	movq	16(%rsp),%r10
	adcq	%r11,%rbx
	movq	24(%rsp),%r11
	adcq	%r12,%rcx
	movq	32(%rsp),%r12
	adcq	%r13,%rbp
	movq	40(%rsp),%r13

	movq	%r14,0(%rsi)
	movq	%r8,%r14
	movq	%r15,8(%rsi)
	movq	%rax,16(%rsi)
	movq	%r9,%r15
	movq	%rbx,24(%rsi)
	movq	%rcx,32(%rsi)
	movq	%r10,%rax
	movq	%rbp,40(%rsi)

	subq	0(%rdx),%r8
	movq	%r11,%rbx
	sbbq	8(%rdx),%r9
	sbbq	16(%rdx),%r10
	movq	%r12,%rcx
	sbbq	24(%rdx),%r11
	sbbq	32(%rdx),%r12
	movq	%r13,%rbp
	sbbq	40(%rdx),%r13
	sbbq	$0,%rdi

	cmovcq	%r14,%r8
	cmovcq	%r15,%r9
	cmovcq	%rax,%r10
	movq	%r8,48(%rsi)
	cmovcq	%rbx,%r11
	movq	%r9,56(%rsi)
	cmovcq	%rcx,%r12
	movq	%r10,64(%rsi)
	cmovcq	%rbp,%r13
	movq	%r11,72(%rsi)
	movq	%r12,80(%rsi)
	movq	%r13,88(%rsi)

	movq	56+0(%rsp),%r15
.cfi_restore	%r15
	movq	56+8(%rsp),%r14
.cfi_restore	%r14
	movq	56+16(%rsp),%r13
.cfi_restore	%r13
	movq	56+24(%rsp),%r12
.cfi_restore	%r12
	movq	56+32(%rsp),%rbx
.cfi_restore	%rbx
	movq	56+40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	56+48(%rsp),%rsp
.cfi_adjust_cfa_offset	-56-8*6

	.byte	0xf3,0xc3
.cfi_endproc	
.size	mul_by_1_plus_i_mod_384x,.-mul_by_1_plus_i_mod_384x
.globl	sgn0_pty_mod_384
.hidden	sgn0_pty_mod_384
.type	sgn0_pty_mod_384,@function
.align	32
sgn0_pty_mod_384:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa



	movq	0(%rdi),%r8
	movq	8(%rdi),%r9
	movq	16(%rdi),%r10
	movq	24(%rdi),%r11
	movq	32(%rdi),%rcx
	movq	40(%rdi),%rdx

	xorq	%rax,%rax
	movq	%r8,%rdi
	addq	%r8,%r8
	adcq	%r9,%r9
	adcq	%r10,%r10
	adcq	%r11,%r11
	adcq	%rcx,%rcx
	adcq	%rdx,%rdx
	adcq	$0,%rax

	subq	0(%rsi),%r8
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	sbbq	24(%rsi),%r11
	sbbq	32(%rsi),%rcx
	sbbq	40(%rsi),%rdx
	sbbq	$0,%rax

	notq	%rax
	andq	$1,%rdi
	andq	$2,%rax
	orq	%rdi,%rax


	.byte	0xf3,0xc3
.cfi_endproc	
.size	sgn0_pty_mod_384,.-sgn0_pty_mod_384

.globl	sgn0_pty_mod_384x
.hidden	sgn0_pty_mod_384x
.type	sgn0_pty_mod_384x,@function
.align	32
sgn0_pty_mod_384x:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa


	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	subq	$8,%rsp
.cfi_adjust_cfa_offset	8


	movq	48(%rdi),%r8
	movq	56(%rdi),%r9
	movq	64(%rdi),%r10
	movq	72(%rdi),%r11
	movq	80(%rdi),%rcx
	movq	88(%rdi),%rdx

	movq	%r8,%rbx
	orq	%r9,%r8
	orq	%r10,%r8
	orq	%r11,%r8
	orq	%rcx,%r8
	orq	%rdx,%r8

	leaq	0(%rdi),%rax
	xorq	%rdi,%rdi
	movq	%rbx,%rbp
	addq	%rbx,%rbx
	adcq	%r9,%r9
	adcq	%r10,%r10
	adcq	%r11,%r11
	adcq	%rcx,%rcx
	adcq	%rdx,%rdx
	adcq	$0,%rdi

	subq	0(%rsi),%rbx
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	sbbq	24(%rsi),%r11
	sbbq	32(%rsi),%rcx
	sbbq	40(%rsi),%rdx
	sbbq	$0,%rdi

	movq	%r8,0(%rsp)
	notq	%rdi
	andq	$1,%rbp
	andq	$2,%rdi
	orq	%rbp,%rdi

	movq	0(%rax),%r8
	movq	8(%rax),%r9
	movq	16(%rax),%r10
	movq	24(%rax),%r11
	movq	32(%rax),%rcx
	movq	40(%rax),%rdx

	movq	%r8,%rbx
	orq	%r9,%r8
	orq	%r10,%r8
	orq	%r11,%r8
	orq	%rcx,%r8
	orq	%rdx,%r8

	xorq	%rax,%rax
	movq	%rbx,%rbp
	addq	%rbx,%rbx
	adcq	%r9,%r9
	adcq	%r10,%r10
	adcq	%r11,%r11
	adcq	%rcx,%rcx
	adcq	%rdx,%rdx
	adcq	$0,%rax

	subq	0(%rsi),%rbx
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	sbbq	24(%rsi),%r11
	sbbq	32(%rsi),%rcx
	sbbq	40(%rsi),%rdx
	sbbq	$0,%rax

	movq	0(%rsp),%rbx

	notq	%rax

	testq	%r8,%r8
	cmovzq	%rdi,%rbp

	testq	%rbx,%rbx
	cmovnzq	%rdi,%rax

	andq	$1,%rbp
	andq	$2,%rax
	orq	%rbp,%rax

	movq	8(%rsp),%rbx
.cfi_restore	%rbx
	movq	16(%rsp),%rbp
.cfi_restore	%rbp
	leaq	24(%rsp),%rsp
.cfi_adjust_cfa_offset	-24

	.byte	0xf3,0xc3
.cfi_endproc	
.size	sgn0_pty_mod_384x,.-sgn0_pty_mod_384x
.globl	vec_select_48
.hidden	vec_select_48
.type	vec_select_48,@function
.align	32
vec_select_48:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa

	movd	%ecx,%xmm5
	pxor	%xmm4,%xmm4
	pshufd	$0,%xmm5,%xmm5
	movdqu	(%rsi),%xmm0
	leaq	24(%rsi),%rsi
	pcmpeqd	%xmm4,%xmm5
	movdqu	(%rdx),%xmm1
	leaq	24(%rdx),%rdx
	pcmpeqd	%xmm5,%xmm4
	leaq	24(%rdi),%rdi
	pand	%xmm4,%xmm0
	movdqu	0+16-24(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	0+16-24(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,0-24(%rdi)
	pand	%xmm4,%xmm2
	movdqu	16+16-24(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	16+16-24(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,16-24(%rdi)
	pand	%xmm4,%xmm0
	pand	%xmm5,%xmm1
	por	%xmm1,%xmm0
	movdqu	%xmm0,32-24(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	vec_select_48,.-vec_select_48
.globl	vec_select_96
.hidden	vec_select_96
.type	vec_select_96,@function
.align	32
vec_select_96:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa

	movd	%ecx,%xmm5
	pxor	%xmm4,%xmm4
	pshufd	$0,%xmm5,%xmm5
	movdqu	(%rsi),%xmm0
	leaq	48(%rsi),%rsi
	pcmpeqd	%xmm4,%xmm5
	movdqu	(%rdx),%xmm1
	leaq	48(%rdx),%rdx
	pcmpeqd	%xmm5,%xmm4
	leaq	48(%rdi),%rdi
	pand	%xmm4,%xmm0
	movdqu	0+16-48(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	0+16-48(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,0-48(%rdi)
	pand	%xmm4,%xmm2
	movdqu	16+16-48(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	16+16-48(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,16-48(%rdi)
	pand	%xmm4,%xmm0
	movdqu	32+16-48(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	32+16-48(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,32-48(%rdi)
	pand	%xmm4,%xmm2
	movdqu	48+16-48(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	48+16-48(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,48-48(%rdi)
	pand	%xmm4,%xmm0
	movdqu	64+16-48(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	64+16-48(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,64-48(%rdi)
	pand	%xmm4,%xmm2
	pand	%xmm5,%xmm3
	por	%xmm3,%xmm2
	movdqu	%xmm2,80-48(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	vec_select_96,.-vec_select_96
.globl	vec_select_192
.hidden	vec_select_192
.type	vec_select_192,@function
.align	32
vec_select_192:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa

	movd	%ecx,%xmm5
	pxor	%xmm4,%xmm4
	pshufd	$0,%xmm5,%xmm5
	movdqu	(%rsi),%xmm0
	leaq	96(%rsi),%rsi
	pcmpeqd	%xmm4,%xmm5
	movdqu	(%rdx),%xmm1
	leaq	96(%rdx),%rdx
	pcmpeqd	%xmm5,%xmm4
	leaq	96(%rdi),%rdi
	pand	%xmm4,%xmm0
	movdqu	0+16-96(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	0+16-96(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,0-96(%rdi)
	pand	%xmm4,%xmm2
	movdqu	16+16-96(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	16+16-96(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,16-96(%rdi)
	pand	%xmm4,%xmm0
	movdqu	32+16-96(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	32+16-96(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,32-96(%rdi)
	pand	%xmm4,%xmm2
	movdqu	48+16-96(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	48+16-96(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,48-96(%rdi)
	pand	%xmm4,%xmm0
	movdqu	64+16-96(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	64+16-96(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,64-96(%rdi)
	pand	%xmm4,%xmm2
	movdqu	80+16-96(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	80+16-96(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,80-96(%rdi)
	pand	%xmm4,%xmm0
	movdqu	96+16-96(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	96+16-96(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,96-96(%rdi)
	pand	%xmm4,%xmm2
	movdqu	112+16-96(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	112+16-96(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,112-96(%rdi)
	pand	%xmm4,%xmm0
	movdqu	128+16-96(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	128+16-96(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,128-96(%rdi)
	pand	%xmm4,%xmm2
	movdqu	144+16-96(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	144+16-96(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,144-96(%rdi)
	pand	%xmm4,%xmm0
	movdqu	160+16-96(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	160+16-96(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,160-96(%rdi)
	pand	%xmm4,%xmm2
	pand	%xmm5,%xmm3
	por	%xmm3,%xmm2
	movdqu	%xmm2,176-96(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	vec_select_192,.-vec_select_192
.globl	vec_select_144
.hidden	vec_select_144
.type	vec_select_144,@function
.align	32
vec_select_144:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa

	movd	%ecx,%xmm5
	pxor	%xmm4,%xmm4
	pshufd	$0,%xmm5,%xmm5
	movdqu	(%rsi),%xmm0
	leaq	72(%rsi),%rsi
	pcmpeqd	%xmm4,%xmm5
	movdqu	(%rdx),%xmm1
	leaq	72(%rdx),%rdx
	pcmpeqd	%xmm5,%xmm4
	leaq	72(%rdi),%rdi
	pand	%xmm4,%xmm0
	movdqu	0+16-72(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	0+16-72(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,0-72(%rdi)
	pand	%xmm4,%xmm2
	movdqu	16+16-72(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	16+16-72(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,16-72(%rdi)
	pand	%xmm4,%xmm0
	movdqu	32+16-72(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	32+16-72(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,32-72(%rdi)
	pand	%xmm4,%xmm2
	movdqu	48+16-72(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	48+16-72(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,48-72(%rdi)
	pand	%xmm4,%xmm0
	movdqu	64+16-72(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	64+16-72(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,64-72(%rdi)
	pand	%xmm4,%xmm2
	movdqu	80+16-72(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	80+16-72(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,80-72(%rdi)
	pand	%xmm4,%xmm0
	movdqu	96+16-72(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	96+16-72(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,96-72(%rdi)
	pand	%xmm4,%xmm2
	movdqu	112+16-72(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	112+16-72(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,112-72(%rdi)
	pand	%xmm4,%xmm0
	pand	%xmm5,%xmm1
	por	%xmm1,%xmm0
	movdqu	%xmm0,128-72(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	vec_select_144,.-vec_select_144
.globl	vec_select_288
.hidden	vec_select_288
.type	vec_select_288,@function
.align	32
vec_select_288:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa

	movd	%ecx,%xmm5
	pxor	%xmm4,%xmm4
	pshufd	$0,%xmm5,%xmm5
	movdqu	(%rsi),%xmm0
	leaq	144(%rsi),%rsi
	pcmpeqd	%xmm4,%xmm5
	movdqu	(%rdx),%xmm1
	leaq	144(%rdx),%rdx
	pcmpeqd	%xmm5,%xmm4
	leaq	144(%rdi),%rdi
	pand	%xmm4,%xmm0
	movdqu	0+16-144(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	0+16-144(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,0-144(%rdi)
	pand	%xmm4,%xmm2
	movdqu	16+16-144(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	16+16-144(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,16-144(%rdi)
	pand	%xmm4,%xmm0
	movdqu	32+16-144(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	32+16-144(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,32-144(%rdi)
	pand	%xmm4,%xmm2
	movdqu	48+16-144(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	48+16-144(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,48-144(%rdi)
	pand	%xmm4,%xmm0
	movdqu	64+16-144(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	64+16-144(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,64-144(%rdi)
	pand	%xmm4,%xmm2
	movdqu	80+16-144(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	80+16-144(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,80-144(%rdi)
	pand	%xmm4,%xmm0
	movdqu	96+16-144(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	96+16-144(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,96-144(%rdi)
	pand	%xmm4,%xmm2
	movdqu	112+16-144(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	112+16-144(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,112-144(%rdi)
	pand	%xmm4,%xmm0
	movdqu	128+16-144(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	128+16-144(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,128-144(%rdi)
	pand	%xmm4,%xmm2
	movdqu	144+16-144(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	144+16-144(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,144-144(%rdi)
	pand	%xmm4,%xmm0
	movdqu	160+16-144(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	160+16-144(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,160-144(%rdi)
	pand	%xmm4,%xmm2
	movdqu	176+16-144(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	176+16-144(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,176-144(%rdi)
	pand	%xmm4,%xmm0
	movdqu	192+16-144(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	192+16-144(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,192-144(%rdi)
	pand	%xmm4,%xmm2
	movdqu	208+16-144(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	208+16-144(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,208-144(%rdi)
	pand	%xmm4,%xmm0
	movdqu	224+16-144(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	224+16-144(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,224-144(%rdi)
	pand	%xmm4,%xmm2
	movdqu	240+16-144(%rsi),%xmm0
	pand	%xmm5,%xmm3
	movdqu	240+16-144(%rdx),%xmm1
	por	%xmm3,%xmm2
	movdqu	%xmm2,240-144(%rdi)
	pand	%xmm4,%xmm0
	movdqu	256+16-144(%rsi),%xmm2
	pand	%xmm5,%xmm1
	movdqu	256+16-144(%rdx),%xmm3
	por	%xmm1,%xmm0
	movdqu	%xmm0,256-144(%rdi)
	pand	%xmm4,%xmm2
	pand	%xmm5,%xmm3
	por	%xmm3,%xmm2
	movdqu	%xmm2,272-144(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	vec_select_288,.-vec_select_288
.globl	vec_prefetch
.hidden	vec_prefetch
.type	vec_prefetch,@function
.align	32
vec_prefetch:
.cfi_startproc
	.byte	0xf3,0x0f,0x1e,0xfa

	leaq	-1(%rdi,%rsi,1),%rsi
	movq	$64,%rax
	xorq	%r8,%r8
	prefetchnta	(%rdi)
	leaq	(%rdi,%rax,1),%rdi
	cmpq	%rsi,%rdi
	cmovaq	%rsi,%rdi
	cmovaq	%r8,%rax
	prefetchnta	(%rdi)
	leaq	(%rdi,%rax,1),%rdi
	cmpq	%rsi,%rdi
	cmovaq	%rsi,%rdi
	cmovaq	%r8,%rax
	prefetchnta	(%rdi)
	leaq	(%rdi,%rax,1),%rdi
	cmpq	%rsi,%rdi
	cmovaq	%rsi,%rdi
	cmovaq	%r8,%rax
	prefetchnta	(%rdi)
	leaq	(%rdi,%rax,1),%rdi
	cmpq	%rsi,%rdi
	cmovaq	%rsi,%rdi
	cmovaq	%r8,%rax
	prefetchnta	(%rdi)
	leaq	(%rdi,%rax,1),%rdi
	cmpq	%rsi,%rdi
	cmovaq	%rsi,%rdi
	cmovaq	%r8,%rax
	prefetchnta	(%rdi)
	leaq	(%rdi,%rax,1),%rdi
	cmpq	%rsi,%rdi
	cmovaq	%rsi,%rdi
	prefetchnta	(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	vec_prefetch,.-vec_prefetch

.section	.note.GNU-stack,"",@progbits
.section	.note.gnu.property,"a",@note
	.long	4,2f-1f,5
	.byte	0x47,0x4E,0x55,0
1:	.long	0xc0000002,4,3
.align	8
2:
