--- [プログラム2]   sum-omp.s

    1		.arch armv6	; assemble for the ARMv6 architecture
   ;;; The .eabi_attribute lines are (tag, value) build attributes emitted by the
   ;;; compiler; their meanings are defined outside this file and are not decoded here.
    2		.eabi_attribute 27, 3
    3		.eabi_attribute 28, 1
    4		.fpu vfp
    5		.eabi_attribute 20, 1
    6		.eabi_attribute 21, 1
    7		.eabi_attribute 23, 3
    8		.eabi_attribute 24, 1
    9		.eabi_attribute 25, 1
   10		.eabi_attribute 26, 2
   11		.eabi_attribute 30, 2
   12		.eabi_attribute 18, 4
   13		.file	"sum1.c"	; name of the C source this listing was generated from
   14		.global	__aeabi_idiv	; integer-division helper called by sum._omp_fn.0
   15		.text
   16		.align	2
   17		.type	sum._omp_fn.0, %function
   ;;; sum._omp_fn.0 -- worker body of the parallel region. sum passes its address
   ;;; to GOMP_parallel_start and also calls it directly.
   ;;; In:  r0 = pointer to the 3-word data block built by sum:
   ;;;        [r0, #0] = base address of the array of 32-bit words
   ;;;        [r0, #4] = n, the number of elements
   ;;;        [r0, #8] = shared accumulator
   ;;; Each caller prints its thread number and the thread count, sums its own
   ;;; contiguous slice of the array into r3, atomically adds r3 to the shared
   ;;; accumulator, and finally tail-calls GOMP_barrier.
   ;;; Register roles: r5 = data block pointer (later: accumulator address),
   ;;; r6 = thread number, then loop index i; r4 = thread count, then chunk size;
   ;;; r7 = n, then slice end; r3 = thread-local partial sum.
   18	sum._omp_fn.0:
   19		@ args = 0, pretend = 0, frame = 0
   20		@ frame_needed = 0, uses_anonymous_args = 0
   21		stmfd	sp!, {r3, r4, r5, r6, r7, lr}	; save r4-r7 and lr; r3 is presumably pushed only to keep sp 8-byte aligned -- confirm
   22		mov	r5, r0	; r5 = data block pointer, kept across the calls below
   23		bl	omp_get_thread_num
   24		mov	r6, r0	; r6 = this thread's number
   25		bl	omp_get_num_threads
   26		mov	r1, r6	; printf argument 1 = thread number
   27		mov	r4, r0	; r4 = total number of threads
   28		mov	r2, r0	; printf argument 2 = total number of threads
   29		ldr	r0, .L7	; r0 = address of the format string .LC0
   30		bl	printf
   ;;; Chunk size = ceil(n / thread count)
   31		ldr	r7, [r5, #4]	; r7 = n
   32		mov	r1, r4		; r1 = total number of threads (divisor)
   33		mov	r0, r7 		; r0 = n (dividend)
   34		bl	__aeabi_idiv	; r0 = quotient n / thread count
   35		mul	r4, r0, r4	; remainder check: r4 = quotient * thread count
   36		cmp	r4, r7
   37		moveq	r4, r0		; n divides evenly: chunk size r4 = quotient
   38		addne	r4, r0, #1	; otherwise: chunk size r4 = quotient + 1
   ;;; Slice start
   39		mul	r6, r4, r6	; r6 (initial i) = chunk size * own thread number
   ;;; Slice end, clamped to n
   40		add	r4, r6, r4	; r4 (candidate end) = start + chunk size
   41		cmp	r4, r7		; candidate end vs n
   42		movlt	r7, r4		; candidate < n: r7 (end of i) = candidate
   43		movge	r7, r7		; candidate >= n: r7 stays n (no-op move); this thread handles the tail of the loop
   ;;; Skip the loop when the slice is empty
   44		cmp	r6, r7
   45		movge	r3, #0		; start >= end (signed compare): partial sum r3 = 0 ...
   46		bge	.L2	; ... and jump straight to the accumulate step
   ;;; Loop setup
   47		ldr	r2, [r5, #0]	; r2 = base address of the array
   48		mov	r3, #0		; r3 = thread-local partial sum, starts at 0
   49		add	r2, r2, r6, asl #2  ; r2 = base + start*4: address of the first element of this slice
   ;;; Summation loop: i runs from start up to end - 1
   50	.L3:
   51		ldr	r1, [r2], #4 	; r1 = *r2++ (post-indexed word load)
   52		add	r6, r6, #1	; i++
   53		cmp	r6, r7		; reached the end index?
   54		add	r3, r3, r1	; accumulate into the partial sum (add does not set flags, so bne still tests the cmp)
   55		bne	.L3
   ;;; Add the partial sum to the shared accumulator with an exclusive load/store retry loop
   56	.L2:
   57		add	r5, r5, #8	; r5 = address of the shared accumulator (data block + 8)
   58		mcr	p15, 0, r0, c7, c10, 5	; CP15 c7, c10, 5 write: presumably the ARMv6 data memory barrier operation -- confirm against the core's manual
   59		.LSYT45:
   60		ldrex	r2, [r5]	; exclusive load of the current total
   61		add	r2, r2, r3	; add this thread's partial sum
   62		strex	r1, r2, [r5]	; attempt the exclusive store; r1 = 0 on success
   63		teq	r1, #0
   64		bne	.LSYT45	; store did not succeed: retry from the ldrex
   65		.LSYB45:
   66		mcr	p15, 0, r0, c7, c10, 5	; same CP15 operation again after the update
   67		ldmfd	sp!, {r3, r4, r5, r6, r7, lr}	; restore the registers saved on entry
   68		b	GOMP_barrier	; tail call: with lr restored, GOMP_barrier returns directly to this function's caller
   69	.L8:
   70		.align	2
   71	.L7:
   72		.word	.LC0	; literal pool entry: address of the printf format string
   73		.size	sum._omp_fn.0, .-sum._omp_fn.0
   74		.align	2
   75		.global	sum
   76		.type	sum, %function
   ;;; sum -- builds the data block for the parallel region, runs it, and returns
   ;;; the accumulated total.
   ;;; In:  r0 = array base address, r1 = n (main passes its buffer and 50000)
   ;;; Out: r0 = value of the shared accumulator after GOMP_parallel_end
   ;;; Stack frame (after the sub below): [sp, #4] = array base, [sp, #8] = n,
   ;;; [sp, #12] = accumulator, [sp, #20] = saved lr.
   77	sum:
   78		@ args = 0, pretend = 0, frame = 16
   79		@ frame_needed = 0, uses_anonymous_args = 0
   80		str	lr, [sp, #-4]!	; push lr
   81		sub	sp, sp, #20	; reserve 20 bytes; the data block occupies sp+4 .. sp+15
   82		mov	r3, #0
   83		mov	r2, r3	; third argument to GOMP_parallel_start = 0 (presumably "default thread count" -- confirm in libgomp)
   84		stmib	sp, {r0, r1}	; increment-before store: [sp, #4] = r0 (array), [sp, #8] = r1 (n)
   85		add	r1, sp, #4	; second argument = pointer to the data block
   86		ldr	r0, .L10	; first argument = address of sum._omp_fn.0
   87		str	r3, [sp, #12]	; accumulator = 0
   88		bl	GOMP_parallel_start
   89		add	r0, sp, #4	; r0 = pointer to the data block
   90		bl	sum._omp_fn.0	; the calling thread runs the worker body itself
   91		bl	GOMP_parallel_end
   92		ldr	r0, [sp, #12]	; return value = accumulated total
   93		add	sp, sp, #20	; release the frame
   94		ldmfd	sp!, {pc}	; pop the saved lr into pc: return
   95	.L11:
   96		.align	2
   97	.L10:
   98		.word	sum._omp_fn.0	; literal pool entry: address of the worker function
   99		.size	sum, .-sum
  100		.section	.text.startup,"ax",%progbits
  101		.align	2
  102		.global	main
  103		.type	main, %function
   ;;; main -- allocates a 200000-byte buffer, stores its address in the global A,
   ;;; fills it with the words 0, 1, ..., 49999, calls sum on it, prints the result
   ;;; and calls exit with the result as the status.
   ;;; NOTE(review): the malloc return value is used without a NULL check.
  104	main:
  105		@ args = 0, pretend = 0, frame = 0
  106		@ frame_needed = 0, uses_anonymous_args = 0
  107		stmfd	sp!, {r4, lr}
  108		ldr	r0, .L15	; r0 = 200000, the allocation size in bytes (50000 words * 4)
  109		bl	malloc
  110		ldr	ip, .L15+4	; ip = address of the global A
  111		ldr	r2, .L15+8	; r2 = 50000, the element count
  112		mov	r1, #0	; i = 0
  113		sub	r3, r0, #4	; r3 = buffer - 4, so the pre-indexed store below first writes buffer[0]
  114		str	r0, [ip, #0]	; A = buffer
   ;;; Fill loop: buffer[i] = i for i = 0 .. 49999
  115	.L13:
  116		str	r1, [r3, #4]!	; *(r3 += 4) = i
  117		add	r1, r1, #1	; i++
  118		cmp	r1, r2
  119		bne	.L13
  120		bl	sum	; arguments are already in place: r0 is still the buffer, r1 = 50000 from the loop exit
  121		mov	r4, r0	; keep the total in r4 across printf
  122		mov	r1, r0	; printf argument 1 = total
  123		ldr	r0, .L15+12	; r0 = address of the format string .LC1
  124		bl	printf
  125		mov	r0, r4	; exit status = total
  126		bl	exit	; no epilogue follows this call
  127	.L16:
  128		.align	2
   ;;; Literal pool for main
  129	.L15:
  130		.word	200000
  131		.word	A
  132		.word	50000
  133		.word	.LC1
  134		.size	main, .-main
  135		.comm	A,4,4	; common symbol A: 4 bytes, alignment 4; main stores the buffer address here
  136		.section	.rodata.str1.4,"aMS",%progbits,1
  137		.align	2
   ;;; .LC0: printf format used by sum._omp_fn.0 (thread number, thread count)
  138	.LC0:
  139		.ascii	"threads = %d,%d\012\000"
   ;;; The string above is 17 bytes; 3 more bytes bring the next string to offset 20
  140		.space	3
   ;;; .LC1: printf format used by main for the final total
  141	.LC1:
  142		.ascii	"sum = %d\012\000"
  143		.ident	"GCC: (Debian 4.6.3-14+rpi1) 4.6.3"
  144		.section	.note.GNU-stack,"",%progbits
--- END
