217 lines
7.6 KiB
Plaintext
217 lines
7.6 KiB
Plaintext
|
|
|
||
|
|
! *** generated by SAPFOR with version 2415 and build date: May 4 2025 14:48:40
|
||
|
|
! *** Enabled options ***:
|
||
|
|
! *** maximum shadow width is 50 percent
|
||
|
|
! *** generated by SAPFOR
|
||
|
|
|
||
|
|
!-------------------------------------------------------------------------!
|
||
|
|
! !
|
||
|
|
! N A S P A R A L L E L B E N C H M A R K S 3.3.1 !
|
||
|
|
! !
|
||
|
|
! D V M H V E R S I O N !
|
||
|
|
! !
|
||
|
|
! S P !
|
||
|
|
! !
|
||
|
|
!-------------------------------------------------------------------------!
|
||
|
|
!-------------------------------------------------------------------------!
|
||
|
|
!---------------------------------------------------------------------
|
||
|
|
!
|
||
|
|
! Authors:
|
||
|
|
! Original:
|
||
|
|
! R. Van der Wijngaart
|
||
|
|
! W. Saphir
|
||
|
|
! H. Jin
|
||
|
|
! Optimize for DVMH:
|
||
|
|
! Kolganov A.S.
|
||
|
|
!---------------------------------------------------------------------
|
||
|
|
!---------------------------------------------------------------------
|
||
|
|
subroutine sp
|
||
|
|
|
||
|
|
include 'header_sp.h'
|
||
|
|
integer :: i,niter,step,fstatus,n3
|
||
|
|
external timer_read_sp
|
||
|
|
double precision :: mflops,t,tmax,timer_read_sp,trecs(t_last)
|
||
|
|
logical :: verified
|
||
|
|
character :: class
|
||
|
|
character :: t_names(t_last)*8
|
||
|
|
|
||
|
|
!---------------------------------------------------------------------
|
||
|
|
! Read input file (if it exists), else take
|
||
|
|
! defaults from parameters
|
||
|
|
!---------------------------------------------------------------------
|
||
|
|
open (unit = 2,file = 'timer.flag',status = 'old',iostat = fstatus
|
||
|
|
&)
|
||
|
|
if (fstatus .eq. 0) then
|
||
|
|
timeron = .TRUE.
|
||
|
|
t_names(t_total) = 'total'
|
||
|
|
t_names(t_rhsx) = 'rhsx'
|
||
|
|
t_names(t_rhsy) = 'rhsy'
|
||
|
|
t_names(t_rhsz) = 'rhsz'
|
||
|
|
t_names(t_rhs) = 'rhs'
|
||
|
|
t_names(t_xsolve) = 'xsolve'
|
||
|
|
t_names(t_ysolve) = 'ysolve'
|
||
|
|
t_names(t_zsolve) = 'zsolve'
|
||
|
|
t_names(t_rdis1) = 'redist1'
|
||
|
|
t_names(t_rdis2) = 'redist2'
|
||
|
|
t_names(t_tzetar) = 'tzetar'
|
||
|
|
t_names(t_ninvr) = 'ninvr'
|
||
|
|
t_names(t_pinvr) = 'pinvr'
|
||
|
|
t_names(t_txinvr) = 'txinvr'
|
||
|
|
t_names(t_add) = 'add'
|
||
|
|
close (unit = 2)
|
||
|
|
else
|
||
|
|
timeron = .FALSE.
|
||
|
|
endif
|
||
|
|
write (unit = *,fmt = 1000)
|
||
|
|
open (unit = 2,file = 'inputsp.data',status = 'old',iostat = fstat
|
||
|
|
&us)
|
||
|
|
if (fstatus .eq. 0) then
|
||
|
|
write (unit = *,fmt = 233)
|
||
|
|
233 format(' Reading from input file inputsp.data')
|
||
|
|
read (unit = 2,fmt = *) niter
|
||
|
|
read (unit = 2,fmt = *) dt
|
||
|
|
read (unit = 2,fmt = *) grid_points(1),grid_points(2),grid_poin
|
||
|
|
&ts(3)
|
||
|
|
close (unit = 2)
|
||
|
|
else
|
||
|
|
write (unit = *,fmt = 234)
|
||
|
|
niter = niter_default
|
||
|
|
dt = dt_default
|
||
|
|
grid_points(1) = problem_size
|
||
|
|
grid_points(2) = problem_size
|
||
|
|
grid_points(3) = problem_size
|
||
|
|
endif
|
||
|
|
234 format(' No input file inputsp.data. Using compiled defaults')
|
||
|
|
open (unit = 2,file = 'inputStage',status = 'old',iostat = fstatus
|
||
|
|
&)
|
||
|
|
if (fstatus .eq. 0) then
|
||
|
|
read (unit = 2,fmt = *) stage_n
|
||
|
|
close (unit = 2)
|
||
|
|
else
|
||
|
|
stage_n = 0
|
||
|
|
endif
|
||
|
|
write (unit = *,fmt = *) 'stage = ',stage_n
|
||
|
|
write (unit = *,fmt = 1001) problem_size,problem_size,problem_size
|
||
|
|
write (unit = *,fmt = 1002) niter,dt
|
||
|
|
write (unit = *,fmt = *)
|
||
|
|
1000 format(//, ' NAS Parallel Benchmarks (NPB3.3.1-DVMH)',
|
||
|
|
& ' - SP Benchmark', /)
|
||
|
|
1001 format(' Size: ', i4, 'x', i4, 'x', i4)
|
||
|
|
1002 format(' Iterations: ', i4, ' dt: ', F11.7)
|
||
|
|
1003 format(' Number of available threads: ', i5)
|
||
|
|
if (problem_size .gt. imax .or. problem_size .gt. jmax .or. proble
|
||
|
|
&m_size .gt. kmax) then
|
||
|
|
print *, (grid_points(i), i = 1,3)
|
||
|
|
print *, ' Problem size too big for compiled array sizes'
|
||
|
|
goto 999
|
||
|
|
endif
|
||
|
|
nx2 = problem_size - 2
|
||
|
|
ny2 = problem_size - 2
|
||
|
|
nz2 = problem_size - 2
|
||
|
|
call set_constants_sp()
|
||
|
|
call exact_rhs_sp()
|
||
|
|
call initialize_sp()
|
||
|
|
call adi_first_sp()
|
||
|
|
call adi_first_sp()
|
||
|
|
call initialize_sp()
|
||
|
|
do i = 1,t_last
|
||
|
|
call timer_clear_sp(i)
|
||
|
|
enddo
|
||
|
|
call timer_start_sp(1)
|
||
|
|
|
||
|
|
! DVM$ BARRIER
|
||
|
|
do step = 1,niter
|
||
|
|
if (mod (step,20) .eq. 0 .or. step .eq. 1) then
|
||
|
|
write (unit = *,fmt = 200) step
|
||
|
|
200 format(' Time step ', i4)
|
||
|
|
endif
|
||
|
|
call adi()
|
||
|
|
enddo
|
||
|
|
call timer_stop_sp(1)
|
||
|
|
tmax = timer_read_sp (1)
|
||
|
|
call verify_sp(niter,class,verified)
|
||
|
|
if (tmax .ne. 0.) then
|
||
|
|
n3 = problem_size * problem_size * problem_size
|
||
|
|
t = (problem_size + problem_size + problem_size) / 3.0
|
||
|
|
mflops = (881.174 * float (n3) - 4683.91 * t** 2 + 11484.5 * t
|
||
|
|
&- 19272.4) * float (niter) / (tmax * 1000000.0d0)
|
||
|
|
else
|
||
|
|
mflops = 0.0
|
||
|
|
endif
|
||
|
|
call print_results_sp('SP',class,problem_size,problem_size,
|
||
|
|
&problem_si
|
||
|
|
&ze,niter,tmax,mflops,' floating point',verified,npbversio
|
||
|
|
&n,compiletime,cs1,cs2,cs3,cs4,cs5,cs6,'(none)')
|
||
|
|
|
||
|
|
!---------------------------------------------------------------------
|
||
|
|
! More timers
|
||
|
|
!---------------------------------------------------------------------
|
||
|
|
if (.not.(timeron)) goto 999
|
||
|
|
do i = 1,t_last
|
||
|
|
trecs(i) = timer_read_sp (i)
|
||
|
|
enddo
|
||
|
|
if (tmax .eq. 0.0) tmax = 1.0
|
||
|
|
write (unit = *,fmt = 800)
|
||
|
|
800 format(' SECTION Time (secs)')
|
||
|
|
do i = 1,t_last
|
||
|
|
write (unit = *,fmt = 810) t_names(i),trecs(i),trecs(i) * 100.
|
||
|
|
&/ tmax
|
||
|
|
if (i .eq. t_rhs) then
|
||
|
|
t = trecs(t_rhsx) + trecs(t_rhsy) + trecs(t_rhsz)
|
||
|
|
write (unit = *,fmt = 820) 'sub-rhs',t,t * 100. / tmax
|
||
|
|
t = trecs(t_rhs) - t
|
||
|
|
write (unit = *,fmt = 820) 'rest-rhs',t,t * 100. / tmax
|
||
|
|
else if (i .eq. t_zsolve) then
|
||
|
|
t = trecs(t_zsolve) - trecs(t_rdis1) - trecs(t_rdis2)
|
||
|
|
write (unit = *,fmt = 820) 'sub-zsol',t,t * 100. / tmax
|
||
|
|
else if (i .eq. t_rdis2) then
|
||
|
|
t = trecs(t_rdis1) + trecs(t_rdis2)
|
||
|
|
write (unit = *,fmt = 820) 'redist',t,t * 100. / tmax
|
||
|
|
endif
|
||
|
|
810 format(2x,a8,':',f9.3,' (',f6.2,'%)')
|
||
|
|
820 format(' --> ',a8,':',f9.3,' (',f6.2,'%)')
|
||
|
|
enddo
|
||
|
|
999 continue
|
||
|
|
end
|
||
|
|
|
||
|
|
|
||
|
|
!---------------------------------------------------------------------
|
||
|
|
!---------------------------------------------------------------------
|
||
|
|
subroutine adi_first_sp ()
|
||
|
|
|
||
|
|
!---------------------------------------------------------------------
|
||
|
|
!---------------------------------------------------------------------
|
||
|
|
call compute_rhs_sp(1)
|
||
|
|
call x_solve_sp()
|
||
|
|
call y_solve_sp()
|
||
|
|
call z_solve_sp()
|
||
|
|
return
|
||
|
|
end
|
||
|
|
|
||
|
|
|
||
|
|
!---------------------------------------------------------------------
|
||
|
|
!---------------------------------------------------------------------
|
||
|
|
subroutine adi_sp ()
|
||
|
|
|
||
|
|
! DVM$ INTERVAL 1
|
||
|
|
!---------------------------------------------------------------------
|
||
|
|
!---------------------------------------------------------------------
|
||
|
|
call compute_rhs_sp(1)
|
||
|
|
|
||
|
|
! DVM$ INTERVAL 12
|
||
|
|
! DVM$ END INTERVAL
|
||
|
|
call x_solve_sp()
|
||
|
|
|
||
|
|
! DVM$ INTERVAL 13
|
||
|
|
! DVM$ END INTERVAL
|
||
|
|
call y_solve_sp()
|
||
|
|
|
||
|
|
! DVM$ INTERVAL 14
|
||
|
|
! DVM$ END INTERVAL
|
||
|
|
call z_solve_sp()
|
||
|
|
|
||
|
|
! DVM$ END INTERVAL
|
||
|
|
return
|
||
|
|
end
|
||
|
|
|