commit 784a8f2ec71279aa9bb397b8571074e3a0561746 Author: xnpster Date: Tue May 6 22:04:43 2025 +0300 initial diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bd27760 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +visualizer_data +out +v* +m* +*.dep +*.proj +.vscode \ No newline at end of file diff --git a/bt.for b/bt.for new file mode 100644 index 0000000..aed584a --- /dev/null +++ b/bt.for @@ -0,0 +1,129 @@ + +! *** generated by SAPFOR with version 2412 and build date: Apr 29 2025 22:44:14 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- + subroutine btdv3 + + include 'header3d_bt.h' + integer :: i,niter,step,fstatus,n3 + double precision :: navg,mflops + external timer_read_bt,verify_bt + double precision :: tmax,timer_read_bt + logical :: verified + character :: class + +!--------------------------------------------------------------------- +! Root node reads input file (if it exists) else takes +! defaults from parameters +!--------------------------------------------------------------------- + write (unit = *,fmt = 1000) + open (unit = 2,file = 'inputbt.data',status = 'old',iostat = fstat + &us) + if (fstatus .eq. 0) then + write (unit = *,fmt = 233) +233 format(' Reading from input file inputbt.data') + read (unit = 2,fmt = *) niter + read (unit = 2,fmt = *) dt + read (unit = 2,fmt = *) grid_points(1),grid_points(2),grid_poin + &ts(3) + close (unit = 2) + else + write (unit = *,fmt = 234) + niter = niter_default + dt = dt_default + grid_points(1) = problem_size + grid_points(2) = problem_size + grid_points(3) = problem_size + endif +234 format(' No input file inputbt.data. Using compiled defaults') + write (unit = *,fmt = 1001) grid_points(1),grid_points(2),grid_poi + &nts(3) + write (unit = *,fmt = 1002) niter,dt +1000 format(//, ' NAS Parallel Benchmarks 3.3.1 - DVMH version',' - BT + &Benchmark ',/) +1001 format(' Size: ', i3, 'x', i3, 'x', i3) +1002 format(' Iterations: ', i3, ' dt: ', F10.6) + if (grid_points(1) .gt. imax .or. grid_points(2) .gt. jmax .or. gr + &id_points(3) .gt. kmax) then + print *, (grid_points(i), i = 1,3) + print *, ' Problem size too big for compiled array sizes' + goto 999 + endif + open (unit = 2,file = 'inputStage',status = 'old',iostat = fstatus + &) + if (fstatus .eq. 0) then + read (unit = 2,fmt = *) stage_n + close (unit = 2) + else + stage_n = 0 + endif + write (unit = *,fmt = *) 'stage = ',stage_n + call set_constants_bt() + call initialize_bt() + call exact_rhs_bt() + +! ************* DO 2 iterations for touch all code + call adi_first_bt() + call adi_first_bt() + call initialize_bt() + call timer_clear_bt(1) + call timer_start_bt(1) + do step = 1,niter + if (mod (step,20) .eq. 0 .or. step .eq. 1) then + write (unit = *,fmt = 200) step +200 format(' Time step ', i8) + endif + call adi_bt() + enddo + call timer_stop_bt(1) + tmax = timer_read_bt(1) + call verify_bt(niter,class,verified) + n3 = grid_points(1) * grid_points(2) * grid_points(3) + navg = (grid_points(1) + grid_points(2) + grid_points(3)) / 3.0 + if (tmax .ne. 0.) then + mflops = 1.0e-6 * float (niter) * (3478.8 * float (n3) - 17655. + &7 * navg** 2 + 28023.7 * navg) / tmax + else + mflops = 0.0 + endif + call print_results_bt('BT',class,grid_points(1), + &grid_points(2),grid_p + &oints(3),niter,tmax,mflops,' floating point',verified,npb + &version) + +! ,compiletime, cs1, cs2, cs3, cs4, cs5,cs6, '(none)') +999 continue + end + + subroutine adi_first_bt () + call compute_rhs_bt() + call x_solve_bt() + call y_solve_bt() + call z_solve_bt() + return + end + + subroutine adi () + +! DVM$ INTERVAL 1 + call compute_rhs_bt() + +! DVM$ INTERVAL 11 +! DVM$ END INTERVAL + call x_solve_bt() + +! DVM$ INTERVAL 12 +! DVM$ END INTERVAL + call y_solve_bt() + +! DVM$ INTERVAL 13 +! DVM$ END INTERVAL + call z_solve_bt() + +! DVM$ END INTERVAL + return + end + diff --git a/compute_errors_bt.for b/compute_errors_bt.for new file mode 100644 index 0000000..e17757c --- /dev/null +++ b/compute_errors_bt.for @@ -0,0 +1,122 @@ + +! *** generated by SAPFOR with version 2412 and build date: Apr 29 2025 22:44:14 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- +! this function computes the norm of the difference between the +! computed solution and the exact solution +!--------------------------------------------------------------------- + subroutine error_norm_bt (rms) + + include 'header3d_bt.h' + integer :: i,j,k,m,d + double precision :: xi,eta,zeta,u_exact(5),rms(5),add + double precision :: r1,r2,r3,r4,r5 + do m = 1,5 + rms(m) = 0.0d0 + enddo + r1 = 0.0d0 + r2 = 0.0d0 + r3 = 0.0d0 + r4 = 0.0d0 + r5 = 0.0d0 + +! DVM$ PARALLEL (K,J,I) ON U(*,I,J,K), REDUCTION (SUM(R1),SUM(R2),SUM(R3) +! DVM$&,SUM(R4),SUM(R5)),PRIVATE (U_EXACT,XI,ETA,ZETA,M,ADD) +! DVM$ REGION + do k = 0,problem_size - 1 + do j = 0,problem_size - 1 + do i = 0,problem_size - 1 + zeta = dble (k) * dnzm1 + eta = dble (j) * dnym1 + xi = dble (i) * dnxm1 + +! call exact_solution_bt(xi, eta, zeta, u_exact) + do m = 1,5 + u_exact(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + + & xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6 + &) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + &(ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + add = u(1,i,j,k) - u_exact(1) + r1 = r1 + add * add + add = u(2,i,j,k) - u_exact(2) + r2 = r2 + add * add + add = u(3,i,j,k) - u_exact(3) + r3 = r3 + add * add + add = u(4,i,j,k) - u_exact(4) + r4 = r4 + add * add + add = u(5,i,j,k) - u_exact(5) + r5 = r5 + add * add + enddo + enddo + enddo + +! DVM$ END REGION + rms(1) = r1 + rms(2) = r2 + rms(3) = r3 + rms(4) = r4 + rms(5) = r5 + do m = 1,5 + do d = 1,3 + rms(m) = rms(m) / dble (grid_points(d) - 2) + enddo + rms(m) = dsqrt (rms(m)) + enddo + return + end + + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine rhs_norm_bt (rms) + + include 'header3d_bt.h' + integer :: i,j,k,d,m + double precision :: rms(5),add,r1,r2,r3,r4,r5 + r1 = 0.0d0 + r2 = 0.0d0 + r3 = 0.0d0 + r4 = 0.0d0 + r5 = 0.0d0 + +! DVM$ PARALLEL (K,J,I) ON RHS(*,I,J,K), REDUCTION (SUM(R1),SUM(R2),SUM(R +! DVM$&3),SUM(R4),SUM(R5)),PRIVATE (ADD) +! DVM$ REGION + do k = 1,problem_size - 2 + do j = 1,problem_size - 2 + do i = 1,problem_size - 2 + add = rhs(1,i,j,k) + r1 = r1 + add * add + add = rhs(2,i,j,k) + r2 = r2 + add * add + add = rhs(3,i,j,k) + r3 = r3 + add * add + add = rhs(4,i,j,k) + r4 = r4 + add * add + add = rhs(5,i,j,k) + r5 = r5 + add * add + enddo + enddo + enddo + +! DVM$ END REGION + rms(1) = r1 + rms(2) = r2 + rms(3) = r3 + rms(4) = r4 + rms(5) = r5 + do m = 1,5 + do d = 1,3 + rms(m) = rms(m) / dble (grid_points(d) - 2) + enddo + rms(m) = dsqrt (rms(m)) + enddo + return + end + diff --git a/compute_errors_sp.for b/compute_errors_sp.for new file mode 100644 index 0000000..ae51cb5 --- /dev/null +++ b/compute_errors_sp.for @@ -0,0 +1,106 @@ + +! *** generated by SAPFOR with version 2415 and build date: May 4 2025 14:48:40 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine error_norm_sp (rms) + + include 'header_sp.h' + integer :: i,j,k,m,d + double precision :: xi,eta,zeta,u_exact(5),rms(5),add + double precision :: r1,r2,r3,r4,r5 + do m = 1,5 + rms(m) = 0.0d0 + enddo + r1 = 0.0d0 + r2 = 0.0d0 + r3 = 0.0d0 + r4 = 0.0d0 + r5 = 0.0d0 + +! DVM$ PARALLEL (K,J,I) ON U(*,I,J,K), PRIVATE (ZETA,ETA,XI,ADD,U_EXACT,M +! DVM$&),REDUCTION (SUM(R1),SUM(R2),SUM(R3),SUM(R4),SUM(R5)) +! DVM$ REGION +! DVM$& ,shadow_renew(u, rhs) + do k = 0,problem_size - 1 + do j = 0,problem_size - 1 + do i = 0,problem_size - 1 + zeta = dble (k) * dnzm1 + eta = dble (j) * dnym1 + xi = dble (i) * dnxm1 + do m = 1,5 + u_exact(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + + & xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6 + &) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + &(ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + add = u(1,i,j,k) - u_exact(1) + r1 = r1 + add * add + add = u(2,i,j,k) - u_exact(2) + r2 = r2 + add * add + add = u(3,i,j,k) - u_exact(3) + r3 = r3 + add * add + add = u(4,i,j,k) - u_exact(4) + r4 = r4 + add * add + add = u(5,i,j,k) - u_exact(5) + r5 = r5 + add * add + enddo + enddo + enddo + +! DVM$ END REGION + rms(1) = r1 + rms(2) = r2 + rms(3) = r3 + rms(4) = r4 + rms(5) = r5 + do m = 1,5 + do d = 1,3 + rms(m) = rms(m) / dble (grid_points(d) - 2) + enddo + rms(m) = dsqrt (rms(m)) + enddo + return + end + + subroutine rhs_norm_sp (rms) + + include 'header_sp.h' + integer :: i,j,k,d,m + double precision :: rms(5),add + do m = 1,5 + rms(m) = 0.0d0 + enddo + +! DVM$ PARALLEL (K,J,I) ON U(*,I,J,K), PRIVATE (ADD),REDUCTION (SUM(RMS)) +! DVM$ REGION + do k = 1,nz2 + do j = 1,ny2 + do i = 1,nx2 + add = rhs(1,i,j,k) + rms(1) = rms(1) + add * add + add = rhs(2,i,j,k) + rms(2) = rms(2) + add * add + add = rhs(3,i,j,k) + rms(3) = rms(3) + add * add + add = rhs(4,i,j,k) + rms(4) = rms(4) + add * add + add = rhs(5,i,j,k) + rms(5) = rms(5) + add * add + enddo + enddo + enddo + +! DVM$ END REGION + do m = 1,5 + do d = 1,3 + rms(m) = rms(m) / dble (grid_points(d) - 2) + enddo + rms(m) = dsqrt (rms(m)) + enddo + return + end + diff --git a/compute_rhs_bt.for b/compute_rhs_bt.for new file mode 100644 index 0000000..b4f6cb9 --- /dev/null +++ b/compute_rhs_bt.for @@ -0,0 +1,226 @@ + +! *** generated by SAPFOR with version 2412 and build date: Apr 29 2025 22:44:14 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine compute_rhs_bt () + + include 'header3d_bt.h' + integer :: i,j,k,m + double precision :: rho_inv,uijk,up1,um1,vijk,vp1,vm1,wijk,wp1,wm + &1,rhs_(5) + +!$SPF PARALLEL_REG r0 +! DVM$ PARALLEL (K,J,I) ON US(I,J,K), SHADOW_COMPUTE ,PRIVATE (RHO_INV,M) +! DVM$&,CUDA_BLOCK (128) +! DVM$ REGION OUT (RHO_I,US,VS,WS,QS,SQUARE) + do k = 0,problem_size - 1 + do j = 0,problem_size - 1 + do i = 0,problem_size - 1 + rho_inv = 1.0d0 / u(1,i,j,k) + rho_i(i,j,k) = rho_inv + us(i,j,k) = u(2,i,j,k) * rho_inv + vs(i,j,k) = u(3,i,j,k) * rho_inv + ws(i,j,k) = u(4,i,j,k) * rho_inv + square(i,j,k) = 0.5d0 * (u(2,i,j,k) * u(2,i,j,k) + u(3,i, + &j,k) * u(3,i,j,k) + u(4,i,j,k) * u(4,i,j,k)) * rho_inv + qs(i,j,k) = square(i,j,k) * rho_inv + do m = 1,5 + rhs(m,i,j,k) = forcing(m,i,j,k) + enddo + enddo + enddo + enddo + +!$SPF ANALYSIS(PRIVATE(rhs_)) +! DVM$ PARALLEL (K,J,I) ON RHS(*,I,J,K), PRIVATE (UIJK,UP1,UM1,M,VIJK,VP1 +! DVM$&,VM1,WIJK,WP1,WM1,RHS_),CUDA_BLOCK (32) +!--------------------------------------------------------------------- +! compute xi-direction fluxes +!--------------------------------------------------------------------- + do k = 1,problem_size - 2 + do j = 1,problem_size - 2 + do i = 1,problem_size - 2 + uijk = us(i,j,k) + up1 = us(i + 1,j,k) + um1 = us(i - 1,j,k) + rhs_(1) = forcing(1,i,j,k) + rhs_(2) = forcing(2,i,j,k) + rhs_(3) = forcing(3,i,j,k) + rhs_(4) = forcing(4,i,j,k) + rhs_(5) = forcing(5,i,j,k) + rhs_(1) = rhs_(1) + dx1tx1 * (u(1,i + 1,j,k) - 2.0d0 * u( + &1,i,j,k) + u(1,i - 1,j,k)) - tx2 * (u(2,i + 1,j,k) - u(2,i - 1,j,k + &)) + rhs_(2) = rhs_(2) + dx2tx1 * (u(2,i + 1,j,k) - 2.0d0 * u( + &2,i,j,k) + u(2,i - 1,j,k)) + xxcon2 * con43 * (up1 - 2.0d0 * uijk + &+ um1) - tx2 * (u(2,i + 1,j,k) * up1 - u(2,i - 1,j,k) * um1 + (u(5 + &,i + 1,j,k) - square(i + 1,j,k) - u(5,i - 1,j,k) + square(i - 1,j, + &k)) * c2) + rhs_(3) = rhs_(3) + dx3tx1 * (u(3,i + 1,j,k) - 2.0d0 * u( + &3,i,j,k) + u(3,i - 1,j,k)) + xxcon2 * (vs(i + 1,j,k) - 2.0d0 * vs( + &i,j,k) + vs(i - 1,j,k)) - tx2 * (u(3,i + 1,j,k) * up1 - u(3,i - 1, + &j,k) * um1) + rhs_(4) = rhs_(4) + dx4tx1 * (u(4,i + 1,j,k) - 2.0d0 * u( + &4,i,j,k) + u(4,i - 1,j,k)) + xxcon2 * (ws(i + 1,j,k) - 2.0d0 * ws( + &i,j,k) + ws(i - 1,j,k)) - tx2 * (u(4,i + 1,j,k) * up1 - u(4,i - 1, + &j,k) * um1) + rhs_(5) = rhs_(5) + dx5tx1 * (u(5,i + 1,j,k) - 2.0d0 * u( + &5,i,j,k) + u(5,i - 1,j,k)) + xxcon3 * (qs(i + 1,j,k) - 2.0d0 * qs( + &i,j,k) + qs(i - 1,j,k)) + xxcon4 * (up1 * up1 - 2.0d0 * uijk * uij + &k + um1 * um1) + xxcon5 * (u(5,i + 1,j,k) * rho_i(i + 1,j,k) - 2.0 + &d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i - 1,j,k) * rho_i(i - 1,j,k) + &) - tx2 * ((c1 * u(5,i + 1,j,k) - c2 * square(i + 1,j,k)) * up1 - + &(c1 * u(5,i - 1,j,k) - c2 * square(i - 1,j,k)) * um1) + if (i .eq. 1) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4. + &0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k)) + enddo + else if (i .eq. 2) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i - 1, + &j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k + &)) + enddo + else if (i .ge. 3 .and. i .le. problem_size - 4) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0 + &* u(m,i - 1,j,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m + &,i + 2,j,k)) + enddo + else if (i .eq. problem_size - 3) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0 + &* u(m,i - 1,j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k)) + enddo + else if (i .eq. problem_size - 2) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.d0 * + & u(m,i - 1,j,k) + 5.d0 * u(m,i,j,k)) + enddo + endif + vijk = vs(i,j,k) + vp1 = vs(i,j + 1,k) + vm1 = vs(i,j - 1,k) + rhs_(1) = rhs_(1) + dy1ty1 * (u(1,i,j + 1,k) - 2.0d0 * u( + &1,i,j,k) + u(1,i,j - 1,k)) - ty2 * (u(3,i,j + 1,k) - u(3,i,j - 1,k + &)) + rhs_(2) = rhs_(2) + dy2ty1 * (u(2,i,j + 1,k) - 2.0d0 * u( + &2,i,j,k) + u(2,i,j - 1,k)) + yycon2 * (us(i,j + 1,k) - 2.0d0 * us( + &i,j,k) + us(i,j - 1,k)) - ty2 * (u(2,i,j + 1,k) * vp1 - u(2,i,j - + &1,k) * vm1) + rhs_(3) = rhs_(3) + dy3ty1 * (u(3,i,j + 1,k) - 2.0d0 * u( + &3,i,j,k) + u(3,i,j - 1,k)) + yycon2 * con43 * (vp1 - 2.0d0 * vijk + &+ vm1) - ty2 * (u(3,i,j + 1,k) * vp1 - u(3,i,j - 1,k) * vm1 + (u(5 + &,i,j + 1,k) - square(i,j + 1,k) - u(5,i,j - 1,k) + square(i,j - 1, + &k)) * c2) + rhs_(4) = rhs_(4) + dy4ty1 * (u(4,i,j + 1,k) - 2.0d0 * u( + &4,i,j,k) + u(4,i,j - 1,k)) + yycon2 * (ws(i,j + 1,k) - 2.0d0 * ws( + &i,j,k) + ws(i,j - 1,k)) - ty2 * (u(4,i,j + 1,k) * vp1 - u(4,i,j - + &1,k) * vm1) + rhs_(5) = rhs_(5) + dy5ty1 * (u(5,i,j + 1,k) - 2.0d0 * u( + &5,i,j,k) + u(5,i,j - 1,k)) + yycon3 * (qs(i,j + 1,k) - 2.0d0 * qs( + &i,j,k) + qs(i,j - 1,k)) + yycon4 * (vp1 * vp1 - 2.0d0 * vijk * vij + &k + vm1 * vm1) + yycon5 * (u(5,i,j + 1,k) * rho_i(i,j + 1,k) - 2.0 + &d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i,j - 1,k) * rho_i(i,j - 1,k) + &) - ty2 * ((c1 * u(5,i,j + 1,k) - c2 * square(i,j + 1,k)) * vp1 - + &(c1 * u(5,i,j - 1,k) - c2 * square(i,j - 1,k)) * vm1) + if (j .eq. 1) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4. + &0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k)) + enddo + else if (j .eq. 2) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j - + &1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k + &)) + enddo + else if (j .ge. 3 .and. j .le. problem_size - 4) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0 + &* u(m,i,j - 1,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m + &,i,j + 2,k)) + enddo + else if (j .eq. problem_size - 3) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0 + &* u(m,i,j - 1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k)) + enddo + else if (j .eq. problem_size - 2) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.d0 * + & u(m,i,j - 1,k) + 5.d0 * u(m,i,j,k)) + enddo + endif + wijk = ws(i,j,k) + wp1 = ws(i,j,k + 1) + wm1 = ws(i,j,k - 1) + rhs_(1) = rhs_(1) + dz1tz1 * (u(1,i,j,k + 1) - 2.0d0 * u( + &1,i,j,k) + u(1,i,j,k - 1)) - tz2 * (u(4,i,j,k + 1) - u(4,i,j,k - 1 + &)) + rhs_(2) = rhs_(2) + dz2tz1 * (u(2,i,j,k + 1) - 2.0d0 * u( + &2,i,j,k) + u(2,i,j,k - 1)) + zzcon2 * (us(i,j,k + 1) - 2.0d0 * us( + &i,j,k) + us(i,j,k - 1)) - tz2 * (u(2,i,j,k + 1) * wp1 - u(2,i,j,k + &- 1) * wm1) + rhs_(3) = rhs_(3) + dz3tz1 * (u(3,i,j,k + 1) - 2.0d0 * u( + &3,i,j,k) + u(3,i,j,k - 1)) + zzcon2 * (vs(i,j,k + 1) - 2.0d0 * vs( + &i,j,k) + vs(i,j,k - 1)) - tz2 * (u(3,i,j,k + 1) * wp1 - u(3,i,j,k + &- 1) * wm1) + rhs_(4) = rhs_(4) + dz4tz1 * (u(4,i,j,k + 1) - 2.0d0 * u( + &4,i,j,k) + u(4,i,j,k - 1)) + zzcon2 * con43 * (wp1 - 2.0d0 * wijk + &+ wm1) - tz2 * (u(4,i,j,k + 1) * wp1 - u(4,i,j,k - 1) * wm1 + (u(5 + &,i,j,k + 1) - square(i,j,k + 1) - u(5,i,j,k - 1) + square(i,j,k - + &1)) * c2) + rhs_(5) = rhs_(5) + dz5tz1 * (u(5,i,j,k + 1) - 2.0d0 * u( + &5,i,j,k) + u(5,i,j,k - 1)) + zzcon3 * (qs(i,j,k + 1) - 2.0d0 * qs( + &i,j,k) + qs(i,j,k - 1)) + zzcon4 * (wp1 * wp1 - 2.0d0 * wijk * wij + &k + wm1 * wm1) + zzcon5 * (u(5,i,j,k + 1) * rho_i(i,j,k + 1) - 2.0 + &d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i,j,k - 1) * rho_i(i,j,k - 1) + &) - tz2 * ((c1 * u(5,i,j,k + 1) - c2 * square(i,j,k + 1)) * wp1 - + &(c1 * u(5,i,j,k - 1) - c2 * square(i,j,k - 1)) * wm1) + if (k .eq. 1) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4. + &0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2)) + enddo + else if (k .eq. 2) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j,k + &- 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2 + &)) + enddo + else if (k .ge. 3 .and. k .le. problem_size - 4) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0 + &* u(m,i,j,k - 1) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m + &,i,j,k + 2)) + enddo + else if (k .eq. problem_size - 3) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0 + &* u(m,i,j,k - 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1)) + enddo + else if (k .eq. problem_size - 2) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.d0 * + & u(m,i,j,k - 1) + 5.d0 * u(m,i,j,k)) + enddo + endif + rhs(1,i,j,k) = rhs_(1) * dt + rhs(2,i,j,k) = rhs_(2) * dt + rhs(3,i,j,k) = rhs_(3) * dt + rhs(4,i,j,k) = rhs_(4) * dt + rhs(5,i,j,k) = rhs_(5) * dt + enddo + enddo + enddo +!$SPF END PARALLEL_REG +! DVM$ END REGION + return + end + diff --git a/compute_rhs_sp.for b/compute_rhs_sp.for new file mode 100644 index 0000000..009c101 --- /dev/null +++ b/compute_rhs_sp.for @@ -0,0 +1,256 @@ + +! *** generated by SAPFOR with version 2415 and build date: May 4 2025 14:48:40 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine compute_rhs_sp (aditional_comp) + + include 'header_sp.h' + integer :: i,j,k,m + double precision :: aux,rho_inv,uijk,up1,um1,vijk,vp1,vm1,wijk,wp + &1,wm1,rhs_(5) + double precision :: t1,t2,t3,ac,ru1,uu,vv,ww,ac2inv + integer :: aditional_comp + if (timeron) call timer_start_sp(t_rhs) + +!$SPF PARALLEL_REG r0 +! DVM$ PARALLEL (K,J,I) ON U(*,I,J,K), PRIVATE (RHO_INV,AUX,M),SHADOW_REN +! DVM$&EW (U(0:0,2:3,2:3,2:3)),SHADOW_COMPUTE +! DVM$ REGION OUT (US,VS,WS,QS,RHO_I,SPEED,SQUARE) + do k = 0,problem_size - 1 + do j = 0,problem_size - 1 + do i = 0,problem_size - 1 + rho_inv = 1.0d0 / u(1,i,j,k) + rho_i(i,j,k) = rho_inv + us(i,j,k) = u(2,i,j,k) * rho_inv + vs(i,j,k) = u(3,i,j,k) * rho_inv + ws(i,j,k) = u(4,i,j,k) * rho_inv + square(i,j,k) = 0.5d0 * (u(2,i,j,k) * u(2,i,j,k) + u(3,i, + &j,k) * u(3,i,j,k) + u(4,i,j,k) * u(4,i,j,k)) * rho_inv + qs(i,j,k) = square(i,j,k) * rho_inv + +!--------------------------------------------------------------------- +! (don't need speed and ainx until the lhs computation) +!--------------------------------------------------------------------- + aux = c1c2 * rho_inv * (u(5,i,j,k) - square(i,j,k)) + speed(i,j,k) = dsqrt (aux) + do m = 1,5 + rhs(m,i,j,k) = forcing(m,i,j,k) + enddo + enddo + enddo + enddo + +!$SPF ANALYSIS(PRIVATE(rhs_)) +! DVM$ PARALLEL (K,J,I) ON RHS(*,I,J,K), PRIVATE (UIJK,UP1,UM1,M,VIJK,VP1 +! DVM$&,VM1,WIJK,WP1,WM1,RHS_,T1,T2,T3,AC,RU1,UU,VV,WW,AC2INV),CUDA_BLOCK +! DVM$& (32,4) + do k = 1,nz2 + do j = 1,ny2 + do i = 1,nx2 + uijk = us(i,j,k) + up1 = us(i + 1,j,k) + um1 = us(i - 1,j,k) + rhs_(1) = rhs(1,i,j,k) + rhs_(2) = rhs(2,i,j,k) + rhs_(3) = rhs(3,i,j,k) + rhs_(4) = rhs(4,i,j,k) + rhs_(5) = rhs(5,i,j,k) + rhs_(1) = rhs_(1) + dx1tx1 * (u(1,i + 1,j,k) - 2.0d0 * u( + &1,i,j,k) + u(1,i - 1,j,k)) - tx2 * (u(2,i + 1,j,k) - u(2,i - 1,j,k + &)) + rhs_(2) = rhs_(2) + dx2tx1 * (u(2,i + 1,j,k) - 2.0d0 * u( + &2,i,j,k) + u(2,i - 1,j,k)) + xxcon2 * con43 * (up1 - 2.0d0 * uijk + &+ um1) - tx2 * (u(2,i + 1,j,k) * up1 - u(2,i - 1,j,k) * um1 + (u(5 + &,i + 1,j,k) - square(i + 1,j,k) - u(5,i - 1,j,k) + square(i - 1,j, + &k)) * c2) + rhs_(3) = rhs_(3) + dx3tx1 * (u(3,i + 1,j,k) - 2.0d0 * u( + &3,i,j,k) + u(3,i - 1,j,k)) + xxcon2 * (vs(i + 1,j,k) - 2.0d0 * vs( + &i,j,k) + vs(i - 1,j,k)) - tx2 * (u(3,i + 1,j,k) * up1 - u(3,i - 1, + &j,k) * um1) + rhs_(4) = rhs_(4) + dx4tx1 * (u(4,i + 1,j,k) - 2.0d0 * u( + &4,i,j,k) + u(4,i - 1,j,k)) + xxcon2 * (ws(i + 1,j,k) - 2.0d0 * ws( + &i,j,k) + ws(i - 1,j,k)) - tx2 * (u(4,i + 1,j,k) * up1 - u(4,i - 1, + &j,k) * um1) + rhs_(5) = rhs_(5) + dx5tx1 * (u(5,i + 1,j,k) - 2.0d0 * u( + &5,i,j,k) + u(5,i - 1,j,k)) + xxcon3 * (qs(i + 1,j,k) - 2.0d0 * qs( + &i,j,k) + qs(i - 1,j,k)) + xxcon4 * (up1 * up1 - 2.0d0 * uijk * uij + &k + um1 * um1) + xxcon5 * (u(5,i + 1,j,k) * rho_i(i + 1,j,k) - 2.0 + &d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i - 1,j,k) * rho_i(i - 1,j,k) + &) - tx2 * ((c1 * u(5,i + 1,j,k) - c2 * square(i + 1,j,k)) * up1 - + &(c1 * u(5,i - 1,j,k) - c2 * square(i - 1,j,k)) * um1) + if (i .eq. 1) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4. + &0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k)) + enddo + else if (i .eq. 2) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i - 1, + &j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k + &)) + enddo + else if (i .ge. 3 .and. i .le. nx2 - 2) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0 + &* u(m,i - 1,j,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m + &,i + 2,j,k)) + enddo + else if (i .eq. nx2 - 1) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0 + &* u(m,i - 1,j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k)) + enddo + else if (i .eq. nx2) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.d0 * + & u(m,i - 1,j,k) + 5.d0 * u(m,i,j,k)) + enddo + endif + vijk = vs(i,j,k) + vp1 = vs(i,j + 1,k) + vm1 = vs(i,j - 1,k) + rhs_(1) = rhs_(1) + dy1ty1 * (u(1,i,j + 1,k) - 2.0d0 * u( + &1,i,j,k) + u(1,i,j - 1,k)) - ty2 * (u(3,i,j + 1,k) - u(3,i,j - 1,k + &)) + rhs_(2) = rhs_(2) + dy2ty1 * (u(2,i,j + 1,k) - 2.0d0 * u( + &2,i,j,k) + u(2,i,j - 1,k)) + yycon2 * (us(i,j + 1,k) - 2.0d0 * us( + &i,j,k) + us(i,j - 1,k)) - ty2 * (u(2,i,j + 1,k) * vp1 - u(2,i,j - + &1,k) * vm1) + rhs_(3) = rhs_(3) + dy3ty1 * (u(3,i,j + 1,k) - 2.0d0 * u( + &3,i,j,k) + u(3,i,j - 1,k)) + yycon2 * con43 * (vp1 - 2.0d0 * vijk + &+ vm1) - ty2 * (u(3,i,j + 1,k) * vp1 - u(3,i,j - 1,k) * vm1 + (u(5 + &,i,j + 1,k) - square(i,j + 1,k) - u(5,i,j - 1,k) + square(i,j - 1, + &k)) * c2) + rhs_(4) = rhs_(4) + dy4ty1 * (u(4,i,j + 1,k) - 2.0d0 * u( + &4,i,j,k) + u(4,i,j - 1,k)) + yycon2 * (ws(i,j + 1,k) - 2.0d0 * ws( + &i,j,k) + ws(i,j - 1,k)) - ty2 * (u(4,i,j + 1,k) * vp1 - u(4,i,j - + &1,k) * vm1) + rhs_(5) = rhs_(5) + dy5ty1 * (u(5,i,j + 1,k) - 2.0d0 * u( + &5,i,j,k) + u(5,i,j - 1,k)) + yycon3 * (qs(i,j + 1,k) - 2.0d0 * qs( + &i,j,k) + qs(i,j - 1,k)) + yycon4 * (vp1 * vp1 - 2.0d0 * vijk * vij + &k + vm1 * vm1) + yycon5 * (u(5,i,j + 1,k) * rho_i(i,j + 1,k) - 2.0 + &d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i,j - 1,k) * rho_i(i,j - 1,k) + &) - ty2 * ((c1 * u(5,i,j + 1,k) - c2 * square(i,j + 1,k)) * vp1 - + &(c1 * u(5,i,j - 1,k) - c2 * square(i,j - 1,k)) * vm1) + if (j .eq. 1) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4. + &0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k)) + enddo + else if (j .eq. 2) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j - + &1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k + &)) + enddo + else if (j .ge. 3 .and. j .le. ny2 - 2) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0 + &* u(m,i,j - 1,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m + &,i,j + 2,k)) + enddo + else if (j .eq. ny2 - 1) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0 + &* u(m,i,j - 1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k)) + enddo + else if (j .eq. ny2) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.d0 * + & u(m,i,j - 1,k) + 5.d0 * u(m,i,j,k)) + enddo + endif + wijk = ws(i,j,k) + wp1 = ws(i,j,k + 1) + wm1 = ws(i,j,k - 1) + rhs_(1) = rhs_(1) + dz1tz1 * (u(1,i,j,k + 1) - 2.0d0 * u( + &1,i,j,k) + u(1,i,j,k - 1)) - tz2 * (u(4,i,j,k + 1) - u(4,i,j,k - 1 + &)) + rhs_(2) = rhs_(2) + dz2tz1 * (u(2,i,j,k + 1) - 2.0d0 * u( + &2,i,j,k) + u(2,i,j,k - 1)) + zzcon2 * (us(i,j,k + 1) - 2.0d0 * us( + &i,j,k) + us(i,j,k - 1)) - tz2 * (u(2,i,j,k + 1) * wp1 - u(2,i,j,k + &- 1) * wm1) + rhs_(3) = rhs_(3) + dz3tz1 * (u(3,i,j,k + 1) - 2.0d0 * u( + &3,i,j,k) + u(3,i,j,k - 1)) + zzcon2 * (vs(i,j,k + 1) - 2.0d0 * vs( + &i,j,k) + vs(i,j,k - 1)) - tz2 * (u(3,i,j,k + 1) * wp1 - u(3,i,j,k + &- 1) * wm1) + rhs_(4) = rhs_(4) + dz4tz1 * (u(4,i,j,k + 1) - 2.0d0 * u( + &4,i,j,k) + u(4,i,j,k - 1)) + zzcon2 * con43 * (wp1 - 2.0d0 * wijk + &+ wm1) - tz2 * (u(4,i,j,k + 1) * wp1 - u(4,i,j,k - 1) * wm1 + (u(5 + &,i,j,k + 1) - square(i,j,k + 1) - u(5,i,j,k - 1) + square(i,j,k - + &1)) * c2) + rhs_(5) = rhs_(5) + dz5tz1 * (u(5,i,j,k + 1) - 2.0d0 * u( + &5,i,j,k) + u(5,i,j,k - 1)) + zzcon3 * (qs(i,j,k + 1) - 2.0d0 * qs( + &i,j,k) + qs(i,j,k - 1)) + zzcon4 * (wp1 * wp1 - 2.0d0 * wijk * wij + &k + wm1 * wm1) + zzcon5 * (u(5,i,j,k + 1) * rho_i(i,j,k + 1) - 2.0 + &d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i,j,k - 1) * rho_i(i,j,k - 1) + &) - tz2 * ((c1 * u(5,i,j,k + 1) - c2 * square(i,j,k + 1)) * wp1 - + &(c1 * u(5,i,j,k - 1) - c2 * square(i,j,k - 1)) * wm1) + if (k .eq. 1) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4. + &0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2)) + enddo + else if (k .eq. 2) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j,k + &- 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2 + &)) + enddo + else if (k .ge. 3 .and. k .le. nz2 - 2) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0 + &* u(m,i,j,k - 1) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m + &,i,j,k + 2)) + enddo + else if (k .eq. nz2 - 1) then + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0 + &* u(m,i,j,k - 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1)) + enddo + else + do m = 1,5 + rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.d0 * + & u(m,i,j,k - 1) + 5.d0 * u(m,i,j,k)) + enddo + endif + rhs_(1) = rhs_(1) * dt + rhs_(2) = rhs_(2) * dt + rhs_(3) = rhs_(3) * dt + rhs_(4) = rhs_(4) * dt + rhs_(5) = rhs_(5) * dt + rhs(1,i,j,k) = rhs_(1) + rhs(2,i,j,k) = rhs_(2) + rhs(3,i,j,k) = rhs_(3) + rhs(4,i,j,k) = rhs_(4) + rhs(5,i,j,k) = rhs_(5) + if (aditional_comp .eq. 1) then + ru1 = rho_i(i,j,k) + uu = us(i,j,k) + vv = vs(i,j,k) + ww = ws(i,j,k) + ac = speed(i,j,k) + ac2inv = ac * ac + t1 = c2 / ac2inv * (qs(i,j,k) * rhs_(1) - uu * rhs_(2) + & - vv * rhs_(3) - ww * rhs_(4) + rhs_(5)) + t2 = bt * ru1 * (uu * rhs_(1) - rhs_(2)) + t3 = bt * ru1 * ac * t1 + rhs(1,i,j,k) = rhs_(1) - t1 + rhs(2,i,j,k) = (-(ru1)) * (ww * rhs_(1) - rhs_(4)) + rhs(3,i,j,k) = ru1 * (vv * rhs_(1) - rhs_(3)) + rhs(4,i,j,k) = (-(t2)) + t3 + rhs(5,i,j,k) = t2 + t3 + endif + enddo + enddo + enddo +!$SPF END PARALLEL_REG +! DVM$ END REGION + if (timeron) call timer_stop_sp(t_rhs) + return + end + diff --git a/exact_rhs_bt.for b/exact_rhs_bt.for new file mode 100644 index 0000000..0f10772 --- /dev/null +++ b/exact_rhs_bt.for @@ -0,0 +1,280 @@ + +! *** generated by SAPFOR with version 2412 and build date: Apr 29 2025 22:44:14 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine exact_rhs_bt () + + include 'header3d_bt.h' + double precision :: dtemp(5),xi,eta,zeta,dtpp + integer :: m,i,j,k,ip1,im1,jp1,p,p1,jm1,km1,kp1,z + double precision :: ue_((-(2)):2,5),buf_((-(2)):2,5),cuf_((-(2)): + &2),q_((-(2)):2) + +! DVM$ PARALLEL (K,J,I) ON FORCING(*,I,J,K), PRIVATE (M) +! DVM$ REGION + do k = 0,problem_size - 1 + do j = 0,problem_size - 1 + do i = 0,problem_size - 1 + do m = 1,5 + forcing(m,i,j,k) = 0.0d0 + enddo + enddo + enddo + enddo + +! DVM$ PARALLEL (K,J,I) ON FORCING(*,I,J,K), PRIVATE (ZETA,ETA,XI,M,DTEMP +! DVM$&,BUF_,CUF_,Q_,DTPP,Z,UE_) +!--------------------------------------------------------------------- +! xi-direction flux differences +!--------------------------------------------------------------------- + do k = 1,problem_size - 2 + do j = 1,problem_size - 2 + do i = 1,problem_size - 2 + zeta = dble (k) * dnzm1 + eta = dble (j) * dnym1 + do z = (-(2)),2 + xi = dble (i + z) * dnxm1 + do m = 1,5 + dtemp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + &+ xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m, + &6) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + & (ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + ue_(z,m) = dtemp(m) + enddo + dtpp = 1.0d0 / dtemp(1) + do m = 2,5 + buf_(z,m) = dtpp * dtemp(m) + enddo + cuf_(z) = buf_(z,2) * buf_(z,2) + buf_(z,1) = cuf_(z) + buf_(z,3) * buf_(z,3) + buf_(z,4 + &) * buf_(z,4) + q_(z) = 0.5d0 * (buf_(z,2) * ue_(z,2) + buf_(z,3) * ue + &_(z,3) + buf_(z,4) * ue_(z,4)) + enddo + forcing(1,i,j,k) = forcing(1,i,j,k) - tx2 * (ue_(1,2) - u + &e_((-(1)),2)) + dx1tx1 * (ue_(1,1) - 2.0d0 * ue_(0,1) + ue_((-(1)) + &,1)) + forcing(2,i,j,k) = forcing(2,i,j,k) - tx2 * (ue_(1,2) * b + &uf_(1,2) + c2 * (ue_(1,5) - q_(1)) - (ue_((-(1)),2) * buf_((-(1)), + &2) + c2 * (ue_((-(1)),5) - q_((-(1)))))) + xxcon1 * (buf_(1,2) - 2 + &.0d0 * buf_(0,2) + buf_((-(1)),2)) + dx2tx1 * (ue_(1,2) - 2.0d0 * + &ue_(0,2) + ue_((-(1)),2)) + forcing(3,i,j,k) = forcing(3,i,j,k) - tx2 * (ue_(1,3) * b + &uf_(1,2) - ue_((-(1)),3) * buf_((-(1)),2)) + xxcon2 * (buf_(1,3) - + & 2.0d0 * buf_(0,3) + buf_((-(1)),3)) + dx3tx1 * (ue_(1,3) - 2.0d0 + &* ue_(0,3) + ue_((-(1)),3)) + forcing(4,i,j,k) = forcing(4,i,j,k) - tx2 * (ue_(1,4) * b + &uf_(1,2) - ue_((-(1)),4) * buf_((-(1)),2)) + xxcon2 * (buf_(1,4) - + & 2.0d0 * buf_(0,4) + buf_((-(1)),4)) + dx4tx1 * (ue_(1,4) - 2.0d0 + &* ue_(0,4) + ue_((-(1)),4)) + forcing(5,i,j,k) = forcing(5,i,j,k) - tx2 * (buf_(1,2) * + &(c1 * ue_(1,5) - c2 * q_(1)) - buf_((-(1)),2) * (c1 * ue_((-(1)),5 + &) - c2 * q_((-(1))))) + 0.5d0 * xxcon3 * (buf_(1,1) - 2.0d0 * buf_ + &(0,1) + buf_((-(1)),1)) + xxcon4 * (cuf_(1) - 2.0d0 * cuf_(0) + cu + &f_((-(1)))) + xxcon5 * (buf_(1,5) - 2.0d0 * buf_(0,5) + buf_((-(1) + &),5)) + dx5tx1 * (ue_(1,5) - 2.0d0 * ue_(0,5) + ue_((-(1)),5)) + do m = 1,5 + if (i .eq. 1) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (5.0d0 + & * ue_(0,m) - 4.0d0 * ue_(1,m) + ue_(2,m)) + else if (i .eq. 2) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * ((-(4. + &0d0)) * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_(1,m) + ue_( + &2,m)) + else if (i .eq. problem_size - 3) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_( + &1,m)) + else if (i .eq. problem_size - 2) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 5.0d0 * ue_(0,m)) + else + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_( + &1,m) + ue_(2,m)) + endif + enddo + enddo + enddo + enddo + +! DVM$ PARALLEL (K,J,I) ON FORCING(*,I,J,K), PRIVATE (ZETA,ETA,XI,M,DTEMP +! DVM$&,BUF_,CUF_,Q_,DTPP,Z,UE_) +!--------------------------------------------------------------------- +! eta-direction flux differences +!--------------------------------------------------------------------- + do k = 1,problem_size - 2 + do j = 1,problem_size - 2 + do i = 1,problem_size - 2 + zeta = dble (k) * dnzm1 + xi = dble (i) * dnxm1 + do z = (-(2)),2 + eta = dble (j + z) * dnym1 + do m = 1,5 + dtemp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + &+ xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m, + &6) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + & (ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + ue_(z,m) = dtemp(m) + enddo + dtpp = 1.0d0 / dtemp(1) + do m = 2,5 + buf_(z,m) = dtpp * dtemp(m) + enddo + cuf_(z) = buf_(z,3) * buf_(z,3) + buf_(z,1) = cuf_(z) + buf_(z,2) * buf_(z,2) + buf_(z,4 + &) * buf_(z,4) + q_(z) = 0.5d0 * (buf_(z,2) * ue_(z,2) + buf_(z,3) * ue + &_(z,3) + buf_(z,4) * ue_(z,4)) + enddo + forcing(1,i,j,k) = forcing(1,i,j,k) - ty2 * (ue_(1,3) - u + &e_((-(1)),3)) + dy1ty1 * (ue_(1,1) - 2.0d0 * ue_(0,1) + ue_((-(1)) + &,1)) + forcing(2,i,j,k) = forcing(2,i,j,k) - ty2 * (ue_(1,2) * b + &uf_(1,3) - ue_((-(1)),2) * buf_((-(1)),3)) + yycon2 * (buf_(1,2) - + & 2.0d0 * buf_(0,2) + buf_((-(1)),2)) + dy2ty1 * (ue_(1,2) - 2.0 * + &ue_(0,2) + ue_((-(1)),2)) + forcing(3,i,j,k) = forcing(3,i,j,k) - ty2 * (ue_(1,3) * b + &uf_(1,3) + c2 * (ue_(1,5) - q_(1)) - (ue_((-(1)),3) * buf_((-(1)), + &3) + c2 * (ue_((-(1)),5) - q_((-(1)))))) + yycon1 * (buf_(1,3) - 2 + &.0d0 * buf_(0,3) + buf_((-(1)),3)) + dy3ty1 * (ue_(1,3) - 2.0d0 * + &ue_(0,3) + ue_((-(1)),3)) + forcing(4,i,j,k) = forcing(4,i,j,k) - ty2 * (ue_(1,4) * b + &uf_(1,3) - ue_((-(1)),4) * buf_((-(1)),3)) + yycon2 * (buf_(1,4) - + & 2.0d0 * buf_(0,4) + buf_((-(1)),4)) + dy4ty1 * (ue_(1,4) - 2.0d0 + &* ue_(0,4) + ue_((-(1)),4)) + forcing(5,i,j,k) = forcing(5,i,j,k) - ty2 * (buf_(1,3) * + &(c1 * ue_(1,5) - c2 * q_(1)) - buf_((-(1)),3) * (c1 * ue_((-(1)),5 + &) - c2 * q_((-(1))))) + 0.5d0 * yycon3 * (buf_(1,1) - 2.0d0 * buf_ + &(0,1) + buf_((-(1)),1)) + yycon4 * (cuf_(1) - 2.0d0 * cuf_(0) + cu + &f_((-(1)))) + yycon5 * (buf_(1,5) - 2.0d0 * buf_(0,5) + buf_((-(1) + &),5)) + dy5ty1 * (ue_(1,5) - 2.0d0 * ue_(0,5) + ue_((-(1)),5)) + do m = 1,5 + if (j .eq. 1) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (5.0d0 + & * ue_(0,m) - 4.0d0 * ue_(1,m) + ue_(2,m)) + else if (j .eq. 2) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * ((-(4. + &0d0)) * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_(1,m) + ue_( + &2,m)) + else if (j .eq. problem_size - 3) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_( + &1,m)) + else if (j .eq. problem_size - 2) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 5.0d0 * ue_(0,m)) + else + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_( + &1,m) + ue_(2,m)) + endif + enddo + enddo + enddo + enddo + +! DVM$ PARALLEL (K,J,I) ON FORCING(*,I,J,K), PRIVATE (ZETA,ETA,XI,M,BUF_, +! DVM$&CUF_,Q_,UE_,DTPP,DTEMP,Z) +!--------------------------------------------------------------------- +! zeta-direction flux differences +!--------------------------------------------------------------------- + do k = 1,problem_size - 2 + do j = 1,problem_size - 2 + do i = 1,problem_size - 2 + xi = dble (i) * dnxm1 + eta = dble (j) * dnym1 + do z = (-(2)),2 + zeta = dble (k + z) * dnzm1 + do m = 1,5 + dtemp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + &+ xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m, + &6) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + & (ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + ue_(z,m) = dtemp(m) + enddo + dtpp = 1.0d0 / dtemp(1) + do m = 2,5 + buf_(z,m) = dtpp * dtemp(m) + enddo + cuf_(z) = buf_(z,4) * buf_(z,4) + buf_(z,1) = cuf_(z) + buf_(z,2) * buf_(z,2) + buf_(z,3 + &) * buf_(z,3) + q_(z) = 0.5d0 * (buf_(z,2) * ue_(z,2) + buf_(z,3) * ue + &_(z,3) + buf_(z,4) * ue_(z,4)) + enddo + forcing(1,i,j,k) = forcing(1,i,j,k) - tz2 * (ue_(1,4) - u + &e_((-(1)),4)) + dz1tz1 * (ue_(1,1) - 2.0d0 * ue_(0,1) + ue_((-(1)) + &,1)) + forcing(2,i,j,k) = forcing(2,i,j,k) - tz2 * (ue_(1,2) * b + &uf_(1,4) - ue_((-(1)),2) * buf_((-(1)),4)) + zzcon2 * (buf_(1,2) - + & 2.0d0 * buf_(0,2) + buf_((-(1)),2)) + dz2tz1 * (ue_(1,2) - 2.0d0 + &* ue_(0,2) + ue_((-(1)),2)) + forcing(3,i,j,k) = forcing(3,i,j,k) - tz2 * (ue_(1,3) * b + &uf_(1,4) - ue_((-(1)),3) * buf_((-(1)),4)) + zzcon2 * (buf_(1,3) - + & 2.0d0 * buf_(0,3) + buf_((-(1)),3)) + dz3tz1 * (ue_(1,3) - 2.0d0 + &* ue_(0,3) + ue_((-(1)),3)) + forcing(4,i,j,k) = forcing(4,i,j,k) - tz2 * (ue_(1,4) * b + &uf_(1,4) + c2 * (ue_(1,5) - q_(1)) - (ue_((-(1)),4) * buf_((-(1)), + &4) + c2 * (ue_((-(1)),5) - q_((-(1)))))) + zzcon1 * (buf_(1,4) - 2 + &.0d0 * buf_(0,4) + buf_((-(1)),4)) + dz4tz1 * (ue_(1,4) - 2.0d0 * + &ue_(0,4) + ue_((-(1)),4)) + forcing(5,i,j,k) = forcing(5,i,j,k) - tz2 * (buf_(1,4) * + &(c1 * ue_(1,5) - c2 * q_(1)) - buf_((-(1)),4) * (c1 * ue_((-(1)),5 + &) - c2 * q_((-(1))))) + 0.5d0 * zzcon3 * (buf_(1,1) - 2.0d0 * buf_ + &(0,1) + buf_((-(1)),1)) + zzcon4 * (cuf_(1) - 2.0d0 * cuf_(0) + cu + &f_((-(1)))) + zzcon5 * (buf_(1,5) - 2.0d0 * buf_(0,5) + buf_((-(1) + &),5)) + dz5tz1 * (ue_(1,5) - 2.0d0 * ue_(0,5) + ue_((-(1)),5)) + do m = 1,5 + if (k .eq. 1) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (5.0d0 + & * ue_(0,m) - 4.0d0 * ue_(1,m) + ue_(2,m)) + else if (k .eq. 2) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * ((-(4. + &0d0)) * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_(1,m) + ue_( + &2,m)) + else if (k .eq. problem_size - 3) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_( + &1,m)) + else if (k .eq. problem_size - 2) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 5.0d0 * ue_(0,m)) + else + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_( + &1,m) + ue_(2,m)) + endif + enddo + enddo + enddo + enddo + +! DVM$ PARALLEL (K,J,I) ON FORCING(*,I,J,K), PRIVATE (M) +!--------------------------------------------------------------------- +! now change the sign of the forcing function, +!--------------------------------------------------------------------- + do k = 1,problem_size - 2 + do j = 1,problem_size - 2 + do i = 1,problem_size - 2 + do m = 1,5 + forcing(m,i,j,k) = (-(1.d0)) * forcing(m,i,j,k) + enddo + enddo + enddo + enddo + +! DVM$ END REGION + return + end + diff --git a/exact_rhs_sp.for b/exact_rhs_sp.for new file mode 100644 index 0000000..70bbf37 --- /dev/null +++ b/exact_rhs_sp.for @@ -0,0 +1,280 @@ + +! *** generated by SAPFOR with version 2415 and build date: May 4 2025 14:48:40 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine exact_rhs_sp () + + include 'header_sp.h' + double precision :: dtemp(5),xi,eta,zeta,dtpp + integer :: m,i,j,k,ip1,im1,jp1,p,p1,jm1,km1,kp1,z + double precision :: ue_((-(2)):2,5),buf_((-(2)):2,5),cuf_((-(2)): + &2),q_((-(2)):2) + +! DVM$ PARALLEL (K,J,I) ON FORCING(*,I,J,K), PRIVATE (M) +! DVM$ REGION + do k = 0,problem_size - 1 + do j = 0,problem_size - 1 + do i = 0,problem_size - 1 + do m = 1,5 + forcing(m,i,j,k) = 0.0d0 + enddo + enddo + enddo + enddo + +! DVM$ PARALLEL (K,J,I) ON FORCING(*,I,J,K), PRIVATE (ZETA,ETA,XI,M,DTEMP +! DVM$&,BUF_,CUF_,Q_,DTPP,Z,UE_) +!--------------------------------------------------------------------- +! xi-direction flux differences +!--------------------------------------------------------------------- + do k = 1,problem_size - 2 + do j = 1,problem_size - 2 + do i = 1,problem_size - 2 + zeta = dble (k) * dnzm1 + eta = dble (j) * dnym1 + do z = (-(2)),2 + xi = dble (i + z) * dnxm1 + do m = 1,5 + dtemp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + &+ xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m, + &6) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + & (ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + ue_(z,m) = dtemp(m) + enddo + dtpp = 1.0d0 / dtemp(1) + do m = 2,5 + buf_(z,m) = dtpp * dtemp(m) + enddo + cuf_(z) = buf_(z,2) * buf_(z,2) + buf_(z,1) = cuf_(z) + buf_(z,3) * buf_(z,3) + buf_(z,4 + &) * buf_(z,4) + q_(z) = 0.5d0 * (buf_(z,2) * ue_(z,2) + buf_(z,3) * ue + &_(z,3) + buf_(z,4) * ue_(z,4)) + enddo + forcing(1,i,j,k) = forcing(1,i,j,k) - tx2 * (ue_(1,2) - u + &e_((-(1)),2)) + dx1tx1 * (ue_(1,1) - 2.0d0 * ue_(0,1) + ue_((-(1)) + &,1)) + forcing(2,i,j,k) = forcing(2,i,j,k) - tx2 * (ue_(1,2) * b + &uf_(1,2) + c2 * (ue_(1,5) - q_(1)) - (ue_((-(1)),2) * buf_((-(1)), + &2) + c2 * (ue_((-(1)),5) - q_((-(1)))))) + xxcon1 * (buf_(1,2) - 2 + &.0d0 * buf_(0,2) + buf_((-(1)),2)) + dx2tx1 * (ue_(1,2) - 2.0d0 * + &ue_(0,2) + ue_((-(1)),2)) + forcing(3,i,j,k) = forcing(3,i,j,k) - tx2 * (ue_(1,3) * b + &uf_(1,2) - ue_((-(1)),3) * buf_((-(1)),2)) + xxcon2 * (buf_(1,3) - + & 2.0d0 * buf_(0,3) + buf_((-(1)),3)) + dx3tx1 * (ue_(1,3) - 2.0d0 + &* ue_(0,3) + ue_((-(1)),3)) + forcing(4,i,j,k) = forcing(4,i,j,k) - tx2 * (ue_(1,4) * b + &uf_(1,2) - ue_((-(1)),4) * buf_((-(1)),2)) + xxcon2 * (buf_(1,4) - + & 2.0d0 * buf_(0,4) + buf_((-(1)),4)) + dx4tx1 * (ue_(1,4) - 2.0d0 + &* ue_(0,4) + ue_((-(1)),4)) + forcing(5,i,j,k) = forcing(5,i,j,k) - tx2 * (buf_(1,2) * + &(c1 * ue_(1,5) - c2 * q_(1)) - buf_((-(1)),2) * (c1 * ue_((-(1)),5 + &) - c2 * q_((-(1))))) + 0.5d0 * xxcon3 * (buf_(1,1) - 2.0d0 * buf_ + &(0,1) + buf_((-(1)),1)) + xxcon4 * (cuf_(1) - 2.0d0 * cuf_(0) + cu + &f_((-(1)))) + xxcon5 * (buf_(1,5) - 2.0d0 * buf_(0,5) + buf_((-(1) + &),5)) + dx5tx1 * (ue_(1,5) - 2.0d0 * ue_(0,5) + ue_((-(1)),5)) + do m = 1,5 + if (i .eq. 1) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (5.0d0 + & * ue_(0,m) - 4.0d0 * ue_(1,m) + ue_(2,m)) + else if (i .eq. 2) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * ((-(4. + &0d0)) * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_(1,m) + ue_( + &2,m)) + else if (i .eq. problem_size - 3) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_( + &1,m)) + else if (i .eq. problem_size - 2) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 5.0d0 * ue_(0,m)) + else + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_( + &1,m) + ue_(2,m)) + endif + enddo + enddo + enddo + enddo + +! DVM$ PARALLEL (K,J,I) ON FORCING(*,I,J,K), PRIVATE (ZETA,ETA,XI,M,DTEMP +! DVM$&,BUF_,CUF_,Q_,DTPP,Z,UE_) +!--------------------------------------------------------------------- +! eta-direction flux differences +!--------------------------------------------------------------------- + do k = 1,problem_size - 2 + do j = 1,problem_size - 2 + do i = 1,problem_size - 2 + zeta = dble (k) * dnzm1 + xi = dble (i) * dnxm1 + do z = (-(2)),2 + eta = dble (j + z) * dnym1 + do m = 1,5 + dtemp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + &+ xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m, + &6) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + & (ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + ue_(z,m) = dtemp(m) + enddo + dtpp = 1.0d0 / dtemp(1) + do m = 2,5 + buf_(z,m) = dtpp * dtemp(m) + enddo + cuf_(z) = buf_(z,3) * buf_(z,3) + buf_(z,1) = cuf_(z) + buf_(z,2) * buf_(z,2) + buf_(z,4 + &) * buf_(z,4) + q_(z) = 0.5d0 * (buf_(z,2) * ue_(z,2) + buf_(z,3) * ue + &_(z,3) + buf_(z,4) * ue_(z,4)) + enddo + forcing(1,i,j,k) = forcing(1,i,j,k) - ty2 * (ue_(1,3) - u + &e_((-(1)),3)) + dy1ty1 * (ue_(1,1) - 2.0d0 * ue_(0,1) + ue_((-(1)) + &,1)) + forcing(2,i,j,k) = forcing(2,i,j,k) - ty2 * (ue_(1,2) * b + &uf_(1,3) - ue_((-(1)),2) * buf_((-(1)),3)) + yycon2 * (buf_(1,2) - + & 2.0d0 * buf_(0,2) + buf_((-(1)),2)) + dy2ty1 * (ue_(1,2) - 2.0 * + &ue_(0,2) + ue_((-(1)),2)) + forcing(3,i,j,k) = forcing(3,i,j,k) - ty2 * (ue_(1,3) * b + &uf_(1,3) + c2 * (ue_(1,5) - q_(1)) - (ue_((-(1)),3) * buf_((-(1)), + &3) + c2 * (ue_((-(1)),5) - q_((-(1)))))) + yycon1 * (buf_(1,3) - 2 + &.0d0 * buf_(0,3) + buf_((-(1)),3)) + dy3ty1 * (ue_(1,3) - 2.0d0 * + &ue_(0,3) + ue_((-(1)),3)) + forcing(4,i,j,k) = forcing(4,i,j,k) - ty2 * (ue_(1,4) * b + &uf_(1,3) - ue_((-(1)),4) * buf_((-(1)),3)) + yycon2 * (buf_(1,4) - + & 2.0d0 * buf_(0,4) + buf_((-(1)),4)) + dy4ty1 * (ue_(1,4) - 2.0d0 + &* ue_(0,4) + ue_((-(1)),4)) + forcing(5,i,j,k) = forcing(5,i,j,k) - ty2 * (buf_(1,3) * + &(c1 * ue_(1,5) - c2 * q_(1)) - buf_((-(1)),3) * (c1 * ue_((-(1)),5 + &) - c2 * q_((-(1))))) + 0.5d0 * yycon3 * (buf_(1,1) - 2.0d0 * buf_ + &(0,1) + buf_((-(1)),1)) + yycon4 * (cuf_(1) - 2.0d0 * cuf_(0) + cu + &f_((-(1)))) + yycon5 * (buf_(1,5) - 2.0d0 * buf_(0,5) + buf_((-(1) + &),5)) + dy5ty1 * (ue_(1,5) - 2.0d0 * ue_(0,5) + ue_((-(1)),5)) + do m = 1,5 + if (j .eq. 1) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (5.0d0 + & * ue_(0,m) - 4.0d0 * ue_(1,m) + ue_(2,m)) + else if (j .eq. 2) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * ((-(4. + &0d0)) * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_(1,m) + ue_( + &2,m)) + else if (j .eq. problem_size - 3) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_( + &1,m)) + else if (j .eq. problem_size - 2) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 5.0d0 * ue_(0,m)) + else + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_( + &1,m) + ue_(2,m)) + endif + enddo + enddo + enddo + enddo + +! DVM$ PARALLEL (K,J,I) ON FORCING(*,I,J,K), PRIVATE (ZETA,ETA,XI,M,BUF_, +! DVM$&CUF_,Q_,UE_,DTPP,DTEMP,Z) +!--------------------------------------------------------------------- +! zeta-direction flux differences +!--------------------------------------------------------------------- + do k = 1,problem_size - 2 + do j = 1,problem_size - 2 + do i = 1,problem_size - 2 + xi = dble (i) * dnxm1 + eta = dble (j) * dnym1 + do z = (-(2)),2 + zeta = dble (k + z) * dnzm1 + do m = 1,5 + dtemp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + &+ xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m, + &6) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + & (ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + ue_(z,m) = dtemp(m) + enddo + dtpp = 1.0d0 / dtemp(1) + do m = 2,5 + buf_(z,m) = dtpp * dtemp(m) + enddo + cuf_(z) = buf_(z,4) * buf_(z,4) + buf_(z,1) = cuf_(z) + buf_(z,2) * buf_(z,2) + buf_(z,3 + &) * buf_(z,3) + q_(z) = 0.5d0 * (buf_(z,2) * ue_(z,2) + buf_(z,3) * ue + &_(z,3) + buf_(z,4) * ue_(z,4)) + enddo + forcing(1,i,j,k) = forcing(1,i,j,k) - tz2 * (ue_(1,4) - u + &e_((-(1)),4)) + dz1tz1 * (ue_(1,1) - 2.0d0 * ue_(0,1) + ue_((-(1)) + &,1)) + forcing(2,i,j,k) = forcing(2,i,j,k) - tz2 * (ue_(1,2) * b + &uf_(1,4) - ue_((-(1)),2) * buf_((-(1)),4)) + zzcon2 * (buf_(1,2) - + & 2.0d0 * buf_(0,2) + buf_((-(1)),2)) + dz2tz1 * (ue_(1,2) - 2.0d0 + &* ue_(0,2) + ue_((-(1)),2)) + forcing(3,i,j,k) = forcing(3,i,j,k) - tz2 * (ue_(1,3) * b + &uf_(1,4) - ue_((-(1)),3) * buf_((-(1)),4)) + zzcon2 * (buf_(1,3) - + & 2.0d0 * buf_(0,3) + buf_((-(1)),3)) + dz3tz1 * (ue_(1,3) - 2.0d0 + &* ue_(0,3) + ue_((-(1)),3)) + forcing(4,i,j,k) = forcing(4,i,j,k) - tz2 * (ue_(1,4) * b + &uf_(1,4) + c2 * (ue_(1,5) - q_(1)) - (ue_((-(1)),4) * buf_((-(1)), + &4) + c2 * (ue_((-(1)),5) - q_((-(1)))))) + zzcon1 * (buf_(1,4) - 2 + &.0d0 * buf_(0,4) + buf_((-(1)),4)) + dz4tz1 * (ue_(1,4) - 2.0d0 * + &ue_(0,4) + ue_((-(1)),4)) + forcing(5,i,j,k) = forcing(5,i,j,k) - tz2 * (buf_(1,4) * + &(c1 * ue_(1,5) - c2 * q_(1)) - buf_((-(1)),4) * (c1 * ue_((-(1)),5 + &) - c2 * q_((-(1))))) + 0.5d0 * zzcon3 * (buf_(1,1) - 2.0d0 * buf_ + &(0,1) + buf_((-(1)),1)) + zzcon4 * (cuf_(1) - 2.0d0 * cuf_(0) + cu + &f_((-(1)))) + zzcon5 * (buf_(1,5) - 2.0d0 * buf_(0,5) + buf_((-(1) + &),5)) + dz5tz1 * (ue_(1,5) - 2.0d0 * ue_(0,5) + ue_((-(1)),5)) + do m = 1,5 + if (k .eq. 1) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (5.0d0 + & * ue_(0,m) - 4.0d0 * ue_(1,m) + ue_(2,m)) + else if (k .eq. 2) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * ((-(4. + &0d0)) * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_(1,m) + ue_( + &2,m)) + else if (k .eq. problem_size - 3) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_( + &1,m)) + else if (k .eq. problem_size - 2) then + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 5.0d0 * ue_(0,m)) + else + forcing(m,i,j,k) = forcing(m,i,j,k) - dssp * (ue_(( + &-(2)),m) - 4.0d0 * ue_((-(1)),m) + 6.0d0 * ue_(0,m) - 4.0d0 * ue_( + &1,m) + ue_(2,m)) + endif + enddo + enddo + enddo + enddo + +! DVM$ PARALLEL (K,J,I) ON FORCING(*,I,J,K), PRIVATE (M) +!--------------------------------------------------------------------- +! now change the sign of the forcing function, +!--------------------------------------------------------------------- + do k = 1,problem_size - 2 + do j = 1,problem_size - 2 + do i = 1,problem_size - 2 + do m = 1,5 + forcing(m,i,j,k) = (-(1.d0)) * forcing(m,i,j,k) + enddo + enddo + enddo + enddo + +! DVM$ END REGION + return + end + diff --git a/exact_solution_bt.for b/exact_solution_bt.for new file mode 100644 index 0000000..04b8d25 --- /dev/null +++ b/exact_solution_bt.for @@ -0,0 +1,25 @@ + +! *** generated by SAPFOR with version 2412 and build date: Apr 29 2025 22:44:14 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- +! this function returns the exact solution at point xi, eta, zeta +!--------------------------------------------------------------------- + subroutine exact_solution_bt (xi, eta, zeta, dtemp) + + include 'header3d_bt.h' + double precision :: xi,eta,zeta,dtemp(5) + integer :: m + do m = 1,5 + dtemp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + xi * (ce(m + &,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6) + eta * ( + &ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * (ce(m,7) + + &zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + return + end + diff --git a/header3d_bt.h b/header3d_bt.h new file mode 100644 index 0000000..7e6cee5 --- /dev/null +++ b/header3d_bt.h @@ -0,0 +1,107 @@ +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- +! +! header.h +! +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + + implicit none + +!--------------------------------------------------------------------- +! The following include file is generated automatically by the +! "setparams" utility. it defines +! maxcells: the square root of the maximum number of processors +! problem_size: 12, 64, 102, 162 (for class t, a, b, c) +! dt_default: default time step for this problem size if no +! config file +! niter_default: default number of iterations for this problem size +!--------------------------------------------------------------------- + + include 'npbparams_bt.h' + + integer aa, bb, cc, block_size + parameter (aa=1, bb=2, cc=3, block_size=5) + + integer grid_points(3) + double precision elapsed_time + common /global_bt/ elapsed_time, grid_points + + double precision tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3 + double precision dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4 + double precision dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt + double precision ce(5,13), dxmax, dymax, dzmax, xxcon1, xxcon2 + double precision xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1 + double precision dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4 + double precision yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1 + double precision zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1 + double precision dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1 + double precision dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2 + double precision c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1 + double precision dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1 + double precision c2dtty1, c2dttz1, comz1, comz4, comz5, comz6 + double precision c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16 + integer stage_n, bl, r + + common /constants_bt/ tx1,tx2,tx3,ty1,ty2,ty3, tz1,tz2,tz3 + common /constants_bt/ dx1,dx2,dx3,dx4,dx5,dy1, dy2, dy3, dy4 + common /constants_bt/ dy5,dz1,dz2,dz3,dz4,dz5, dssp, dt + common /constants_bt/ ce,dxmax,dymax,dzmax,xxcon1,xxcon2 + common /constants_bt/ xxcon3,xxcon4,xxcon5,dx1tx1,dx2tx1,dx3tx1 + common /constants_bt/ dx4tx1,dx5tx1,yycon1,yycon2,yycon3,yycon4 + common /constants_bt/ yycon5,dy1ty1,dy2ty1,dy3ty1,dy4ty1,dy5ty1 + common /constants_bt/ zzcon1,zzcon2,zzcon3,zzcon4,zzcon5,dz1tz1 + common /constants_bt/ dz2tz1,dz3tz1,dz4tz1,dz5tz1,dnxm1,dnym1 + common /constants_bt/ dnzm1,c1c2,c1c5,c3c4,c1345,conz1, c1, c2 + common /constants_bt/ c3,c4,c5,c4dssp,c5dssp,dtdssp, dttx1 + common /constants_bt/ dttx2,dtty1,dtty2,dttz1,dttz2,c2dttx1 + common /constants_bt/ c2dtty1,c2dttz1,comz1,comz4,comz5,comz6 + common /constants_bt/ c3c4tx3,c3c4ty3,c3c4tz3,c2iv,con43,con16 + common /constants_bt/ stage_n + + integer imax, jmax, kmax + + parameter (imax=problem_size,jmax=problem_size,kmax=problem_size) + parameter (bl=1, r=0) +! +! to improve cache performance, grid dimensions padded by 1 +! for even number sizes only. +! + double precision us(0:imax/2*2, 0:jmax/2*2, 0:kmax/2*2) + double precision vs(0:imax/2*2, 0:jmax/2*2, 0:kmax/2*2) + double precision ws(0:imax/2*2, 0:jmax/2*2, 0:kmax/2*2) + double precision qs(0:imax/2*2, 0:jmax/2*2, 0:kmax/2*2) + double precision rho_i(0:imax/2*2, 0:jmax/2*2, 0:kmax/2*2) + double precision square(0:imax/2*2, 0:jmax/2*2, 0:kmax/2*2) + double precision forcing (5,0:imax/2*2,0:jmax/2*2, 0:kmax/2*2) + double precision u(5,0:(imax+1)/2*2,0:(jmax+1)/2*2,0:(kmax+1)/2*2) + double precision rhs(5,0:imax/2*2, 0:jmax/2*2, 0:kmax/2*2) + double precision lhs__(5,5,0:imax/2*2,0:jmax/2*2,0:kmax/2*2/bl+r) + double precision speed(0:imax/2*2, 0:jmax/2*2, 0:kmax/2*2) + common /fields/ u, us, vs, ws, qs, rho_i, speed, square + common /fields/ rhs, forcing, lhs__ + + double precision cv(-2:problem_size+1) + double precision cuf(-2:problem_size+1), q(-2:problem_size+1) + double precision ue(-2:problem_size+1,5), buf(-2:problem_size+1,5) + common /work_1d_bt/ cv, cuf, q, ue, buf + + double precision tmp1, tmp2, tmp3, tmp11, tmp22 + double precision t1, t2, t3, tm1, tm2, tm3 + + common /work_lhs_bt/ tmp1, tmp2, tmp3, tmp11, tmp22 + common /work_lhs_bt/ t1, t2, t3, tm1, tm2, tm3 + double precision tmp_block(5,5), b_inverse(5,5), tmp_vec(5) + common /work_solve_bt/ tmp_block, b_inverse, tmp_vec +!-------------------------------------------------------------------- +! fdvm specifications +!-------------------------------------------------------------------- + +! dvm$ distribute us (block,block,block) +! dvm$ align (i,j,k) with us(i,j,k) :: vs, ws, qs, rho_i, square +! dvm$ align (*,*,i,j,k) with us(i,j,k) :: lhs__ +! dvm$ align (*,i,j,k) with us(i,j,k) :: u, rhs +! dvm$ align (*,i,j,k) with us(i,j,k) :: forcing + +! dvm$ shadow u(2:2,2:2,2:2,2:2) + diff --git a/header_sp.h b/header_sp.h new file mode 100644 index 0000000..5008cb9 --- /dev/null +++ b/header_sp.h @@ -0,0 +1,120 @@ + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + + implicit none + +c--------------------------------------------------------------------- +c The following include file is generated automatically by the +c "setparams" utility. it defines +c problem_size: 12, 64, 102, 162 (for class t, a, b, c) +c dt_default: default time step for this problem size if no +c config file +c niter_default: default number of iterations for this problem size +c--------------------------------------------------------------------- + + include 'npbparams_sp.h' + + integer grid_points(3), nx2, ny2, nz2,stage_n + common /global/ grid_points, nx2, ny2, nz2, timeron + + double precision tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3, + & dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4, + & dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt, + & ce(5,13), dxmax, dymax, dzmax, xxcon1, xxcon2, + & xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1, + & dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4, + & yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1, + & zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1, + & dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1, + & dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2, + & c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt, + & dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1, + & c2dtty1, c2dttz1, comz1, comz4, comz5, comz6, + & c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16 + + common /constants_sp/ tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3, + & dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4, + & dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt, + & ce, dxmax, dymax, dzmax, xxcon1, xxcon2, + & xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1, + & dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4, + & yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1, + & zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1, + & dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1, + & dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2, + & c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt, + & dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1, + & c2dtty1, c2dttz1, comz1, comz4, comz5, comz6, + & c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16, + & stage_n + + + integer imax, jmax, kmax, imaxp, jmaxp + + parameter (imax=problem_size,jmax=problem_size,kmax=problem_size) + parameter (imaxp=imax/2*2,jmaxp=jmax/2*2) + +c--------------------------------------------------------------------- +c to improve cache performance, first two dimensions padded by 1 +c for even number sizes only +c--------------------------------------------------------------------- + double precision + & u (5, 0:imaxp, 0:jmaxp, 0:kmax), + & us ( 0:imaxp, 0:jmaxp, 0:kmax), + & vs ( 0:imaxp, 0:jmaxp, 0:kmax), + & ws ( 0:imaxp, 0:jmaxp, 0:kmax), + & qs ( 0:imaxp, 0:jmaxp, 0:kmax), + & rho_i ( 0:imaxp, 0:jmaxp, 0:kmax), + & speed ( 0:imaxp, 0:jmaxp, 0:kmax), + & square ( 0:imaxp, 0:jmaxp, 0:kmax), + & rhs (5, 0:imaxp, 0:jmaxp, 0:kmax), + & forcing (5, 0:imaxp, 0:jmaxp, 0:kmax) + + common /fields/ u, us, vs, ws, qs, rho_i, speed, square, + & rhs, forcing + + double precision cv(0:problem_size-1), rhon(0:problem_size-1), + & rhos(0:problem_size-1), rhoq(0:problem_size-1), + & cuf(0:problem_size-1), q(0:problem_size-1), + & ue(0:problem_size-1,5), buf(0:problem_size-1,5), + & rhon_(0:problem_size-1,0:problem_size-1), + & cv_(0:problem_size-1,0:problem_size-1) + common /work_1d_sp/ cv,rhon,rhos,rhoq, cuf, q, ue, buf,rhon_,cv_ + + double precision + & lhs(0:2,1:5,0:imaxp, 0:jmaxp, 0:kmax) + common /work_lhs_sp/ lhs + +c----------------------------------------------------------------------- +c timer constants +c----------------------------------------------------------------------- + integer t_rhsx,t_rhsy,t_rhsz,t_xsolve,t_ysolve,t_zsolve, + & t_rdis1,t_rdis2,t_tzetar,t_ninvr,t_pinvr,t_add, + & t_rhs,t_txinvr,t_last,t_total + logical timeron + parameter (t_total = 1) + parameter (t_rhsx = 2) + parameter (t_rhsy = 3) + parameter (t_rhsz = 4) + parameter (t_rhs = 5) + parameter (t_xsolve = 6) + parameter (t_ysolve = 7) + parameter (t_zsolve = 8) + parameter (t_rdis1 = 9) + parameter (t_rdis2 = 10) + parameter (t_txinvr = 11) + parameter (t_pinvr = 12) + parameter (t_ninvr = 13) + parameter (t_tzetar = 14) + parameter (t_add = 15) + parameter (t_last = 15) + +! dvm$ shadow lhs(0:0,0:0,2:2,2:2,2:2) +! dvm$ shadow (0:0,2:3,2:3,2:3) :: rhs,forcing,u +! dvm$ shadow (2:3,2:3,2:3) :: qs,us,ws,vs,speed,square,rho_i + +! dvm$ distribute u(*,block,block,block) +! dvm$ align (*,i,j,k) with u(*,i,j,k) :: forcing,rhs +! dvm$ align (*,*,i,j,k) with u(*,i,j,k) :: lhs +! dvm$ align (i,j,k) with u(*,i,j,k) :: square,speed,rho_i,qs,ws,vs,us diff --git a/initialize_bt.for b/initialize_bt.for new file mode 100644 index 0000000..b58b1ca --- /dev/null +++ b/initialize_bt.for @@ -0,0 +1,186 @@ + +! *** generated by SAPFOR with version 2412 and build date: Apr 29 2025 22:44:14 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- +! This subroutine initializes_bt the field variable u using +! tri-linear transfinite interpolation of the boundary values +!--------------------------------------------------------------------- + subroutine initialize_bt () + + include 'header3d_bt.h' + integer :: i,j,k,m,ix,iy,iz + double precision :: xi,eta,zeta,pface(5,3,2),pxi,peta,pzeta,temp( + &5),xi1,yi1,zi1 + xi = 0.0 + eta = 0.0 + zeta = 0.0 + + +!$SPF PARALLEL_REG r0 +! DVM$ PARALLEL (K,J,I) ON U(*,I,J,K), SHADOW_COMPUTE ,PRIVATE (M) +! DVM$ REGION OUT (U) +!--------------------------------------------------------------------- +! Later (in compute_rhs) we compute 1/u for every element. A few of +! the corner elements are not used, but it convenient (and faster) +! to compute the whole thing with a simple loop. Make sure those +! values are nonzero by initializing the whole thing here. +!--------------------------------------------------------------------- + do k = 0,imax - 1 + do j = 0,imax - 1 + do i = 0,imax - 1 + do m = 1,5 + u(m,i,j,k) = 1.0 + enddo + enddo + enddo + enddo + +!$SPF ANALYSIS(PRIVATE(temp, pface)) +! DVM$ PARALLEL (K,J,I) ON U(*,I,J,K), PRIVATE (M,ZETA,ETA,XI,IX,IY,IZ,PX +! DVM$&I,PETA,PZETA,PFACE,XI1,YI1,ZI1,TEMP),SHADOW_COMPUTE + do k = 0,grid_points(3) - 1 + do j = 0,grid_points(2) - 1 + do i = 0,grid_points(1) - 1 + zeta = dble (k) * dnzm1 + eta = dble (j) * dnym1 + xi = dble (i) * dnxm1 + do ix = 1,2 + +! call exact_solution_bt(dble(ix-1), eta, zeta, Pface(1,1,ix)) + xi1 = dble (ix - 1) + do m = 1,5 + pface(m,1,ix) = ce(m,1) + xi1 * (ce(m,2) + xi1 * (c + &e(m,5) + xi1 * (ce(m,8) + xi1 * ce(m,11)))) + eta * (ce(m,3) + eta + & * (ce(m,6) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + & + zeta * (ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + enddo + do iy = 1,2 + +! call exact_solution_bt(xi, dble(iy-1) , zeta, Pface(1,2,iy)) + yi1 = dble (iy - 1) + do m = 1,5 + pface(m,2,iy) = ce(m,1) + xi * (ce(m,2) + xi * (ce( + &m,5) + xi * (ce(m,8) + xi * ce(m,11)))) + yi1 * (ce(m,3) + yi1 * ( + &ce(m,6) + yi1 * (ce(m,9) + yi1 * ce(m,12)))) + zeta * (ce(m,4) + z + &eta * (ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + enddo + do iz = 1,2 + +! call exact_solution_bt(xi, eta, dble(iz-1), Pface(1,3,iz)) + zi1 = dble (iz - 1) + do m = 1,5 + pface(m,3,iz) = ce(m,1) + xi * (ce(m,2) + xi * (ce( + &m,5) + xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * ( + &ce(m,6) + eta * (ce(m,9) + eta * ce(m,12)))) + zi1 * (ce(m,4) + zi + &1 * (ce(m,7) + zi1 * (ce(m,10) + zi1 * ce(m,13)))) + enddo + enddo + do m = 1,5 + pxi = xi * pface(m,1,2) + (1.0d0 - xi) * pface(m,1,1) + peta = eta * pface(m,2,2) + (1.0d0 - eta) * pface(m,2, + &1) + pzeta = zeta * pface(m,3,2) + (1.0d0 - zeta) * pface(m + &,3,1) + u(m,i,j,k) = pxi + peta + pzeta - pxi * peta - pxi * p + &zeta - peta * pzeta + pxi * peta * pzeta + enddo + if (i .eq. 0) then + do m = 1,5 + temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + + & xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6 + &) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + &(ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + u(m,i,j,k) = temp(m) + enddo + endif + if (i .eq. grid_points(1) - 1) then + xi = 1.0d0 + do m = 1,5 + temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + + & xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6 + &) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + &(ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + u(m,i,j,k) = temp(m) + enddo + endif + if (j .eq. 0) then + zeta = dble (k) * dnzm1 + xi = dble (i) * dnxm1 + eta = 0.0d0 + do m = 1,5 + temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + + & xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6 + &) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + &(ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + u(m,i,j,k) = temp(m) + enddo + endif + if (j .eq. grid_points(2) - 1) then + zeta = dble (k) * dnzm1 + xi = dble (i) * dnxm1 + eta = 1.0d0 + +! call exact_solution_bt(xi, eta, zeta, temp) + do m = 1,5 + temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + + & xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6 + &) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + &(ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + u(m,i,j,k) = temp(m) + enddo + endif + if (k .eq. 0) then + zeta = 0.0d0 + xi = dble (i) * dnxm1 + eta = dble (j) * dnym1 + +! call exact_solution_bt(xi, eta, zeta, temp) + do m = 1,5 + temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + + & xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6 + &) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + &(ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + u(m,i,j,k) = temp(m) + enddo + endif + if (k .eq. grid_points(3) - 1) then + zeta = 1.0d0 + xi = dble (i) * dnxm1 + eta = dble (j) * dnym1 + +! call exact_solution_bt(xi, eta, zeta, temp) + do m = 1,5 + temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + + & xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6 + &) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + &(ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + u(m,i,j,k) = temp(m) + enddo + endif + enddo + enddo + enddo +!$SPF END PARALLEL_REG +! DVM$ END REGION + return + end + diff --git a/initialize_sp.for b/initialize_sp.for new file mode 100644 index 0000000..2865bf1 --- /dev/null +++ b/initialize_sp.for @@ -0,0 +1,163 @@ + +! *** generated by SAPFOR with version 2415 and build date: May 4 2025 14:48:40 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine initialize_sp () + + include 'header_sp.h' + integer :: i,j,k,m,ix,iy,iz + double precision :: xi,eta,zeta,pface(5,3,2),pxi,peta,pzeta,temp( + &5) + +! DVM$ PARALLEL (K,J,I) ON U(*,I,J,K), PRIVATE (ZETA,ETA,XI,IX,PXI,M,PFAC +! DVM$&E,IY,PETA,IZ,PZETA,TEMP) +! DVM$ REGION + do k = 0,problem_size - 1 + do j = 0,problem_size - 1 + do i = 0,problem_size - 1 + u(1,i,j,k) = 1.0 + u(2,i,j,k) = 0.0 + u(3,i,j,k) = 0.0 + u(4,i,j,k) = 0.0 + u(5,i,j,k) = 1.0 + zeta = dble (k) * dnzm1 + eta = dble (j) * dnym1 + xi = dble (i) * dnxm1 + do ix = 1,2 + pxi = dble (ix - 1) + do m = 1,5 + pface(m,1,ix) = ce(m,1) + pxi * (ce(m,2) + pxi * (c + &e(m,5) + pxi * (ce(m,8) + pxi * ce(m,11)))) + eta * (ce(m,3) + eta + & * (ce(m,6) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + & + zeta * (ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + enddo + do iy = 1,2 + peta = dble (iy - 1) + do m = 1,5 + pface(m,2,iy) = ce(m,1) + xi * (ce(m,2) + xi * (ce( + &m,5) + xi * (ce(m,8) + xi * ce(m,11)))) + peta * (ce(m,3) + peta * + & (ce(m,6) + peta * (ce(m,9) + peta * ce(m,12)))) + zeta * (ce(m,4) + & + zeta * (ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + enddo + do iz = 1,2 + pzeta = dble (iz - 1) + do m = 1,5 + pface(m,3,iz) = ce(m,1) + xi * (ce(m,2) + xi * (ce( + &m,5) + xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * ( + &ce(m,6) + eta * (ce(m,9) + eta * ce(m,12)))) + pzeta * (ce(m,4) + + &pzeta * (ce(m,7) + pzeta * (ce(m,10) + pzeta * ce(m,13)))) + enddo + enddo + do m = 1,5 + pxi = xi * pface(m,1,2) + (1.0d0 - xi) * pface(m,1,1) + peta = eta * pface(m,2,2) + (1.0d0 - eta) * pface(m,2, + &1) + pzeta = zeta * pface(m,3,2) + (1.0d0 - zeta) * pface(m + &,3,1) + u(m,i,j,k) = pxi + peta + pzeta - pxi * peta - pxi * p + &zeta - peta * pzeta + pxi * peta * pzeta + enddo + zeta = dble (k) * dnzm1 + eta = dble (j) * dnym1 + xi = 0.0d0 + if (i .eq. 0) then + +! call exact_solution_sp(xi, eta, zeta, temp) + do m = 1,5 + temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + + & xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6 + &) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + &(ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + u(m,i,j,k) = temp(m) + enddo + endif + xi = 1.0d0 + if (i .eq. problem_size - 1) then + +! call exact_solution_sp(xi, eta, zeta, temp) + do m = 1,5 + temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + + & xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6 + &) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + &(ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + u(m,i,j,k) = temp(m) + enddo + endif + zeta = dble (k) * dnzm1 + eta = 0.0d0 + xi = dble (i) * dnxm1 + if (j .eq. 0) then + +! call exact_solution_sp(xi, eta, zeta, temp) + do m = 1,5 + temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + + & xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6 + &) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + &(ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + u(m,i,j,k) = temp(m) + enddo + endif + eta = 1.0d0 + if (j .eq. problem_size - 1) then + +! call exact_solution_sp(xi, eta, zeta, temp) + do m = 1,5 + temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + + & xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6 + &) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + &(ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + u(m,i,j,k) = temp(m) + enddo + endif + zeta = 0.0d0 + eta = dble (j) * dnym1 + xi = dble (i) * dnxm1 + if (k .eq. 0) then + +! call exact_solution_sp(xi, eta, zeta, temp) + do m = 1,5 + temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + + & xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6 + &) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + &(ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + u(m,i,j,k) = temp(m) + enddo + endif + zeta = 1.0d0 + if (k .eq. problem_size - 1) then + +! call exact_solution_sp(xi, eta, zeta, temp) + do m = 1,5 + temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + + & xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6 + &) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * + &(ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13)))) + enddo + do m = 1,5 + u(m,i,j,k) = temp(m) + enddo + endif + enddo + enddo + enddo + +! DVM$ END REGION + return + end + diff --git a/npbparams_bt.h b/npbparams_bt.h new file mode 100644 index 0000000..bb1f66f --- /dev/null +++ b/npbparams_bt.h @@ -0,0 +1,31 @@ +! class = c +! +! +! this file is generated automatically by the setparams utility. +! it sets the number of processors and the class of the npb +! in this directory. do not modify it by hand. +! + integer problem_size, niter_default + parameter (problem_size=162, niter_default=200) + double precision dt_default + parameter (dt_default = 0.0001d0) + logical convertdouble + parameter (convertdouble = .false.) + character compiletime*11 + parameter (compiletime='29 apr 2025') + character npbversion*5 + parameter (npbversion='3.3.1') + character cs1*3 + parameter (cs1='dvm') + character cs2*3 + parameter (cs2='dvm') + character cs3*6 + parameter (cs3='(none)') + character cs4*6 + parameter (cs4='(none)') + character cs5*7 + parameter (cs5='${fopt}') + character cs6*6 + parameter (cs6='(none)') + character cs7*6 + parameter (cs7='(none)') diff --git a/npbparams_sp.h b/npbparams_sp.h new file mode 100644 index 0000000..2303491 --- /dev/null +++ b/npbparams_sp.h @@ -0,0 +1,31 @@ +! class = c +! +! +! this file is generated automatically by the setparams utility. +! it sets the number of processors and the class of the npb +! in this directory. do not modify it by hand. +! + integer problem_size, niter_default + parameter (problem_size=162, niter_default=400) + double precision dt_default + parameter (dt_default = 0.00067d0) + logical convertdouble + parameter (convertdouble = .false.) + character compiletime*11 + parameter (compiletime='29 apr 2025') + character npbversion*5 + parameter (npbversion='3.3.1') + character cs1*3 + parameter (cs1='dvm') + character cs2*3 + parameter (cs2='dvm') + character cs3*6 + parameter (cs3='(none)') + character cs4*6 + parameter (cs4='(none)') + character cs5*7 + parameter (cs5='${fopt}') + character cs6*6 + parameter (cs6='(none)') + character cs7*6 + parameter (cs7='(none)') diff --git a/print_result_bt.for b/print_result_bt.for new file mode 100644 index 0000000..37d9ea7 --- /dev/null +++ b/print_result_bt.for @@ -0,0 +1,63 @@ + +! *** generated by SAPFOR with version 2412 and build date: Apr 29 2025 22:44:14 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + subroutine print_results_bt (name,class,n1,n2,n3,niter,t,mops, + & optype, verified, npbversion) + +! , compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7) + implicit none + character*2 :: name + character*1 :: class + integer :: n1,n2,n3,niter,j + double precision :: t,mops + character :: optype*24,size*13 + logical :: verified + character*(*) :: npbversion + +! , compiletime,cs1, cs2, cs3, cs4, cs5, cs6, cs7 + write (unit = *,fmt = 2) name +2 format(//, ' ', A2, ' Benchmark Completed.') + write (unit = *,fmt = 3) class +3 format(' Class = ', 12x, a12) + +! If this is not a grid-based problem (EP, FT, CG), then +! we only print n1, which contains some measure of the +! problem size. In that case, n2 and n3 are both zero. +! Otherwise, we print the grid size n1xn2xn3 + if (n2 .eq. 0 .and. n3 .eq. 0) then + if (name(1:2) .eq. 'EP') then + write (unit = size,fmt = '(f12.0)') 2.d0** n1 + do j = 13,1,(-(1)) + if (size(j:j) .eq. '.') size(j:j) = ' ' + enddo + write (unit = *,fmt = 42) size +42 format(' Size = ',12x, a14) + else + write (unit = *,fmt = 44) n1 +44 format(' Size = ',12x, i12) + endif + else + write (unit = *,fmt = 4) n1,n2,n3 +4 format(' Size = ',12x, i3,'x',i3,'x',i3) + endif + write (unit = *,fmt = 5) niter +5 format(' Iterations = ', 12x, i12) + write (unit = *,fmt = 6) t +6 format(' Time in seconds = ',12x, f12.2) + write (unit = *,fmt = 9) mops +9 format(' Mop/s total = ',12x, f12.2) + write (unit = *,fmt = 11) optype +11 format(' Operation type = ', a24) + if (verified) then + write (unit = *,fmt = 12) ' SUCCESSFUL' + else + write (unit = *,fmt = 12) 'UNSUCCESSFUL' + endif +12 format(' Verification = ', 12x, a) + write (unit = *,fmt = 13) npbversion +13 format(' Version = ', 12x, a12) + return + end + diff --git a/print_result_sp.for b/print_result_sp.for new file mode 100644 index 0000000..054fe8d --- /dev/null +++ b/print_result_sp.for @@ -0,0 +1,101 @@ + +! *** generated by SAPFOR with version 2415 and build date: May 4 2025 14:48:40 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + subroutine print_results_sp (name,class,n1,n2,n3,niter,t,mops, + & optype, verified, npbversion, compiletime, cs1, cs2, cs3, cs4, cs + &5, cs6, cs7) + implicit none + character :: name*(*) + character :: class*1 + integer :: n1,n2,n3,niter,j + double precision :: t,mops + character :: optype*24,size*15 + logical :: verified + character*(*) :: npbversion,compiletime,cs1,cs2,cs3,cs4,cs5,cs6,c + &s7 + integer :: num_threads,max_threads,i + max_threads = 1 + num_threads = 1 + write (unit = *,fmt = 2) name +2 format(//, ' ', A, ' Benchmark Completed.') + write (unit = *,fmt = 3) class +3 format(' Class = ', 12x, a12) + +! If this is not a grid-based problem (EP, FT, CG), then +! we only print n1, which contains some measure of the +! problem size. In that case, n2 and n3 are both zero. +! Otherwise, we print the grid size n1xn2xn3 + if (n2 .eq. 0 .and. n3 .eq. 0) then + if (name(1:2) .eq. 'EP') then + write (unit = size,fmt = '(f15.0)') 2.d0** n1 + j = 15 + if (size(j:j) .eq. '.') j = j - 1 + write (unit = *,fmt = 42) size(1:j) +42 format(' Size = ',9x, a15) + else + write (unit = *,fmt = 44) n1 +44 format(' Size = ',12x, i12) + endif + else + write (unit = *,fmt = 4) n1,n2,n3 +4 format(' Size = ',9x, i4,'x',i4,'x',i4) + endif + write (unit = *,fmt = 5) niter +5 format(' Iterations = ', 12x, i12) + write (unit = *,fmt = 6) t +6 format(' Time in seconds = ',12x, f12.2) + write (unit = *,fmt = 7) num_threads +7 format(' Total threads = ', 12x, i12) + write (unit = *,fmt = 8) max_threads +8 format(' Avail threads = ', 12x, i12) + if (num_threads .ne. max_threads) write (unit = *,fmt = 88) +88 format(' Warning: Threads used differ from threads available') + write (unit = *,fmt = 9) mops +9 format(' Mop/s total = ',12x, f12.2) + write (unit = *,fmt = 10) mops / float (num_threads) +10 format(' Mop/s/thread = ', 12x, f12.2) + write (unit = *,fmt = 11) optype +11 format(' Operation type = ', a24) + if (verified) then + write (unit = *,fmt = 12) ' SUCCESSFUL' + else + write (unit = *,fmt = 12) 'UNSUCCESSFUL' + endif +12 format(' Verification = ', 12x, a) + write (unit = *,fmt = 13) npbversion +13 format(' Version = ', 12x, a12) + write (unit = *,fmt = 14) compiletime +14 format(' Compile date = ', 12x, a12) + write (unit = *,fmt = 121) cs1 +121 format(/, ' Compile options:', /, ' F77 = + & ', A) + write (unit = *,fmt = 122) cs2 +122 format(' FLINK = ', A) + write (unit = *,fmt = 123) cs3 +123 format(' F_LIB = ', A) + write (unit = *,fmt = 124) cs4 +124 format(' F_INC = ', A) + write (unit = *,fmt = 125) cs5 +125 format(' FFLAGS = ', A) + write (unit = *,fmt = 126) cs6 +126 format(' FLINKFLAGS = ', A) + write (unit = *,fmt = 127) cs7 +127 format(' RAND = ', A) + write (unit = *,fmt = 130) +130 format(//' Please send all errors/feedbacks to:'// ' + & NPB Development Team'/ ' npb@nas.nasa.gov'//) + +! 130 format(//' Please send the results of this run to:'// +! > ' NPB Development Team '/ +! > ' Internet: npb@nas.nasa.gov'/ +! > ' '/ +! > ' If email is not available, send this to:'// +! > ' MS T27A-1'/ +! > ' NASA Ames Research Center'/ +! > ' Moffett Field, CA 94035-1000'// +! > ' Fax: 650-604-3957'//) + return + end + diff --git a/set_constants_bt.for b/set_constants_bt.for new file mode 100644 index 0000000..6394718 --- /dev/null +++ b/set_constants_bt.for @@ -0,0 +1,172 @@ + +! *** generated by SAPFOR with version 2412 and build date: Apr 29 2025 22:44:14 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine set_constants_bt () + + include 'header3d_bt.h' + ce(1,1) = 2.0d0 + ce(1,2) = 0.0d0 + ce(1,3) = 0.0d0 + ce(1,4) = 4.0d0 + ce(1,5) = 5.0d0 + ce(1,6) = 3.0d0 + ce(1,7) = 0.5d0 + ce(1,8) = 0.02d0 + ce(1,9) = 0.01d0 + ce(1,10) = 0.03d0 + ce(1,11) = 0.5d0 + ce(1,12) = 0.4d0 + ce(1,13) = 0.3d0 + ce(2,1) = 1.0d0 + ce(2,2) = 0.0d0 + ce(2,3) = 0.0d0 + ce(2,4) = 0.0d0 + ce(2,5) = 1.0d0 + ce(2,6) = 2.0d0 + ce(2,7) = 3.0d0 + ce(2,8) = 0.01d0 + ce(2,9) = 0.03d0 + ce(2,10) = 0.02d0 + ce(2,11) = 0.4d0 + ce(2,12) = 0.3d0 + ce(2,13) = 0.5d0 + ce(3,1) = 2.0d0 + ce(3,2) = 2.0d0 + ce(3,3) = 0.0d0 + ce(3,4) = 0.0d0 + ce(3,5) = 0.0d0 + ce(3,6) = 2.0d0 + ce(3,7) = 3.0d0 + ce(3,8) = 0.04d0 + ce(3,9) = 0.03d0 + ce(3,10) = 0.05d0 + ce(3,11) = 0.3d0 + ce(3,12) = 0.5d0 + ce(3,13) = 0.4d0 + ce(4,1) = 2.0d0 + ce(4,2) = 2.0d0 + ce(4,3) = 0.0d0 + ce(4,4) = 0.0d0 + ce(4,5) = 0.0d0 + ce(4,6) = 2.0d0 + ce(4,7) = 3.0d0 + ce(4,8) = 0.03d0 + ce(4,9) = 0.05d0 + ce(4,10) = 0.04d0 + ce(4,11) = 0.2d0 + ce(4,12) = 0.1d0 + ce(4,13) = 0.3d0 + ce(5,1) = 5.0d0 + ce(5,2) = 4.0d0 + ce(5,3) = 3.0d0 + ce(5,4) = 2.0d0 + ce(5,5) = 0.1d0 + ce(5,6) = 0.4d0 + ce(5,7) = 0.3d0 + ce(5,8) = 0.05d0 + ce(5,9) = 0.04d0 + ce(5,10) = 0.03d0 + ce(5,11) = 0.1d0 + ce(5,12) = 0.3d0 + ce(5,13) = 0.2d0 + c1 = 1.4d0 + c2 = 0.4d0 + c3 = 0.1d0 + c4 = 1.0d0 + c5 = 1.4d0 + dnxm1 = 1.0d0 / dble (grid_points(1) - 1) + dnym1 = 1.0d0 / dble (grid_points(2) - 1) + dnzm1 = 1.0d0 / dble (grid_points(3) - 1) + c1c2 = c1 * c2 + c1c5 = c1 * c5 + c3c4 = c3 * c4 + c1345 = c1c5 * c3c4 + conz1 = 1.0d0 - c1c5 + tx1 = 1.0d0 / (dnxm1 * dnxm1) + tx2 = 1.0d0 / (2.0d0 * dnxm1) + tx3 = 1.0d0 / dnxm1 + ty1 = 1.0d0 / (dnym1 * dnym1) + ty2 = 1.0d0 / (2.0d0 * dnym1) + ty3 = 1.0d0 / dnym1 + tz1 = 1.0d0 / (dnzm1 * dnzm1) + tz2 = 1.0d0 / (2.0d0 * dnzm1) + tz3 = 1.0d0 / dnzm1 + dx1 = 0.75d0 + dx2 = 0.75d0 + dx3 = 0.75d0 + dx4 = 0.75d0 + dx5 = 0.75d0 + dy1 = 0.75d0 + dy2 = 0.75d0 + dy3 = 0.75d0 + dy4 = 0.75d0 + dy5 = 0.75d0 + dz1 = 1.0d0 + dz2 = 1.0d0 + dz3 = 1.0d0 + dz4 = 1.0d0 + dz5 = 1.0d0 + dxmax = dmax1 (dx3,dx4) + dymax = dmax1 (dy2,dy4) + dzmax = dmax1 (dz2,dz3) + dssp = 0.25d0 * dmax1 (dx1,dmax1 (dy1,dz1)) + c4dssp = 4.0d0 * dssp + c5dssp = 5.0d0 * dssp + dttx1 = dt * tx1 + dttx2 = dt * tx2 + dtty1 = dt * ty1 + dtty2 = dt * ty2 + dttz1 = dt * tz1 + dttz2 = dt * tz2 + c2dttx1 = 2.0d0 * dttx1 + c2dtty1 = 2.0d0 * dtty1 + c2dttz1 = 2.0d0 * dttz1 + dtdssp = dt * dssp + comz1 = dtdssp + comz4 = 4.0d0 * dtdssp + comz5 = 5.0d0 * dtdssp + comz6 = 6.0d0 * dtdssp + c3c4tx3 = c3c4 * tx3 + c3c4ty3 = c3c4 * ty3 + c3c4tz3 = c3c4 * tz3 + dx1tx1 = dx1 * tx1 + dx2tx1 = dx2 * tx1 + dx3tx1 = dx3 * tx1 + dx4tx1 = dx4 * tx1 + dx5tx1 = dx5 * tx1 + dy1ty1 = dy1 * ty1 + dy2ty1 = dy2 * ty1 + dy3ty1 = dy3 * ty1 + dy4ty1 = dy4 * ty1 + dy5ty1 = dy5 * ty1 + dz1tz1 = dz1 * tz1 + dz2tz1 = dz2 * tz1 + dz3tz1 = dz3 * tz1 + dz4tz1 = dz4 * tz1 + dz5tz1 = dz5 * tz1 + c2iv = 2.5d0 + con43 = 4.0d0 / 3.0d0 + con16 = 1.0d0 / 6.0d0 + xxcon1 = c3c4tx3 * con43 * tx3 + xxcon2 = c3c4tx3 * tx3 + xxcon3 = c3c4tx3 * conz1 * tx3 + xxcon4 = c3c4tx3 * con16 * tx3 + xxcon5 = c3c4tx3 * c1c5 * tx3 + yycon1 = c3c4ty3 * con43 * ty3 + yycon2 = c3c4ty3 * ty3 + yycon3 = c3c4ty3 * conz1 * ty3 + yycon4 = c3c4ty3 * con16 * ty3 + yycon5 = c3c4ty3 * c1c5 * ty3 + zzcon1 = c3c4tz3 * con43 * tz3 + zzcon2 = c3c4tz3 * tz3 + zzcon3 = c3c4tz3 * conz1 * tz3 + zzcon4 = c3c4tz3 * con16 * tz3 + zzcon5 = c3c4tz3 * c1c5 * tz3 + return + end + diff --git a/set_constants_sp.for b/set_constants_sp.for new file mode 100644 index 0000000..7caf3fc --- /dev/null +++ b/set_constants_sp.for @@ -0,0 +1,173 @@ + +! *** generated by SAPFOR with version 2415 and build date: May 4 2025 14:48:40 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine set_constants_sp () + + include 'header_sp.h' + ce(1,1) = 2.0d0 + ce(1,2) = 0.0d0 + ce(1,3) = 0.0d0 + ce(1,4) = 4.0d0 + ce(1,5) = 5.0d0 + ce(1,6) = 3.0d0 + ce(1,7) = 0.5d0 + ce(1,8) = 0.02d0 + ce(1,9) = 0.01d0 + ce(1,10) = 0.03d0 + ce(1,11) = 0.5d0 + ce(1,12) = 0.4d0 + ce(1,13) = 0.3d0 + ce(2,1) = 1.0d0 + ce(2,2) = 0.0d0 + ce(2,3) = 0.0d0 + ce(2,4) = 0.0d0 + ce(2,5) = 1.0d0 + ce(2,6) = 2.0d0 + ce(2,7) = 3.0d0 + ce(2,8) = 0.01d0 + ce(2,9) = 0.03d0 + ce(2,10) = 0.02d0 + ce(2,11) = 0.4d0 + ce(2,12) = 0.3d0 + ce(2,13) = 0.5d0 + ce(3,1) = 2.0d0 + ce(3,2) = 2.0d0 + ce(3,3) = 0.0d0 + ce(3,4) = 0.0d0 + ce(3,5) = 0.0d0 + ce(3,6) = 2.0d0 + ce(3,7) = 3.0d0 + ce(3,8) = 0.04d0 + ce(3,9) = 0.03d0 + ce(3,10) = 0.05d0 + ce(3,11) = 0.3d0 + ce(3,12) = 0.5d0 + ce(3,13) = 0.4d0 + ce(4,1) = 2.0d0 + ce(4,2) = 2.0d0 + ce(4,3) = 0.0d0 + ce(4,4) = 0.0d0 + ce(4,5) = 0.0d0 + ce(4,6) = 2.0d0 + ce(4,7) = 3.0d0 + ce(4,8) = 0.03d0 + ce(4,9) = 0.05d0 + ce(4,10) = 0.04d0 + ce(4,11) = 0.2d0 + ce(4,12) = 0.1d0 + ce(4,13) = 0.3d0 + ce(5,1) = 5.0d0 + ce(5,2) = 4.0d0 + ce(5,3) = 3.0d0 + ce(5,4) = 2.0d0 + ce(5,5) = 0.1d0 + ce(5,6) = 0.4d0 + ce(5,7) = 0.3d0 + ce(5,8) = 0.05d0 + ce(5,9) = 0.04d0 + ce(5,10) = 0.03d0 + ce(5,11) = 0.1d0 + ce(5,12) = 0.3d0 + ce(5,13) = 0.2d0 + c1 = 1.4d0 + c2 = 0.4d0 + c3 = 0.1d0 + c4 = 1.0d0 + c5 = 1.4d0 + bt = dsqrt (0.5d0) + dnxm1 = 1.0d0 / dble (problem_size - 1) + dnym1 = 1.0d0 / dble (problem_size - 1) + dnzm1 = 1.0d0 / dble (problem_size - 1) + c1c2 = c1 * c2 + c1c5 = c1 * c5 + c3c4 = c3 * c4 + c1345 = c1c5 * c3c4 + conz1 = 1.0d0 - c1c5 + tx1 = 1.0d0 / (dnxm1 * dnxm1) + tx2 = 1.0d0 / (2.0d0 * dnxm1) + tx3 = 1.0d0 / dnxm1 + ty1 = 1.0d0 / (dnym1 * dnym1) + ty2 = 1.0d0 / (2.0d0 * dnym1) + ty3 = 1.0d0 / dnym1 + tz1 = 1.0d0 / (dnzm1 * dnzm1) + tz2 = 1.0d0 / (2.0d0 * dnzm1) + tz3 = 1.0d0 / dnzm1 + dx1 = 0.75d0 + dx2 = 0.75d0 + dx3 = 0.75d0 + dx4 = 0.75d0 + dx5 = 0.75d0 + dy1 = 0.75d0 + dy2 = 0.75d0 + dy3 = 0.75d0 + dy4 = 0.75d0 + dy5 = 0.75d0 + dz1 = 1.0d0 + dz2 = 1.0d0 + dz3 = 1.0d0 + dz4 = 1.0d0 + dz5 = 1.0d0 + dxmax = dmax1 (dx3,dx4) + dymax = dmax1 (dy2,dy4) + dzmax = dmax1 (dz2,dz3) + dssp = 0.25d0 * dmax1 (dx1,dmax1 (dy1,dz1)) + c4dssp = 4.0d0 * dssp + c5dssp = 5.0d0 * dssp + dttx1 = dt * tx1 + dttx2 = dt * tx2 + dtty1 = dt * ty1 + dtty2 = dt * ty2 + dttz1 = dt * tz1 + dttz2 = dt * tz2 + c2dttx1 = 2.0d0 * dttx1 + c2dtty1 = 2.0d0 * dtty1 + c2dttz1 = 2.0d0 * dttz1 + dtdssp = dt * dssp + comz1 = dtdssp + comz4 = 4.0d0 * dtdssp + comz5 = 5.0d0 * dtdssp + comz6 = 6.0d0 * dtdssp + c3c4tx3 = c3c4 * tx3 + c3c4ty3 = c3c4 * ty3 + c3c4tz3 = c3c4 * tz3 + dx1tx1 = dx1 * tx1 + dx2tx1 = dx2 * tx1 + dx3tx1 = dx3 * tx1 + dx4tx1 = dx4 * tx1 + dx5tx1 = dx5 * tx1 + dy1ty1 = dy1 * ty1 + dy2ty1 = dy2 * ty1 + dy3ty1 = dy3 * ty1 + dy4ty1 = dy4 * ty1 + dy5ty1 = dy5 * ty1 + dz1tz1 = dz1 * tz1 + dz2tz1 = dz2 * tz1 + dz3tz1 = dz3 * tz1 + dz4tz1 = dz4 * tz1 + dz5tz1 = dz5 * tz1 + c2iv = 2.5d0 + con43 = 4.0d0 / 3.0d0 + con16 = 1.0d0 / 6.0d0 + xxcon1 = c3c4tx3 * con43 * tx3 + xxcon2 = c3c4tx3 * tx3 + xxcon3 = c3c4tx3 * conz1 * tx3 + xxcon4 = c3c4tx3 * con16 * tx3 + xxcon5 = c3c4tx3 * c1c5 * tx3 + yycon1 = c3c4ty3 * con43 * ty3 + yycon2 = c3c4ty3 * ty3 + yycon3 = c3c4ty3 * conz1 * ty3 + yycon4 = c3c4ty3 * con16 * ty3 + yycon5 = c3c4ty3 * c1c5 * ty3 + zzcon1 = c3c4tz3 * con43 * tz3 + zzcon2 = c3c4tz3 * tz3 + zzcon3 = c3c4tz3 * conz1 * tz3 + zzcon4 = c3c4tz3 * con16 * tz3 + zzcon5 = c3c4tz3 * c1c5 * tz3 + return + end + diff --git a/sp.for b/sp.for new file mode 100644 index 0000000..ad5d9de --- /dev/null +++ b/sp.for @@ -0,0 +1,216 @@ + +! *** generated by SAPFOR with version 2415 and build date: May 4 2025 14:48:40 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!-------------------------------------------------------------------------! +! ! +! N A S P A R A L L E L B E N C H M A R K S 3.3.1 ! +! ! +! D V M H V E R S I O N ! +! ! +! S P ! +! ! +!-------------------------------------------------------------------------! +!-------------------------------------------------------------------------! +!--------------------------------------------------------------------- +! +! Authors: +! Original: +! R. Van der Wijngaart +! W. Saphir +! H. Jin +! Optimize for DVMH: +! Kolganov A.S. +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine sp + + include 'header_sp.h' + integer :: i,niter,step,fstatus,n3 + external timer_read_sp + double precision :: mflops,t,tmax,timer_read_sp,trecs(t_last) + logical :: verified + character :: class + character :: t_names(t_last)*8 + +!--------------------------------------------------------------------- +! Read input file (if it exists), else take +! defaults from parameters +!--------------------------------------------------------------------- + open (unit = 2,file = 'timer.flag',status = 'old',iostat = fstatus + &) + if (fstatus .eq. 0) then + timeron = .TRUE. + t_names(t_total) = 'total' + t_names(t_rhsx) = 'rhsx' + t_names(t_rhsy) = 'rhsy' + t_names(t_rhsz) = 'rhsz' + t_names(t_rhs) = 'rhs' + t_names(t_xsolve) = 'xsolve' + t_names(t_ysolve) = 'ysolve' + t_names(t_zsolve) = 'zsolve' + t_names(t_rdis1) = 'redist1' + t_names(t_rdis2) = 'redist2' + t_names(t_tzetar) = 'tzetar' + t_names(t_ninvr) = 'ninvr' + t_names(t_pinvr) = 'pinvr' + t_names(t_txinvr) = 'txinvr' + t_names(t_add) = 'add' + close (unit = 2) + else + timeron = .FALSE. + endif + write (unit = *,fmt = 1000) + open (unit = 2,file = 'inputsp.data',status = 'old',iostat = fstat + &us) + if (fstatus .eq. 0) then + write (unit = *,fmt = 233) +233 format(' Reading from input file inputsp.data') + read (unit = 2,fmt = *) niter + read (unit = 2,fmt = *) dt + read (unit = 2,fmt = *) grid_points(1),grid_points(2),grid_poin + &ts(3) + close (unit = 2) + else + write (unit = *,fmt = 234) + niter = niter_default + dt = dt_default + grid_points(1) = problem_size + grid_points(2) = problem_size + grid_points(3) = problem_size + endif +234 format(' No input file inputsp.data. Using compiled defaults') + open (unit = 2,file = 'inputStage',status = 'old',iostat = fstatus + &) + if (fstatus .eq. 0) then + read (unit = 2,fmt = *) stage_n + close (unit = 2) + else + stage_n = 0 + endif + write (unit = *,fmt = *) 'stage = ',stage_n + write (unit = *,fmt = 1001) problem_size,problem_size,problem_size + write (unit = *,fmt = 1002) niter,dt + write (unit = *,fmt = *) +1000 format(//, ' NAS Parallel Benchmarks (NPB3.3.1-DVMH)', + & ' - SP Benchmark', /) +1001 format(' Size: ', i4, 'x', i4, 'x', i4) +1002 format(' Iterations: ', i4, ' dt: ', F11.7) +1003 format(' Number of available threads: ', i5) + if (problem_size .gt. imax .or. problem_size .gt. jmax .or. proble + &m_size .gt. kmax) then + print *, (grid_points(i), i = 1,3) + print *, ' Problem size too big for compiled array sizes' + goto 999 + endif + nx2 = problem_size - 2 + ny2 = problem_size - 2 + nz2 = problem_size - 2 + call set_constants_sp() + call exact_rhs_sp() + call initialize_sp() + call adi_first_sp() + call adi_first_sp() + call initialize_sp() + do i = 1,t_last + call timer_clear_sp(i) + enddo + call timer_start_sp(1) + +! DVM$ BARRIER + do step = 1,niter + if (mod (step,20) .eq. 0 .or. step .eq. 1) then + write (unit = *,fmt = 200) step +200 format(' Time step ', i4) + endif + call adi() + enddo + call timer_stop_sp(1) + tmax = timer_read_sp (1) + call verify_sp(niter,class,verified) + if (tmax .ne. 0.) then + n3 = problem_size * problem_size * problem_size + t = (problem_size + problem_size + problem_size) / 3.0 + mflops = (881.174 * float (n3) - 4683.91 * t** 2 + 11484.5 * t + &- 19272.4) * float (niter) / (tmax * 1000000.0d0) + else + mflops = 0.0 + endif + call print_results_sp('SP',class,problem_size,problem_size, + &problem_si + &ze,niter,tmax,mflops,' floating point',verified,npbversio + &n,compiletime,cs1,cs2,cs3,cs4,cs5,cs6,'(none)') + +!--------------------------------------------------------------------- +! More timers +!--------------------------------------------------------------------- + if (.not.(timeron)) goto 999 + do i = 1,t_last + trecs(i) = timer_read_sp (i) + enddo + if (tmax .eq. 0.0) tmax = 1.0 + write (unit = *,fmt = 800) +800 format(' SECTION Time (secs)') + do i = 1,t_last + write (unit = *,fmt = 810) t_names(i),trecs(i),trecs(i) * 100. + &/ tmax + if (i .eq. t_rhs) then + t = trecs(t_rhsx) + trecs(t_rhsy) + trecs(t_rhsz) + write (unit = *,fmt = 820) 'sub-rhs',t,t * 100. / tmax + t = trecs(t_rhs) - t + write (unit = *,fmt = 820) 'rest-rhs',t,t * 100. / tmax + else if (i .eq. t_zsolve) then + t = trecs(t_zsolve) - trecs(t_rdis1) - trecs(t_rdis2) + write (unit = *,fmt = 820) 'sub-zsol',t,t * 100. / tmax + else if (i .eq. t_rdis2) then + t = trecs(t_rdis1) + trecs(t_rdis2) + write (unit = *,fmt = 820) 'redist',t,t * 100. / tmax + endif +810 format(2x,a8,':',f9.3,' (',f6.2,'%)') +820 format(' --> ',a8,':',f9.3,' (',f6.2,'%)') + enddo +999 continue + end + + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine adi_first_sp () + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + call compute_rhs_sp(1) + call x_solve_sp() + call y_solve_sp() + call z_solve_sp() + return + end + + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine adi_sp () + +! DVM$ INTERVAL 1 +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + call compute_rhs_sp(1) + +! DVM$ INTERVAL 12 +! DVM$ END INTERVAL + call x_solve_sp() + +! DVM$ INTERVAL 13 +! DVM$ END INTERVAL + call y_solve_sp() + +! DVM$ INTERVAL 14 +! DVM$ END INTERVAL + call z_solve_sp() + +! DVM$ END INTERVAL + return + end + diff --git a/spbt.for b/spbt.for new file mode 100644 index 0000000..640b85a --- /dev/null +++ b/spbt.for @@ -0,0 +1,13 @@ + +! *** generated by SAPFOR with version 2412 and build date: Apr 29 2025 22:44:14 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- + program spbt + include 'header3d_bt.h' + call btdv3() + call sp() + return + end \ No newline at end of file diff --git a/timers_bt.for b/timers_bt.for new file mode 100644 index 0000000..24836f5 --- /dev/null +++ b/timers_bt.for @@ -0,0 +1,91 @@ + +! *** generated by SAPFOR with version 2412 and build date: Apr 29 2025 22:44:14 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine timer_clear_bt (n) + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + implicit none + integer :: n + double precision :: start(64),elapsed(64) + common /tt/start,elapsed + elapsed(n) = 0.0 + return + end + + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine timer_start_bt (n) + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + implicit none + external elapsed_time_bt + double precision :: elapsed_time_bt + integer :: n + double precision :: start(64),elapsed(64) + common /tt/start,elapsed + start(n) = elapsed_time_bt () + return + end + + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine timer_stop_bt (n) + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + implicit none + external elapsed_time_bt + double precision :: elapsed_time_bt + integer :: n + double precision :: start(64),elapsed(64) + common /tt/start,elapsed + double precision :: t,now + now = elapsed_time_bt () + t = now - start(n) + elapsed(n) = elapsed(n) + t + return + end + + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + double precision function timer_read_bt (n) + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + implicit none + integer :: n + double precision :: start(64),elapsed(64) + common /tt/start,elapsed + timer_read_bt = elapsed(n) + return + end + + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + double precision function elapsed_time () + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + implicit none + double precision :: t,dvtime + integer :: dvm_debug + +! dvm_debug = 0 - standard mode, dvm_debug > 0 - debugging mode + parameter (dvm_debug = 0) + data t/0.d0/ + t = dvtime () + elapsed_time = t + return + end + diff --git a/timers_sp.for b/timers_sp.for new file mode 100644 index 0000000..9417507 --- /dev/null +++ b/timers_sp.for @@ -0,0 +1,86 @@ + +! *** generated by SAPFOR with version 2415 and build date: May 4 2025 14:48:40 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine timer_clear_sp (n) + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + implicit none + integer :: n + double precision :: start(64),elapsed(64) + common /tt/start,elapsed + elapsed(n) = 0.0 + return + end + + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine timer_start_sp (n) + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + implicit none + external elapsed_time_sp + double precision :: elapsed_time_sp + integer :: n + double precision :: start(64),elapsed(64) + common /tt/start,elapsed + start(n) = elapsed_time_sp () + return + end + + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine timer_stop_sp (n) + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + implicit none + external elapsed_time_sp + double precision :: elapsed_time_sp + integer :: n + double precision :: start(64),elapsed(64) + common /tt/start,elapsed + double precision :: t,now + now = elapsed_time_sp () + t = now - start(n) + elapsed(n) = elapsed(n) + t + return + end + + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + double precision function timer_read_sp (n) + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + implicit none + integer :: n + double precision :: start(64),elapsed(64) + common /tt/start,elapsed + timer_read_sp = elapsed(n) + return + end + + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + double precision function elapsed_time_sp () + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + implicit none + double precision :: t,dvtime + t = dvtime () + elapsed_time_sp = t + return + end + diff --git a/x_solve_bt.for b/x_solve_bt.for new file mode 100644 index 0000000..7d9a782 --- /dev/null +++ b/x_solve_bt.for @@ -0,0 +1,642 @@ + +! *** generated by SAPFOR with version 2412 and build date: Apr 29 2025 22:44:14 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +! performs guaussian elimination on this cell. +! +! assumes that unpacking routines for non-first cells +! preload C' and rhs' from previous cell. +! +! assumed send happens outside this routine, but that +! c'(IMAX) and rhs'(IMAX) will be sent to next cell +!--------------------------------------------------------------------- + subroutine x_solve_bt () + + include 'header3d_bt.h' + double precision :: pivot,coeff + integer :: i__0,j__1 + integer :: m,n + double precision :: coeff__2 + double precision :: pivot__3 + double precision :: lhs_(5,5,3),u_(0:3,5) + double precision :: rhs_(5) + integer :: i,j,k,isize + isize = problem_size - 1 + + +!$SPF PARALLEL_REG r0 +!$SPF ANALYSIS(PRIVATE(U_,RHS_,LHS_)) +! DVM$ PARALLEL (K,J) ON RHS(*,*,J,K), PRIVATE (U_,I,RHS_,TMP1,TMP2,TMP3, +! DVM$&T1,T2,T3,TM1,TM2,PIVOT,COEFF,TM3,I__0,J__1,TMP11,TMP22,LHS_,M,N,CO +! DVM$&EFF__2,PIVOT__3) +! DVM$ REGION LOCAL (LHS__) +!--------------------------------------------------------------------- +! outer most do loops - sweeping in i direction +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- +! begin inner most do loop +! do all the elements of the cell unless last +!--------------------------------------------------------------------- +!, ACROSS(rhs(1:0,0:0,0:0,0:0),lhs__(1:0,0:0,0:0,0:0,0:0)) + do k = 1,problem_size - 2 + do j = 1,problem_size - 2 + do m = 1,5 + u_(0,m) = u(m,0,j,k) + u_(1,m) = u(m,1,j,k) + enddo + do i = 1,isize - 1 + do m = 1,5 + u_(2,m) = u(m,i + 1,j,k) + enddo + +! if(i .ne. isize) then + tmp1 = 1.0d+00 / u_(1,1) + tmp2 = tmp1 * tmp1 + tmp3 = tmp1 * tmp2 + t1 = 1.0d+00 / u_(0,1) + t2 = t1 * t1 + t3 = t1 * t2 + tm1 = 1.0d+00 / u_(2,1) + tm2 = tm1 * tm1 + tm3 = tm1 * tm2 + tmp11 = dt * tx1 + tmp22 = dt * tx2 + lhs_(1,1,1) = (-(tmp11)) * dx1 + lhs_(1,2,1) = (-(tmp22)) + lhs_(1,3,1) = 0. + lhs_(1,4,1) = 0. + lhs_(1,5,1) = 0. + lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * t2 * u_(0,2))) + + & c2 * 0.50d+00 * (u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4) + &* u_(0,4)) * t2) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,2)) + lhs_(2,2,1) = (-(tmp22)) * ((2.0d+00 - c2) * (u_(0,2) / u + &_(0,1))) - tmp11 * con43 * c3c4 * t1 - tmp11 * dx2 + lhs_(2,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * t1)) + lhs_(2,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,4) * t1)) + lhs_(2,5,1) = (-(tmp22)) * c2 + lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,3))) * t2) + &- tmp11 * ((-(c3c4)) * t2 * u_(0,3)) + lhs_(3,2,1) = (-(tmp22)) * u_(0,3) * t1 + lhs_(3,3,1) = (-(tmp22)) * u_(0,2) * t1 - tmp11 * c3c4 * + &t1 - tmp11 * dx3 + lhs_(3,4,1) = 0. + lhs_(3,5,1) = 0. + lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,4))) * t2) + &- tmp11 * ((-(c3c4)) * t2 * u_(0,4)) + lhs_(4,2,1) = (-(tmp22)) * u_(0,4) * t1 + lhs_(4,3,1) = 0. + lhs_(4,4,1) = (-(tmp22)) * u_(0,2) * t1 - tmp11 * c3c4 * + &t1 - tmp11 * dx4 + lhs_(4,5,1) = 0. + lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_ + &(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * (u_(0,5) * t1)) * + & (u_(0,2) * t1)) - tmp11 * ((-(con43 * c3c4 - c1345)) * t3 * u_(0, + &2)** 2 - (c3c4 - c1345) * t3 * u_(0,3)** 2 - (c3c4 - c1345) * t3 * + & u_(0,4)** 2 - c1345 * t2 * u_(0,5)) + lhs_(5,2,1) = (-(tmp22)) * (c1 * u_(0,5) * t1 - 0.50d+00 + &* c2 * (3.0d+00 * u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4) + &* u_(0,4)) * t2) - tmp11 * ((con43 * c3c4 - c1345) * t2 * u_(0,2)) + lhs_(5,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,2)) + & * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,3) + lhs_(5,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,4) * u_(0,2)) + & * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,4) + lhs_(5,5,1) = (-(tmp22)) * c1 * (u_(0,2) * t1) - tmp11 * + &c1345 * t1 - tmp11 * dx5 + lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dx1 + lhs_(1,2,2) = 0. + lhs_(1,3,2) = 0. + lhs_(1,4,2) = 0. + lhs_(1,5,2) = 0. + lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2 + & * u_(1,2)) + lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3c4 * + &tmp1 + tmp11 * 2.0d+00 * dx2 + lhs_(2,3,2) = 0. + lhs_(2,4,2) = 0. + lhs_(2,5,2) = 0. + lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1, + &3)) + lhs_(3,2,2) = 0. + lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t + &mp11 * 2.0d+00 * dx3 + lhs_(3,4,2) = 0. + lhs_(3,5,2) = 0. + lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1, + &4)) + lhs_(4,2,2) = 0. + lhs_(4,3,2) = 0. + lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t + &mp11 * 2.0d+00 * dx4 + lhs_(4,5,2) = tmp11 * 2.0d+00 * 0 + lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(con43 * c3c4 - c1345) + &) * tmp3 * u_(1,2)** 2 - (c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (c3 + &c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5)) + lhs_(5,2,2) = tmp11 * 2.0d+00 * ((con43 * c3c4 - c1345) * + & tmp2 * u_(1,2)) + lhs_(5,3,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u + &_(1,3) + lhs_(5,4,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u + &_(1,4) + lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 + + &tmp11 * 2.0d+00 * dx5 + if (i .ne. 1) then + do j__1 = 1,5 + lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs + &_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3, + &j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1 + &,3) + lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs + &_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3, + &j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1 + &,3) + lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs + &_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3, + &j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1 + &,3) + lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs + &_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3, + &j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1 + &,3) + lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs + &_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3, + &j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1 + &,3) + enddo + endif + lhs_(1,1,3) = (-(tmp11)) * dx1 + lhs_(1,2,3) = tmp22 + lhs_(1,3,3) = 0. + lhs_(1,4,3) = 0. + lhs_(1,5,3) = 0. + lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * tm2 * u_(2,2))) + c2 + &* 0.50d+00 * (u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u_ + &(2,4)) * tm2) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,2)) + lhs_(2,2,3) = tmp22 * ((2.0d+00 - c2) * (u_(2,2) / u_(2,1 + &))) - tmp11 * con43 * c3c4 * tm1 - tmp11 * dx2 + lhs_(2,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * tm1)) + lhs_(2,4,3) = tmp22 * ((-(c2)) * (u_(2,4) * tm1)) + lhs_(2,5,3) = tmp22 * c2 + lhs_(3,1,3) = tmp22 * ((-(u_(2,2) * u_(2,3))) * tm2) - tm + &p11 * ((-(c3c4)) * tm2 * u_(2,3)) + lhs_(3,2,3) = tmp22 * u_(2,3) * tm1 + lhs_(3,3,3) = tmp22 * u_(2,2) * tm1 - tmp11 * c3c4 * tm1 + &- tmp11 * dx3 + lhs_(3,4,3) = 0. + lhs_(3,5,3) = 0. + lhs_(4,1,3) = tmp22 * ((-(u_(2,2) * u_(2,4))) * tm2) - tm + &p11 * ((-(c3c4)) * tm2 * u_(2,4)) + lhs_(4,2,3) = tmp22 * u_(2,4) * tm1 + lhs_(4,3,3) = 0. + lhs_(4,4,3) = tmp22 * u_(2,2) * tm1 - tmp11 * c3c4 * tm1 + &- tmp11 * dx4 + lhs_(4,5,3) = 0. + lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3) + & * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * (u_(2,5) * tm1)) * (u + &_(2,2) * tm1)) - tmp11 * ((-(con43 * c3c4 - c1345)) * tm3 * u_(2,2 + &)** 2 - (c3c4 - c1345) * tm3 * u_(2,3)** 2 - (c3c4 - c1345) * tm3 + &* u_(2,4)** 2 - c1345 * tm2 * u_(2,5)) + lhs_(5,2,3) = tmp22 * (c1 * u_(2,5) * tm1 - 0.50d+00 * c2 + & * (3.0d+00 * u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u_ + &(2,4)) * tm2) - tmp11 * ((con43 * c3c4 - c1345) * tm2 * u_(2,2)) + lhs_(5,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,2)) * tm + &2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,3) + lhs_(5,4,3) = tmp22 * ((-(c2)) * (u_(2,4) * u_(2,2)) * tm + &2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,4) + lhs_(5,5,3) = tmp22 * c1 * (u_(2,2) * tm1) - tmp11 * c134 + &5 * tm1 - tmp11 * dx5 + do m = 1,5 + rhs_(m) = rhs(m,i,j,k) + enddo + do i__0 = 1,5 + rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i - 1 + &,j,k) - lhs_(i__0,2,1) * rhs(2,i - 1,j,k) - lhs_(i__0,3,1) * rhs(3 + &,i - 1,j,k) - lhs_(i__0,4,1) * rhs(4,i - 1,j,k) - lhs_(i__0,5,1) * + & rhs(5,i - 1,j,k) + enddo + pivot = 1.00d0 / lhs_(1,1,2) + lhs_(1,2,2) = lhs_(1,2,2) * pivot + lhs_(1,3,2) = lhs_(1,3,2) * pivot + lhs_(1,4,2) = lhs_(1,4,2) * pivot + lhs_(1,5,2) = lhs_(1,5,2) * pivot + lhs_(1,1,3) = lhs_(1,1,3) * pivot + lhs_(1,2,3) = lhs_(1,2,3) * pivot + lhs_(1,3,3) = lhs_(1,3,3) * pivot + lhs_(1,4,3) = lhs_(1,4,3) * pivot + lhs_(1,5,3) = lhs_(1,5,3) * pivot + rhs_(1) = rhs_(1) * pivot + coeff = lhs_(2,1,2) + lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2) + lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2) + lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2) + lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3) + lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3) + lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3) + lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3) + lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3) + rhs_(2) = rhs_(2) - coeff * rhs_(1) + coeff = lhs_(3,1,2) + lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2) + lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2) + lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2) + lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3) + lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3) + lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3) + lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3) + lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3) + rhs_(3) = rhs_(3) - coeff * rhs_(1) + coeff = lhs_(4,1,2) + lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2) + lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2) + lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3) + lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3) + lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3) + lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3) + lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3) + rhs_(4) = rhs_(4) - coeff * rhs_(1) + coeff = lhs_(5,1,2) + lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2) + lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2) + lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3) + lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3) + lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3) + lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3) + lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3) + rhs_(5) = rhs_(5) - coeff * rhs_(1) + pivot = 1.00d0 / lhs_(2,2,2) + lhs_(2,3,2) = lhs_(2,3,2) * pivot + lhs_(2,4,2) = lhs_(2,4,2) * pivot + lhs_(2,5,2) = lhs_(2,5,2) * pivot + lhs_(2,1,3) = lhs_(2,1,3) * pivot + lhs_(2,2,3) = lhs_(2,2,3) * pivot + lhs_(2,3,3) = lhs_(2,3,3) * pivot + lhs_(2,4,3) = lhs_(2,4,3) * pivot + lhs_(2,5,3) = lhs_(2,5,3) * pivot + rhs_(2) = rhs_(2) * pivot + coeff = lhs_(1,2,2) + lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2) + lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2) + lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3) + lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3) + lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3) + lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3) + lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3) + rhs_(1) = rhs_(1) - coeff * rhs_(2) + coeff = lhs_(3,2,2) + lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2) + lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2) + lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3) + lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3) + lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3) + lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3) + lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3) + rhs_(3) = rhs_(3) - coeff * rhs_(2) + coeff = lhs_(4,2,2) + lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2) + lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3) + lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3) + lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3) + lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3) + lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3) + rhs_(4) = rhs_(4) - coeff * rhs_(2) + coeff = lhs_(5,2,2) + lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2) + lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3) + lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3) + lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3) + lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3) + lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3) + rhs_(5) = rhs_(5) - coeff * rhs_(2) + pivot = 1.00d0 / lhs_(3,3,2) + lhs_(3,4,2) = lhs_(3,4,2) * pivot + lhs_(3,5,2) = lhs_(3,5,2) * pivot + lhs_(3,1,3) = lhs_(3,1,3) * pivot + lhs_(3,2,3) = lhs_(3,2,3) * pivot + lhs_(3,3,3) = lhs_(3,3,3) * pivot + lhs_(3,4,3) = lhs_(3,4,3) * pivot + lhs_(3,5,3) = lhs_(3,5,3) * pivot + rhs_(3) = rhs_(3) * pivot + coeff = lhs_(1,3,2) + lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2) + lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3) + lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3) + lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3) + lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3) + lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3) + rhs_(1) = rhs_(1) - coeff * rhs_(3) + coeff = lhs_(2,3,2) + lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2) + lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3) + lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3) + lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3) + lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3) + lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3) + rhs_(2) = rhs_(2) - coeff * rhs_(3) + coeff = lhs_(4,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2) + lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3) + lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3) + lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3) + lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3) + lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3) + rhs_(4) = rhs_(4) - coeff * rhs_(3) + coeff = lhs_(5,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2) + lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3) + lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3) + lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3) + lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3) + lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3) + rhs_(5) = rhs_(5) - coeff * rhs_(3) + pivot = 1.00d0 / lhs_(4,4,2) + lhs_(4,5,2) = lhs_(4,5,2) * pivot + lhs_(4,1,3) = lhs_(4,1,3) * pivot + lhs_(4,2,3) = lhs_(4,2,3) * pivot + lhs_(4,3,3) = lhs_(4,3,3) * pivot + lhs_(4,4,3) = lhs_(4,4,3) * pivot + lhs_(4,5,3) = lhs_(4,5,3) * pivot + rhs_(4) = rhs_(4) * pivot + coeff = lhs_(1,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2) + lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3) + lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3) + lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3) + lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3) + lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3) + rhs_(1) = rhs_(1) - coeff * rhs_(4) + coeff = lhs_(2,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2) + lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3) + lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3) + lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3) + lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3) + lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3) + rhs_(2) = rhs_(2) - coeff * rhs_(4) + coeff = lhs_(3,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2) + lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3) + lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3) + lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3) + lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3) + lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3) + rhs_(3) = rhs_(3) - coeff * rhs_(4) + coeff = lhs_(5,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2) + lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3) + lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3) + lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3) + lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3) + lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3) + rhs_(5) = rhs_(5) - coeff * rhs_(4) + pivot = 1.00d0 / lhs_(5,5,2) + lhs_(5,1,3) = lhs_(5,1,3) * pivot + lhs_(5,2,3) = lhs_(5,2,3) * pivot + lhs_(5,3,3) = lhs_(5,3,3) * pivot + lhs_(5,4,3) = lhs_(5,4,3) * pivot + lhs_(5,5,3) = lhs_(5,5,3) * pivot + rhs_(5) = rhs_(5) * pivot + coeff = lhs_(1,5,2) + lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3) + lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3) + lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3) + lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3) + lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3) + rhs_(1) = rhs_(1) - coeff * rhs_(5) + coeff = lhs_(2,5,2) + lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3) + lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3) + lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3) + lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3) + lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3) + rhs_(2) = rhs_(2) - coeff * rhs_(5) + coeff = lhs_(3,5,2) + lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3) + lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3) + lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3) + lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3) + lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3) + rhs_(3) = rhs_(3) - coeff * rhs_(5) + coeff = lhs_(4,5,2) + lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3) + lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3) + lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3) + lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3) + lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3) + rhs_(4) = rhs_(4) - coeff * rhs_(5) + do i__0 = 1,5 + lhs__(i__0,1,i,j,k) = lhs_(i__0,1,3) + lhs__(i__0,2,i,j,k) = lhs_(i__0,2,3) + lhs__(i__0,3,i,j,k) = lhs_(i__0,3,3) + lhs__(i__0,4,i,j,k) = lhs_(i__0,4,3) + lhs__(i__0,5,i,j,k) = lhs_(i__0,5,3) + enddo + do m = 1,5 + rhs(m,i,j,k) = rhs_(m) + u_(0,m) = u_(1,m) + u_(1,m) = u_(2,m) + enddo + enddo + +! else ! ******************* else case ************************* + do n = 1,5 + lhs_(1,n,1) = 0.0d0 + lhs_(1,n,2) = 0.0d0 + lhs_(1,n,3) = 0.0d0 + lhs_(2,n,1) = 0.0d0 + lhs_(2,n,2) = 0.0d0 + lhs_(2,n,3) = 0.0d0 + lhs_(3,n,1) = 0.0d0 + lhs_(3,n,2) = 0.0d0 + lhs_(3,n,3) = 0.0d0 + lhs_(4,n,1) = 0.0d0 + lhs_(4,n,2) = 0.0d0 + lhs_(4,n,3) = 0.0d0 + lhs_(5,n,1) = 0.0d0 + lhs_(5,n,2) = 0.0d0 + lhs_(5,n,3) = 0.0d0 + enddo + do m = 1,5 + lhs_(m,m,2) = 1.0d0 + enddo + do m = 1,5 + rhs_(m) = rhs(m,i,j,k) + enddo + do i__0 = 1,5 + rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i - 1,j, + &k) - lhs_(i__0,2,1) * rhs(2,i - 1,j,k) - lhs_(i__0,3,1) * rhs(3,i + &- 1,j,k) - lhs_(i__0,4,1) * rhs(4,i - 1,j,k) - lhs_(i__0,5,1) * rh + &s(5,i - 1,j,k) + enddo + do j__1 = 1,5 + lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_ + &_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3 + &) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3) + lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_ + &_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3 + &) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3) + lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_ + &_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3 + &) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3) + lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_ + &_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3 + &) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3) + lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_ + &_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3 + &) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3) + enddo + +!--------------------------------------------------------------------- +! +!--------------------------------------------------------------------- + pivot__3 = 1.00d0 / lhs_(1,1,2) + lhs_(1,2,2) = lhs_(1,2,2) * pivot__3 + lhs_(1,3,2) = lhs_(1,3,2) * pivot__3 + lhs_(1,4,2) = lhs_(1,4,2) * pivot__3 + lhs_(1,5,2) = lhs_(1,5,2) * pivot__3 + rhs_(1) = rhs_(1) * pivot__3 + coeff__2 = lhs_(2,1,2) + lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2) + lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2) + lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2) + rhs_(2) = rhs_(2) - coeff__2 * rhs_(1) + coeff__2 = lhs_(3,1,2) + lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2) + lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2) + lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2) + rhs_(3) = rhs_(3) - coeff__2 * rhs_(1) + coeff__2 = lhs_(4,1,2) + lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2) + lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2) + rhs_(4) = rhs_(4) - coeff__2 * rhs_(1) + coeff__2 = lhs_(5,1,2) + lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2) + lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2) + rhs_(5) = rhs_(5) - coeff__2 * rhs_(1) + pivot__3 = 1.00d0 / lhs_(2,2,2) + lhs_(2,3,2) = lhs_(2,3,2) * pivot__3 + lhs_(2,4,2) = lhs_(2,4,2) * pivot__3 + lhs_(2,5,2) = lhs_(2,5,2) * pivot__3 + rhs_(2) = rhs_(2) * pivot__3 + coeff__2 = lhs_(1,2,2) + lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2) + lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2) + rhs_(1) = rhs_(1) - coeff__2 * rhs_(2) + coeff__2 = lhs_(3,2,2) + lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2) + lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2) + rhs_(3) = rhs_(3) - coeff__2 * rhs_(2) + coeff__2 = lhs_(4,2,2) + lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2) + rhs_(4) = rhs_(4) - coeff__2 * rhs_(2) + coeff__2 = lhs_(5,2,2) + lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2) + rhs_(5) = rhs_(5) - coeff__2 * rhs_(2) + pivot__3 = 1.00d0 / lhs_(3,3,2) + lhs_(3,4,2) = lhs_(3,4,2) * pivot__3 + lhs_(3,5,2) = lhs_(3,5,2) * pivot__3 + rhs_(3) = rhs_(3) * pivot__3 + coeff__2 = lhs_(1,3,2) + lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2) + rhs_(1) = rhs_(1) - coeff__2 * rhs_(3) + coeff__2 = lhs_(2,3,2) + lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2) + rhs_(2) = rhs_(2) - coeff__2 * rhs_(3) + coeff__2 = lhs_(4,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2) + rhs_(4) = rhs_(4) - coeff__2 * rhs_(3) + coeff__2 = lhs_(5,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2) + rhs_(5) = rhs_(5) - coeff__2 * rhs_(3) + pivot__3 = 1.00d0 / lhs_(4,4,2) + lhs_(4,5,2) = lhs_(4,5,2) * pivot__3 + rhs_(4) = rhs_(4) * pivot__3 + coeff__2 = lhs_(1,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2) + rhs_(1) = rhs_(1) - coeff__2 * rhs_(4) + coeff__2 = lhs_(2,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2) + rhs_(2) = rhs_(2) - coeff__2 * rhs_(4) + coeff__2 = lhs_(3,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2) + rhs_(3) = rhs_(3) - coeff__2 * rhs_(4) + coeff__2 = lhs_(5,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2) + rhs_(5) = rhs_(5) - coeff__2 * rhs_(4) + pivot__3 = 1.00d0 / lhs_(5,5,2) + rhs_(5) = rhs_(5) * pivot__3 + coeff__2 = lhs_(1,5,2) + rhs_(1) = rhs_(1) - coeff__2 * rhs_(5) + coeff__2 = lhs_(2,5,2) + rhs_(2) = rhs_(2) - coeff__2 * rhs_(5) + coeff__2 = lhs_(3,5,2) + rhs_(3) = rhs_(3) - coeff__2 * rhs_(5) + coeff__2 = lhs_(4,5,2) + rhs_(4) = rhs_(4) - coeff__2 * rhs_(5) + +! endif + do m = 1,5 + rhs(m,i,j,k) = rhs_(m) + enddo + +! enddo + do i = problem_size - 2,0,(-(1)) + do m = 1,5 + rhs_(m) = rhs(m,i,j,k) + enddo + do m = 1,5 + rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k) * rhs(1,i + 1,j,k + &) + rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k) * rhs(2,i + 1,j,k + &) + rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k) * rhs(3,i + 1,j,k + &) + rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k) * rhs(4,i + 1,j,k + &) + rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k) * rhs(5,i + 1,j,k + &) + enddo + do m = 1,5 + rhs(m,i,j,k) = rhs_(m) + enddo + enddo + enddo + enddo +!$SPF END PARALLEL_REG +! DVM$ END REGION + return + end + diff --git a/x_solve_sp.for b/x_solve_sp.for new file mode 100644 index 0000000..249af7a --- /dev/null +++ b/x_solve_sp.for @@ -0,0 +1,333 @@ + +! *** generated by SAPFOR with version 2415 and build date: May 4 2025 14:48:40 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine x_solve_sp () + + include 'header_sp.h' + integer :: i,j,k,i1,i2,m,m1 + double precision :: ru1,fac1,fac2,rhs__(5,0:2),t1,t2 + double precision :: lhs__(5,0:2),lhsm__(5,0:2),lhsp__(5,0:2) + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + + +!$SPF PARALLEL_REG r0 +!$SPF ANALYSIS(PRIVATE(LHS__,LHSP__,LHSM__,RHS__)) +! DVM$ PARALLEL (K,J) ON U(*,*,J,K), CUDA_BLOCK (32,4),PRIVATE (M,I,RU1,I +! DVM$&1,I2,FAC1,FAC2,LHS__,LHSP__,LHSM__,RHS__,T1,T2) +! DVM$ REGION LOCAL (LHS) + do k = 1,nz2 + do j = 1,ny2 + do i = 0,problem_size - 1 + if (i .eq. 0) then + lhs__(1,0) = 0.0d0 + lhsp__(1,0) = 0.0d0 + lhsm__(1,0) = 0.0d0 + lhs__(2,0) = 0.0d0 + lhsp__(2,0) = 0.0d0 + lhsm__(2,0) = 0.0d0 + lhs__(3,0) = 1.0d0 + lhsp__(3,0) = 1.0d0 + lhsm__(3,0) = 1.0d0 + lhs__(4,0) = 0.0d0 + lhsp__(4,0) = 0.0d0 + lhsm__(4,0) = 0.0d0 + lhs__(5,0) = 0.0d0 + lhsp__(5,0) = 0.0d0 + lhsm__(5,0) = 0.0d0 + lhs__(1,1) = 0.0d0 + ru1 = c3c4 * 1.0d0 / u(1,1 - 1,j,k) + ru1 = dmax1 (dx2 + con43 * ru1,dx5 + c1c5 * ru1,dxmax + &+ ru1,dx1) + lhs__(2,1) = (-(dttx2)) * us(1 - 1,j,k) - dttx1 * ru1 + ru1 = c3c4 * 1.0d0 / u(1,1,j,k) + ru1 = dmax1 (dx2 + con43 * ru1,dx5 + c1c5 * ru1,dxmax + &+ ru1,dx1) + lhs__(3,1) = 1.0d0 + c2dttx1 * ru1 + ru1 = c3c4 * 1.0d0 / u(1,1 + 1,j,k) + ru1 = dmax1 (dx2 + con43 * ru1,dx5 + c1c5 * ru1,dxmax + &+ ru1,dx1) + lhs__(4,1) = dttx2 * us(1 + 1,j,k) - dttx1 * ru1 + lhs__(5,1) = 0.0d0 + lhs__(3,1) = lhs__(3,1) + comz5 + lhs__(4,1) = lhs__(4,1) - comz4 + lhs__(5,1) = lhs__(5,1) + comz1 + lhsp__(1,1) = lhs__(1,1) + lhsp__(2,1) = lhs__(2,1) - dttx2 * speed(1 - 1,j,k) + lhsp__(3,1) = lhs__(3,1) + lhsp__(4,1) = lhs__(4,1) + dttx2 * speed(1 + 1,j,k) + lhsp__(5,1) = lhs__(5,1) + lhsm__(1,1) = lhs__(1,1) + lhsm__(2,1) = lhs__(2,1) + dttx2 * speed(1 - 1,j,k) + lhsm__(3,1) = lhs__(3,1) + lhsm__(4,1) = lhs__(4,1) - dttx2 * speed(1 + 1,j,k) + lhsm__(5,1) = lhs__(5,1) + endif + if (i + 2 .lt. problem_size - 1) then + m = i + 2 + lhs__(1,2) = 0.0d0 + ru1 = c3c4 * 1.0d0 / u(1,m - 1,j,k) + ru1 = dmax1 (dx2 + con43 * ru1,dx5 + c1c5 * ru1,dxmax + &+ ru1,dx1) + lhs__(2,2) = (-(dttx2)) * us(m - 1,j,k) - dttx1 * ru1 + ru1 = c3c4 * 1.0d0 / u(1,m,j,k) + ru1 = dmax1 (dx2 + con43 * ru1,dx5 + c1c5 * ru1,dxmax + &+ ru1,dx1) + lhs__(3,2) = 1.0d0 + c2dttx1 * ru1 + ru1 = c3c4 * 1.0d0 / u(1,m + 1,j,k) + ru1 = dmax1 (dx2 + con43 * ru1,dx5 + c1c5 * ru1,dxmax + &+ ru1,dx1) + lhs__(4,2) = dttx2 * us(m + 1,j,k) - dttx1 * ru1 + lhs__(5,2) = 0.0d0 + if (m .eq. 1) then + lhs__(3,2) = lhs__(3,2) + comz5 + lhs__(4,2) = lhs__(4,2) - comz4 + lhs__(5,2) = lhs__(5,2) + comz1 + else if (m .eq. 2) then + lhs__(2,2) = lhs__(2,2) - comz4 + lhs__(3,2) = lhs__(3,2) + comz6 + lhs__(4,2) = lhs__(4,2) - comz4 + lhs__(5,2) = lhs__(5,2) + comz1 + else if (m .ge. 3 .and. m .le. nx2 - 2) then + lhs__(1,2) = lhs__(1,2) + comz1 + lhs__(2,2) = lhs__(2,2) - comz4 + lhs__(3,2) = lhs__(3,2) + comz6 + lhs__(4,2) = lhs__(4,2) - comz4 + lhs__(5,2) = lhs__(5,2) + comz1 + else if (m .eq. nx2 - 1) then + lhs__(1,2) = lhs__(1,2) + comz1 + lhs__(2,2) = lhs__(2,2) - comz4 + lhs__(3,2) = lhs__(3,2) + comz6 + lhs__(4,2) = lhs__(4,2) - comz4 + else if (m .eq. nx2) then + lhs__(1,2) = lhs__(1,2) + comz1 + lhs__(2,2) = lhs__(2,2) - comz4 + lhs__(3,2) = lhs__(3,2) + comz5 + endif + lhsp__(1,2) = lhs__(1,2) + lhsp__(2,2) = lhs__(2,2) - dttx2 * speed(m - 1,j,k) + lhsp__(3,2) = lhs__(3,2) + lhsp__(4,2) = lhs__(4,2) + dttx2 * speed(m + 1,j,k) + lhsp__(5,2) = lhs__(5,2) + lhsm__(1,2) = lhs__(1,2) + lhsm__(2,2) = lhs__(2,2) + dttx2 * speed(m - 1,j,k) + lhsm__(3,2) = lhs__(3,2) + lhsm__(4,2) = lhs__(4,2) - dttx2 * speed(m + 1,j,k) + lhsm__(5,2) = lhs__(5,2) + else if (i + 2 .eq. nx2 + 1) then + lhs__(1,2) = 0.0d0 + lhsp__(1,2) = 0.0d0 + lhsm__(1,2) = 0.0d0 + lhs__(2,2) = 0.0d0 + lhsp__(2,2) = 0.0d0 + lhsm__(2,2) = 0.0d0 + lhs__(3,2) = 1.0d0 + lhsp__(3,2) = 1.0d0 + lhsm__(3,2) = 1.0d0 + lhs__(4,2) = 0.0d0 + lhsp__(4,2) = 0.0d0 + lhsm__(4,2) = 0.0d0 + lhs__(5,2) = 0.0d0 + lhsp__(5,2) = 0.0d0 + lhsm__(5,2) = 0.0d0 + endif + +!********************************** end of init + i1 = i + 1 + i2 = i + 2 + fac1 = 1.d0 / lhs__(3,0) + lhs__(4,0) = fac1 * lhs__(4,0) + lhs__(5,0) = fac1 * lhs__(5,0) + do m = 1,3 + rhs(m,i,j,k) = fac1 * rhs(m,i,j,k) + enddo + if (i .le. nx2 - 1) then + lhs__(3,1) = lhs__(3,1) - lhs__(2,1) * lhs__(4,0) + lhs__(4,1) = lhs__(4,1) - lhs__(2,1) * lhs__(5,0) + lhs__(2,2) = lhs__(2,2) - lhs__(1,2) * lhs__(4,0) + lhs__(3,2) = lhs__(3,2) - lhs__(1,2) * lhs__(5,0) + do m = 1,3 + rhs(m,i1,j,k) = rhs(m,i1,j,k) - lhs__(2,1) * rhs(m, + &i,j,k) + rhs(m,i2,j,k) = rhs(m,i2,j,k) - lhs__(1,2) * rhs(m, + &i,j,k) + enddo + else + lhs__(3,1) = lhs__(3,1) - lhs__(2,1) * lhs__(4,0) + lhs__(4,1) = lhs__(4,1) - lhs__(2,1) * lhs__(5,0) + fac2 = 1.d0 / lhs__(3,1) + do m = 1,3 + rhs(m,i1,j,k) = rhs(m,i1,j,k) - lhs__(2,1) * rhs(m, + &i,j,k) + rhs(m,i1,j,k) = fac2 * rhs(m,i1,j,k) + enddo + endif + m = 4 + fac1 = 1.d0 / lhsp__(3,0) + lhsp__(4,0) = fac1 * lhsp__(4,0) + lhsp__(5,0) = fac1 * lhsp__(5,0) + rhs(m,i,j,k) = fac1 * rhs(m,i,j,k) + lhsp__(3,1) = lhsp__(3,1) - lhsp__(2,1) * lhsp__(4,0) + lhsp__(4,1) = lhsp__(4,1) - lhsp__(2,1) * lhsp__(5,0) + rhs(m,i1,j,k) = rhs(m,i1,j,k) - lhsp__(2,1) * rhs(m,i,j,k + &) + if (i .lt. nx2) then + lhsp__(2,2) = lhsp__(2,2) - lhsp__(1,2) * lhsp__(4,0) + lhsp__(3,2) = lhsp__(3,2) - lhsp__(1,2) * lhsp__(5,0) + rhs(m,i2,j,k) = rhs(m,i2,j,k) - lhsp__(1,2) * rhs(m,i, + &j,k) + endif + m = 5 + fac1 = 1.d0 / lhsm__(3,0) + lhsm__(4,0) = fac1 * lhsm__(4,0) + lhsm__(5,0) = fac1 * lhsm__(5,0) + rhs(m,i,j,k) = fac1 * rhs(m,i,j,k) + lhsm__(3,1) = lhsm__(3,1) - lhsm__(2,1) * lhsm__(4,0) + lhsm__(4,1) = lhsm__(4,1) - lhsm__(2,1) * lhsm__(5,0) + rhs(m,i1,j,k) = rhs(m,i1,j,k) - lhsm__(2,1) * rhs(m,i,j,k + &) + if (i .lt. nx2) then + lhsm__(2,2) = lhsm__(2,2) - lhsm__(1,2) * lhsm__(4,0) + lhsm__(3,2) = lhsm__(3,2) - lhsm__(1,2) * lhsm__(5,0) + rhs(m,i2,j,k) = rhs(m,i2,j,k) - lhsm__(1,2) * rhs(m,i, + &j,k) + endif + if (i .eq. nx2) then + rhs(4,i1,j,k) = rhs(4,i1,j,k) / lhsp__(3,1) + rhs(5,i1,j,k) = rhs(5,i1,j,k) / lhsm__(3,1) + do m = 1,3 + rhs(m,i,j,k) = rhs(m,i,j,k) - lhs__(4,0) * rhs(m,i1 + &,j,k) + enddo + rhs(4,i,j,k) = rhs(4,i,j,k) - lhsp__(4,0) * rhs(4,i1,j + &,k) + rhs(5,i,j,k) = rhs(5,i,j,k) - lhsm__(4,0) * rhs(5,i1,j + &,k) + endif + lhs(0,4,i,j,k) = lhs__(4,0) + lhs(1,4,i,j,k) = lhsp__(4,0) + lhs(2,4,i,j,k) = lhsm__(4,0) + lhs(0,5,i,j,k) = lhs__(5,0) + lhs(1,5,i,j,k) = lhsp__(5,0) + lhs(2,5,i,j,k) = lhsm__(5,0) + lhs__(1,0) = lhs__(1,1) + lhsp__(1,0) = lhsp__(1,1) + lhsm__(1,0) = lhsm__(1,1) + lhs__(1,1) = lhs__(1,2) + lhsp__(1,1) = lhsp__(1,2) + lhsm__(1,1) = lhsm__(1,2) + lhs__(2,0) = lhs__(2,1) + lhsp__(2,0) = lhsp__(2,1) + lhsm__(2,0) = lhsm__(2,1) + lhs__(2,1) = lhs__(2,2) + lhsp__(2,1) = lhsp__(2,2) + lhsm__(2,1) = lhsm__(2,2) + lhs__(3,0) = lhs__(3,1) + lhsp__(3,0) = lhsp__(3,1) + lhsm__(3,0) = lhsm__(3,1) + lhs__(3,1) = lhs__(3,2) + lhsp__(3,1) = lhsp__(3,2) + lhsm__(3,1) = lhsm__(3,2) + lhs__(4,0) = lhs__(4,1) + lhsp__(4,0) = lhsp__(4,1) + lhsm__(4,0) = lhsm__(4,1) + lhs__(4,1) = lhs__(4,2) + lhsp__(4,1) = lhsp__(4,2) + lhsm__(4,1) = lhsm__(4,2) + lhs__(5,0) = lhs__(5,1) + lhsp__(5,0) = lhsp__(5,1) + lhsm__(5,0) = lhsm__(5,1) + lhs__(5,1) = lhs__(5,2) + lhsp__(5,1) = lhsp__(5,2) + lhsm__(5,1) = lhsm__(5,2) + enddo + i = problem_size - 3 + rhs__(1,2) = rhs(1,i + 2,j,k) + rhs__(2,2) = rhs(2,i + 2,j,k) + rhs__(3,2) = rhs(3,i + 2,j,k) + rhs__(4,2) = rhs(4,i + 2,j,k) + rhs__(5,2) = rhs(5,i + 2,j,k) + rhs__(1,1) = rhs(1,i + 1,j,k) + rhs__(2,1) = rhs(2,i + 1,j,k) + rhs__(3,1) = rhs(3,i + 1,j,k) + rhs__(4,1) = rhs(4,i + 1,j,k) + rhs__(5,1) = rhs(5,i + 1,j,k) + rhs__(1,0) = rhs(1,i,j,k) + rhs__(2,0) = rhs(2,i,j,k) + rhs__(3,0) = rhs(3,i,j,k) + rhs__(4,0) = rhs(4,i,j,k) + rhs__(5,0) = rhs(5,i,j,k) + rhs__(1,0) = rhs__(1,0) - lhs(0,4,i,j,k) * rhs__(1,1) - lhs( + &0,5,i,j,k) * rhs__(1,2) + rhs__(2,0) = rhs__(2,0) - lhs(0,4,i,j,k) * rhs__(2,1) - lhs( + &0,5,i,j,k) * rhs__(2,2) + rhs__(3,0) = rhs__(3,0) - lhs(0,4,i,j,k) * rhs__(3,1) - lhs( + &0,5,i,j,k) * rhs__(3,2) + rhs__(4,0) = rhs__(4,0) - lhs(1,4,i,j,k) * rhs__(4,1) - lhs( + &1,5,i,j,k) * rhs__(4,2) + rhs__(5,0) = rhs__(5,0) - lhs(2,4,i,j,k) * rhs__(5,1) - lhs( + &2,5,i,j,k) * rhs__(5,2) + rhs__(1,2) = rhs__(1,1) + rhs__(2,2) = rhs__(2,1) + rhs__(3,2) = rhs__(3,1) + rhs__(4,2) = rhs__(4,1) + rhs__(5,2) = rhs__(5,1) + rhs__(1,1) = rhs__(1,0) + rhs__(2,1) = rhs__(2,0) + rhs__(3,1) = rhs__(3,0) + rhs__(4,1) = rhs__(4,0) + rhs__(5,1) = rhs__(5,0) + do i = problem_size - 4,0,(-(1)) + rhs__(1,0) = rhs(1,i,j,k) + rhs__(2,0) = rhs(2,i,j,k) + rhs__(3,0) = rhs(3,i,j,k) + rhs__(4,0) = rhs(4,i,j,k) + rhs__(5,0) = rhs(5,i,j,k) + rhs__(1,0) = rhs__(1,0) - lhs(0,4,i,j,k) * rhs__(1,1) - l + &hs(0,5,i,j,k) * rhs__(1,2) + rhs__(2,0) = rhs__(2,0) - lhs(0,4,i,j,k) * rhs__(2,1) - l + &hs(0,5,i,j,k) * rhs__(2,2) + rhs__(3,0) = rhs__(3,0) - lhs(0,4,i,j,k) * rhs__(3,1) - l + &hs(0,5,i,j,k) * rhs__(3,2) + rhs__(4,0) = rhs__(4,0) - lhs(1,4,i,j,k) * rhs__(4,1) - l + &hs(1,5,i,j,k) * rhs__(4,2) + rhs__(5,0) = rhs__(5,0) - lhs(2,4,i,j,k) * rhs__(5,1) - l + &hs(2,5,i,j,k) * rhs__(5,2) + t1 = bt * rhs__(3,2) + t2 = 0.5d0 * (rhs__(4,2) + rhs__(5,2)) + rhs(1,i + 2,j,k) = (-(rhs__(2,2))) + rhs(2,i + 2,j,k) = rhs__(1,2) + rhs(3,i + 2,j,k) = bt * (rhs__(4,2) - rhs__(5,2)) + rhs(4,i + 2,j,k) = (-(t1)) + t2 + rhs(5,i + 2,j,k) = t1 + t2 + rhs__(1,2) = rhs__(1,1) + rhs__(2,2) = rhs__(2,1) + rhs__(3,2) = rhs__(3,1) + rhs__(4,2) = rhs__(4,1) + rhs__(5,2) = rhs__(5,1) + rhs__(1,1) = rhs__(1,0) + rhs__(2,1) = rhs__(2,0) + rhs__(3,1) = rhs__(3,0) + rhs__(4,1) = rhs__(4,0) + rhs__(5,1) = rhs__(5,0) + enddo + t1 = bt * rhs__(3,2) + t2 = 0.5d0 * (rhs__(4,2) + rhs__(5,2)) + rhs(1,1,j,k) = (-(rhs__(2,2))) + rhs(2,1,j,k) = rhs__(1,2) + rhs(3,1,j,k) = bt * (rhs__(4,2) - rhs__(5,2)) + rhs(4,1,j,k) = (-(t1)) + t2 + rhs(5,1,j,k) = t1 + t2 + enddo + enddo +!$SPF END PARALLEL_REG +! DVM$ END REGION + return + end + diff --git a/y_solve_bt.for b/y_solve_bt.for new file mode 100644 index 0000000..6748250 --- /dev/null +++ b/y_solve_bt.for @@ -0,0 +1,636 @@ + +! *** generated by SAPFOR with version 2412 and build date: Apr 29 2025 22:44:14 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- +! performs guaussian elimination on this cell. +! +! assumes that unpacking routines for non-first cells +! preload C' and rhs' from previous cell. +! +! assumed send happens outside this routine, but that +! c'(JMAX) and rhs'(JMAX) will be sent to next cell +!--------------------------------------------------------------------- + subroutine y_solve_bt () + + include 'header3d_bt.h' + double precision :: coeff + double precision :: pivot + integer :: i__0 + integer :: j__1,m,n + double precision :: coeff__2 + double precision :: pivot__3 + double precision :: lhs_(5,5,3),rhs_(5),u_(0:3,5) + integer :: i,j,k,jsize,jstart + jstart = 0 + jsize = problem_size - 1 + +!$SPF PARALLEL_REG r0 +!$SPF ANALYSIS(PRIVATE(U_,RHS_,LHS_)) +! DVM$ PARALLEL (K,I) ON RHS(*,I,*,K), PRIVATE (U_,J,RHS_,PIVOT,COEFF,TMP +! DVM$&1,TMP2,TMP3,T1,T2,T3,TM1,TM2,TM3,TMP11,TMP22,I__0,J__1,LHS_,COEFF_ +! DVM$&_2,PIVOT__3,M,N) +! DVM$ REGION LOCAL (LHS__) +!ACROSS (rhs(0:0,1:0,0:0,0:0),lhs__(0:0,1:0,0:0,0:0,0:0)) + do k = 1,problem_size - 2 + do i = 1,problem_size - 2 + do m = 1,5 + u_(0,m) = u(m,i,0,k) + u_(1,m) = u(m,i,1,k) + enddo + do j = 1,jsize - 1 + do m = 1,5 + u_(2,m) = u(m,i,j + 1,k) + enddo + +! if(j .ne. jsize) then + tmp1 = 1.0d+00 / u_(1,1) + tmp2 = tmp1 * tmp1 + tmp3 = tmp1 * tmp2 + t1 = 1.0d+00 / u_(0,1) + t2 = t1 * t1 + t3 = t1 * t2 + tm1 = 1.0d+00 / u_(2,1) + tm2 = tm1 * tm1 + tm3 = tm1 * tm2 + tmp11 = dt * ty1 + tmp22 = dt * ty2 + lhs_(1,1,1) = (-(tmp11)) * dy1 + lhs_(1,2,1) = 0. + lhs_(1,3,1) = (-(tmp22)) + lhs_(1,4,1) = 0. + lhs_(1,5,1) = 0. + lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,3))) * t2) + &- tmp11 * ((-(c3c4)) * t2 * u_(0,2)) + lhs_(2,2,1) = (-(tmp22)) * u_(0,3) * t1 - tmp11 * c3c4 * + &t1 - tmp11 * dy2 + lhs_(2,3,1) = (-(tmp22)) * u_(0,2) * t1 + lhs_(2,4,1) = 0. + lhs_(2,5,1) = 0. + lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,3) * t2)) + + & 0.50d+00 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4) + & * u_(0,4)) * t2)) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,3)) + lhs_(3,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * t1) + lhs_(3,3,1) = (-(tmp22)) * ((2.0d+00 - c2) * u_(0,3) * t1 + &) - tmp11 * con43 * c3c4 * t1 - tmp11 * dy3 + lhs_(3,4,1) = (-(tmp22)) * ((-(c2)) * u_(0,4) * t1) + lhs_(3,5,1) = (-(tmp22)) * c2 + lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,4))) * t2) + &- tmp11 * ((-(c3c4)) * t2 * u_(0,4)) + lhs_(4,2,1) = 0. + lhs_(4,3,1) = (-(tmp22)) * u_(0,4) * t1 + lhs_(4,4,1) = (-(tmp22)) * u_(0,3) * t1 - tmp11 * c3c4 * + &t1 - tmp11 * dy4 + lhs_(4,5,1) = 0. + lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_ + &(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * u_(0,5) * t1) * u + &_(0,3) * t1) - tmp11 * ((-(c3c4 - c1345)) * t3 * u_(0,2)** 2 - (co + &n43 * c3c4 - c1345) * t3 * u_(0,3)** 2 - (c3c4 - c1345) * t3 * u_( + &0,4)** 2 - c1345 * t2 * u_(0,5)) + lhs_(5,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * u_(0,3) * + & t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,2) + lhs_(5,3,1) = (-(tmp22)) * (c1 * u_(0,5) * t1 - 0.50d+00 + &* c2 * ((u_(0,2) * u_(0,2) + 3.0d+00 * u_(0,3) * u_(0,3) + u_(0,4) + & * u_(0,4)) * t2)) - tmp11 * (con43 * c3c4 - c1345) * t2 * u_(0,3) + lhs_(5,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,4)) + & * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,4) + lhs_(5,5,1) = (-(tmp22)) * c1 * u_(0,3) * t1 - tmp11 * c1 + &345 * t1 - tmp11 * dy5 + lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dy1 + lhs_(1,2,2) = 0. + lhs_(1,3,2) = 0. + lhs_(1,4,2) = 0. + lhs_(1,5,2) = 0. + lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1, + &2)) + lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t + &mp11 * 2.0d+00 * dy2 + lhs_(2,3,2) = 0. + lhs_(2,4,2) = 0. + lhs_(2,5,2) = 0. + lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2 + & * u_(1,3)) + lhs_(3,2,2) = 0. + lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3c4 * + &tmp1 + tmp11 * 2.0d+00 * dy3 + lhs_(3,4,2) = 0. + lhs_(3,5,2) = 0. + lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1, + &4)) + lhs_(4,2,2) = 0. + lhs_(4,3,2) = 0. + lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t + &mp11 * 2.0d+00 * dy4 + lhs_(4,5,2) = 0. + lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(c3c4 - c1345)) * tmp3 + & * u_(1,2)** 2 - (con43 * c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (c3 + &c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5)) + lhs_(5,2,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u + &_(1,2) + lhs_(5,3,2) = tmp11 * 2.0d+00 * (con43 * c3c4 - c1345) * + &tmp2 * u_(1,3) + lhs_(5,4,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u + &_(1,4) + lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 + + &tmp11 * 2.0d+00 * dy5 + if (j .ne. 1) then + do j__1 = 1,5 + lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs + &_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3, + &j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1 + &,3) + lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs + &_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3, + &j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1 + &,3) + lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs + &_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3, + &j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1 + &,3) + lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs + &_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3, + &j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1 + &,3) + lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs + &_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3, + &j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1 + &,3) + enddo + endif + lhs_(1,1,3) = (-(tmp11)) * dy1 + lhs_(1,2,3) = 0. + lhs_(1,3,3) = tmp22 + lhs_(1,4,3) = 0. + lhs_(1,5,3) = 0. + lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * u_(2,3))) * tm2) - tm + &p11 * ((-(c3c4)) * tm2 * u_(2,2)) + lhs_(2,2,3) = tmp22 * u_(2,3) * tm1 - tmp11 * c3c4 * tm1 + &- tmp11 * dy2 + lhs_(2,3,3) = tmp22 * u_(2,2) * tm1 + lhs_(2,4,3) = 0. + lhs_(2,5,3) = 0. + lhs_(3,1,3) = tmp22 * ((-(u_(2,3) * u_(2,3) * tm2)) + 0.5 + &0d+00 * c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u + &_(2,4)) * tm2)) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,3)) + lhs_(3,2,3) = tmp22 * ((-(c2)) * u_(2,2) * tm1) + lhs_(3,3,3) = tmp22 * ((2.0d+00 - c2) * u_(2,3) * tm1) - + &tmp11 * con43 * c3c4 * tm1 - tmp11 * dy3 + lhs_(3,4,3) = tmp22 * ((-(c2)) * u_(2,4) * tm1) + lhs_(3,5,3) = tmp22 * c2 + lhs_(4,1,3) = tmp22 * ((-(u_(2,3) * u_(2,4))) * tm2) - tm + &p11 * ((-(c3c4)) * tm2 * u_(2,4)) + lhs_(4,2,3) = 0. + lhs_(4,3,3) = tmp22 * u_(2,4) * tm1 + lhs_(4,4,3) = tmp22 * u_(2,3) * tm1 - tmp11 * c3c4 * tm1 + &- tmp11 * dy4 + lhs_(4,5,3) = 0. + lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3) + & * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * u_(2,5) * tm1) * u_(2 + &,3) * tm1) - tmp11 * ((-(c3c4 - c1345)) * tm3 * u_(2,2)** 2 - (con + &43 * c3c4 - c1345) * tm3 * u_(2,3)** 2 - (c3c4 - c1345) * tm3 * u_ + &(2,4)** 2 - c1345 * tm2 * u_(2,5)) + lhs_(5,2,3) = tmp22 * ((-(c2)) * u_(2,2) * u_(2,3) * tm2) + & - tmp11 * (c3c4 - c1345) * tm2 * u_(2,2) + lhs_(5,3,3) = tmp22 * (c1 * u_(2,5) * tm1 - 0.50d+00 * c2 + & * ((u_(2,2) * u_(2,2) + 3.0d+00 * u_(2,3) * u_(2,3) + u_(2,4) * u + &_(2,4)) * tm2)) - tmp11 * (con43 * c3c4 - c1345) * tm2 * u_(2,3) + lhs_(5,4,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,4)) * tm + &2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,4) + lhs_(5,5,3) = tmp22 * c1 * u_(2,3) * tm1 - tmp11 * c1345 + &* tm1 - tmp11 * dy5 + do m = 1,5 + rhs_(m) = rhs(m,i,j,k) + enddo + do i__0 = 1,5 + rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j - + & 1,k) - lhs_(i__0,2,1) * rhs(2,i,j - 1,k) - lhs_(i__0,3,1) * rhs(3 + &,i,j - 1,k) - lhs_(i__0,4,1) * rhs(4,i,j - 1,k) - lhs_(i__0,5,1) * + & rhs(5,i,j - 1,k) + enddo + +!--------------------------------------------------------------------- +! +!--------------------------------------------------------------------- + pivot = 1.00d0 / lhs_(1,1,2) + lhs_(1,2,2) = lhs_(1,2,2) * pivot + lhs_(1,3,2) = lhs_(1,3,2) * pivot + lhs_(1,4,2) = lhs_(1,4,2) * pivot + lhs_(1,5,2) = lhs_(1,5,2) * pivot + lhs_(1,1,3) = lhs_(1,1,3) * pivot + lhs_(1,2,3) = lhs_(1,2,3) * pivot + lhs_(1,3,3) = lhs_(1,3,3) * pivot + lhs_(1,4,3) = lhs_(1,4,3) * pivot + lhs_(1,5,3) = lhs_(1,5,3) * pivot + rhs_(1) = rhs_(1) * pivot + coeff = lhs_(2,1,2) + lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2) + lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2) + lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2) + lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3) + lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3) + lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3) + lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3) + lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3) + rhs_(2) = rhs_(2) - coeff * rhs_(1) + coeff = lhs_(3,1,2) + lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2) + lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2) + lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2) + lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3) + lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3) + lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3) + lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3) + lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3) + rhs_(3) = rhs_(3) - coeff * rhs_(1) + coeff = lhs_(4,1,2) + lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2) + lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2) + lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3) + lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3) + lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3) + lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3) + lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3) + rhs_(4) = rhs_(4) - coeff * rhs_(1) + coeff = lhs_(5,1,2) + lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2) + lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2) + lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3) + lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3) + lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3) + lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3) + lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3) + rhs_(5) = rhs_(5) - coeff * rhs_(1) + pivot = 1.00d0 / lhs_(2,2,2) + lhs_(2,3,2) = lhs_(2,3,2) * pivot + lhs_(2,4,2) = lhs_(2,4,2) * pivot + lhs_(2,5,2) = lhs_(2,5,2) * pivot + lhs_(2,1,3) = lhs_(2,1,3) * pivot + lhs_(2,2,3) = lhs_(2,2,3) * pivot + lhs_(2,3,3) = lhs_(2,3,3) * pivot + lhs_(2,4,3) = lhs_(2,4,3) * pivot + lhs_(2,5,3) = lhs_(2,5,3) * pivot + rhs_(2) = rhs_(2) * pivot + coeff = lhs_(1,2,2) + lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2) + lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2) + lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3) + lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3) + lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3) + lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3) + lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3) + rhs_(1) = rhs_(1) - coeff * rhs_(2) + coeff = lhs_(3,2,2) + lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2) + lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2) + lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3) + lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3) + lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3) + lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3) + lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3) + rhs_(3) = rhs_(3) - coeff * rhs_(2) + coeff = lhs_(4,2,2) + lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2) + lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3) + lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3) + lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3) + lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3) + lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3) + rhs_(4) = rhs_(4) - coeff * rhs_(2) + coeff = lhs_(5,2,2) + lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2) + lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3) + lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3) + lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3) + lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3) + lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3) + rhs_(5) = rhs_(5) - coeff * rhs_(2) + pivot = 1.00d0 / lhs_(3,3,2) + lhs_(3,4,2) = lhs_(3,4,2) * pivot + lhs_(3,5,2) = lhs_(3,5,2) * pivot + lhs_(3,1,3) = lhs_(3,1,3) * pivot + lhs_(3,2,3) = lhs_(3,2,3) * pivot + lhs_(3,3,3) = lhs_(3,3,3) * pivot + lhs_(3,4,3) = lhs_(3,4,3) * pivot + lhs_(3,5,3) = lhs_(3,5,3) * pivot + rhs_(3) = rhs_(3) * pivot + coeff = lhs_(1,3,2) + lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2) + lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3) + lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3) + lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3) + lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3) + lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3) + rhs_(1) = rhs_(1) - coeff * rhs_(3) + coeff = lhs_(2,3,2) + lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2) + lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3) + lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3) + lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3) + lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3) + lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3) + rhs_(2) = rhs_(2) - coeff * rhs_(3) + coeff = lhs_(4,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2) + lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3) + lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3) + lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3) + lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3) + lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3) + rhs_(4) = rhs_(4) - coeff * rhs_(3) + coeff = lhs_(5,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2) + lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3) + lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3) + lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3) + lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3) + lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3) + rhs_(5) = rhs_(5) - coeff * rhs_(3) + pivot = 1.00d0 / lhs_(4,4,2) + lhs_(4,5,2) = lhs_(4,5,2) * pivot + lhs_(4,1,3) = lhs_(4,1,3) * pivot + lhs_(4,2,3) = lhs_(4,2,3) * pivot + lhs_(4,3,3) = lhs_(4,3,3) * pivot + lhs_(4,4,3) = lhs_(4,4,3) * pivot + lhs_(4,5,3) = lhs_(4,5,3) * pivot + rhs_(4) = rhs_(4) * pivot + coeff = lhs_(1,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2) + lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3) + lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3) + lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3) + lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3) + lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3) + rhs_(1) = rhs_(1) - coeff * rhs_(4) + coeff = lhs_(2,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2) + lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3) + lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3) + lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3) + lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3) + lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3) + rhs_(2) = rhs_(2) - coeff * rhs_(4) + coeff = lhs_(3,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2) + lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3) + lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3) + lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3) + lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3) + lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3) + rhs_(3) = rhs_(3) - coeff * rhs_(4) + coeff = lhs_(5,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2) + lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3) + lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3) + lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3) + lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3) + lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3) + rhs_(5) = rhs_(5) - coeff * rhs_(4) + pivot = 1.00d0 / lhs_(5,5,2) + lhs_(5,1,3) = lhs_(5,1,3) * pivot + lhs_(5,2,3) = lhs_(5,2,3) * pivot + lhs_(5,3,3) = lhs_(5,3,3) * pivot + lhs_(5,4,3) = lhs_(5,4,3) * pivot + lhs_(5,5,3) = lhs_(5,5,3) * pivot + rhs_(5) = rhs_(5) * pivot + coeff = lhs_(1,5,2) + lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3) + lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3) + lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3) + lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3) + lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3) + rhs_(1) = rhs_(1) - coeff * rhs_(5) + coeff = lhs_(2,5,2) + lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3) + lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3) + lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3) + lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3) + lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3) + rhs_(2) = rhs_(2) - coeff * rhs_(5) + coeff = lhs_(3,5,2) + lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3) + lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3) + lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3) + lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3) + lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3) + rhs_(3) = rhs_(3) - coeff * rhs_(5) + coeff = lhs_(4,5,2) + lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3) + lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3) + lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3) + lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3) + lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3) + rhs_(4) = rhs_(4) - coeff * rhs_(5) + do i__0 = 1,5 + lhs__(i__0,1,i,j,k) = lhs_(i__0,1,3) + lhs__(i__0,2,i,j,k) = lhs_(i__0,2,3) + lhs__(i__0,3,i,j,k) = lhs_(i__0,3,3) + lhs__(i__0,4,i,j,k) = lhs_(i__0,4,3) + lhs__(i__0,5,i,j,k) = lhs_(i__0,5,3) + enddo + do m = 1,5 + rhs(m,i,j,k) = rhs_(m) + u_(0,m) = u_(1,m) + u_(1,m) = u_(2,m) + enddo + enddo + +! else ! ******************* else case ************************* + do n = 1,5 + lhs_(1,n,1) = 0.0d0 + lhs_(1,n,2) = 0.0d0 + lhs_(1,n,3) = 0.0d0 + lhs_(2,n,1) = 0.0d0 + lhs_(2,n,2) = 0.0d0 + lhs_(2,n,3) = 0.0d0 + lhs_(3,n,1) = 0.0d0 + lhs_(3,n,2) = 0.0d0 + lhs_(3,n,3) = 0.0d0 + lhs_(4,n,1) = 0.0d0 + lhs_(4,n,2) = 0.0d0 + lhs_(4,n,3) = 0.0d0 + lhs_(5,n,1) = 0.0d0 + lhs_(5,n,2) = 0.0d0 + lhs_(5,n,3) = 0.0d0 + enddo + do m = 1,5 + lhs_(m,m,2) = 1.0d0 + enddo + do m = 1,5 + rhs_(m) = rhs(m,i,j,k) + enddo + do i__0 = 1,5 + rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,jsize + &- 1,k) - lhs_(i__0,2,1) * rhs(2,i,jsize - 1,k) - lhs_(i__0,3,1) * + &rhs(3,i,jsize - 1,k) - lhs_(i__0,4,1) * rhs(4,i,jsize - 1,k) - lhs + &_(i__0,5,1) * rhs(5,i,jsize - 1,k) + enddo + do j__1 = 1,5 + lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_ + &_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3 + &) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3) + lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_ + &_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3 + &) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3) + lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_ + &_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3 + &) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3) + lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_ + &_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3 + &) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3) + lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_ + &_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3 + &) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3) + enddo + pivot__3 = 1.00d0 / lhs_(1,1,2) + lhs_(1,2,2) = lhs_(1,2,2) * pivot__3 + lhs_(1,3,2) = lhs_(1,3,2) * pivot__3 + lhs_(1,4,2) = lhs_(1,4,2) * pivot__3 + lhs_(1,5,2) = lhs_(1,5,2) * pivot__3 + rhs_(1) = rhs_(1) * pivot__3 + coeff__2 = lhs_(2,1,2) + lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2) + lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2) + lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2) + rhs_(2) = rhs_(2) - coeff__2 * rhs_(1) + coeff__2 = lhs_(3,1,2) + lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2) + lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2) + lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2) + rhs_(3) = rhs_(3) - coeff__2 * rhs_(1) + coeff__2 = lhs_(4,1,2) + lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2) + lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2) + rhs_(4) = rhs_(4) - coeff__2 * rhs_(1) + coeff__2 = lhs_(5,1,2) + lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2) + lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2) + rhs_(5) = rhs_(5) - coeff__2 * rhs_(1) + pivot__3 = 1.00d0 / lhs_(2,2,2) + lhs_(2,3,2) = lhs_(2,3,2) * pivot__3 + lhs_(2,4,2) = lhs_(2,4,2) * pivot__3 + lhs_(2,5,2) = lhs_(2,5,2) * pivot__3 + rhs_(2) = rhs_(2) * pivot__3 + coeff__2 = lhs_(1,2,2) + lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2) + lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2) + rhs_(1) = rhs_(1) - coeff__2 * rhs_(2) + coeff__2 = lhs_(3,2,2) + lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2) + lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2) + rhs_(3) = rhs_(3) - coeff__2 * rhs_(2) + coeff__2 = lhs_(4,2,2) + lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2) + rhs_(4) = rhs_(4) - coeff__2 * rhs_(2) + coeff__2 = lhs_(5,2,2) + lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2) + rhs_(5) = rhs_(5) - coeff__2 * rhs_(2) + pivot__3 = 1.00d0 / lhs_(3,3,2) + lhs_(3,4,2) = lhs_(3,4,2) * pivot__3 + lhs_(3,5,2) = lhs_(3,5,2) * pivot__3 + rhs_(3) = rhs_(3) * pivot__3 + coeff__2 = lhs_(1,3,2) + lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2) + rhs_(1) = rhs_(1) - coeff__2 * rhs_(3) + coeff__2 = lhs_(2,3,2) + lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2) + rhs_(2) = rhs_(2) - coeff__2 * rhs_(3) + coeff__2 = lhs_(4,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2) + rhs_(4) = rhs_(4) - coeff__2 * rhs_(3) + coeff__2 = lhs_(5,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2) + rhs_(5) = rhs_(5) - coeff__2 * rhs_(3) + pivot__3 = 1.00d0 / lhs_(4,4,2) + lhs_(4,5,2) = lhs_(4,5,2) * pivot__3 + rhs_(4) = rhs_(4) * pivot__3 + coeff__2 = lhs_(1,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2) + rhs_(1) = rhs_(1) - coeff__2 * rhs_(4) + coeff__2 = lhs_(2,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2) + rhs_(2) = rhs_(2) - coeff__2 * rhs_(4) + coeff__2 = lhs_(3,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2) + rhs_(3) = rhs_(3) - coeff__2 * rhs_(4) + coeff__2 = lhs_(5,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2) + rhs_(5) = rhs_(5) - coeff__2 * rhs_(4) + pivot__3 = 1.00d0 / lhs_(5,5,2) + rhs_(5) = rhs_(5) * pivot__3 + coeff__2 = lhs_(1,5,2) + rhs_(1) = rhs_(1) - coeff__2 * rhs_(5) + coeff__2 = lhs_(2,5,2) + rhs_(2) = rhs_(2) - coeff__2 * rhs_(5) + coeff__2 = lhs_(3,5,2) + rhs_(3) = rhs_(3) - coeff__2 * rhs_(5) + coeff__2 = lhs_(4,5,2) + rhs_(4) = rhs_(4) - coeff__2 * rhs_(5) + +! endif + do m = 1,5 + rhs(m,i,j,k) = rhs_(m) + enddo + +! enddo + do j = problem_size - 2,0,(-(1)) + do m = 1,5 + rhs_(m) = rhs(m,i,j,k) + enddo + do m = 1,5 + rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k) * rhs(1,i,j + 1,k + &) + rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k) * rhs(2,i,j + 1,k + &) + rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k) * rhs(3,i,j + 1,k + &) + rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k) * rhs(4,i,j + 1,k + &) + rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k) * rhs(5,i,j + 1,k + &) + enddo + do m = 1,5 + rhs(m,i,j,k) = rhs_(m) + enddo + enddo + enddo + enddo +!$SPF END PARALLEL_REG +! DVM$ END REGION + return + end + diff --git a/y_solve_sp.for b/y_solve_sp.for new file mode 100644 index 0000000..19e6b82 --- /dev/null +++ b/y_solve_sp.for @@ -0,0 +1,332 @@ + +! *** generated by SAPFOR with version 2415 and build date: May 4 2025 14:48:40 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine y_solve_sp () + + include 'header_sp.h' + integer :: i,j,k,j1,j2,m,m1 + double precision :: ru1,fac1,fac2,rhs__(5,0:2),t1,t2 + double precision :: lhs__(5,0:2),lhsm__(5,0:2),lhsp__(5,0:2) + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + +!$SPF PARALLEL_REG r0 +!$SPF ANALYSIS(PRIVATE(LHS__,LHSP__,LHSM__,RHS__)) +! DVM$ PARALLEL (K,I) ON U(*,I,*,K), CUDA_BLOCK (32,4),PRIVATE (M,J1,J2,F +! DVM$&AC1,FAC2,RU1,LHS__,LHSP__,LHSM__,J,RHS__,T1,T2) +! DVM$ REGION LOCAL (LHS) + do k = 1,nz2 + do i = 1,nx2 + lhs__(1,0) = 0.0d0 + lhsp__(1,0) = 0.0d0 + lhsm__(1,0) = 0.0d0 + lhs__(2,0) = 0.0d0 + lhsp__(2,0) = 0.0d0 + lhsm__(2,0) = 0.0d0 + lhs__(3,0) = 1.0d0 + lhsp__(3,0) = 1.0d0 + lhsm__(3,0) = 1.0d0 + lhs__(4,0) = 0.0d0 + lhsp__(4,0) = 0.0d0 + lhsm__(4,0) = 0.0d0 + lhs__(5,0) = 0.0d0 + lhsp__(5,0) = 0.0d0 + lhsm__(5,0) = 0.0d0 + lhs__(1,1) = 0.0d0 + ru1 = c3c4 * 1.0d0 / u(1,i,1 - 1,k) + ru1 = dmax1 (dy3 + con43 * ru1,dy5 + c1c5 * ru1,dymax + ru1, + &dy1) + lhs__(2,1) = (-(dtty2)) * vs(i,1 - 1,k) - dtty1 * ru1 + ru1 = c3c4 * 1.0d0 / u(1,i,1,k) + ru1 = dmax1 (dy3 + con43 * ru1,dy5 + c1c5 * ru1,dymax + ru1, + &dy1) + lhs__(3,1) = 1.0d0 + c2dtty1 * ru1 + ru1 = c3c4 * 1.0d0 / u(1,i,1 + 1,k) + ru1 = dmax1 (dy3 + con43 * ru1,dy5 + c1c5 * ru1,dymax + ru1, + &dy1) + lhs__(4,1) = dtty2 * vs(i,1 + 1,k) - dtty1 * ru1 + lhs__(5,1) = 0.0d0 + lhs__(3,1) = lhs__(3,1) + comz5 + lhs__(4,1) = lhs__(4,1) - comz4 + lhs__(5,1) = lhs__(5,1) + comz1 + lhsp__(1,1) = lhs__(1,1) + lhsp__(2,1) = lhs__(2,1) - dtty2 * speed(i,1 - 1,k) + lhsp__(3,1) = lhs__(3,1) + lhsp__(4,1) = lhs__(4,1) + dtty2 * speed(i,1 + 1,k) + lhsp__(5,1) = lhs__(5,1) + lhsm__(1,1) = lhs__(1,1) + lhsm__(2,1) = lhs__(2,1) + dtty2 * speed(i,1 - 1,k) + lhsm__(3,1) = lhs__(3,1) + lhsm__(4,1) = lhs__(4,1) - dtty2 * speed(i,1 + 1,k) + lhsm__(5,1) = lhs__(5,1) + do j = 0,ny2 + 1 + if (j + 2 .lt. ny2 + 1) then + m = j + 2 + lhs__(1,2) = 0.0d0 + ru1 = c3c4 * 1.0d0 / u(1,i,m - 1,k) + ru1 = dmax1 (dy3 + con43 * ru1,dy5 + c1c5 * ru1,dymax + &+ ru1,dy1) + lhs__(2,2) = (-(dtty2)) * vs(i,m - 1,k) - dtty1 * ru1 + ru1 = c3c4 * 1.0d0 / u(1,i,m,k) + ru1 = dmax1 (dy3 + con43 * ru1,dy5 + c1c5 * ru1,dymax + &+ ru1,dy1) + lhs__(3,2) = 1.0d0 + c2dtty1 * ru1 + ru1 = c3c4 * 1.0d0 / u(1,i,m + 1,k) + ru1 = dmax1 (dy3 + con43 * ru1,dy5 + c1c5 * ru1,dymax + &+ ru1,dy1) + lhs__(4,2) = dtty2 * vs(i,m + 1,k) - dtty1 * ru1 + lhs__(5,2) = 0.0d0 + if (m .eq. 1) then + lhs__(3,2) = lhs__(3,2) + comz5 + lhs__(4,2) = lhs__(4,2) - comz4 + lhs__(5,2) = lhs__(5,2) + comz1 + else if (m .eq. 2) then + lhs__(2,2) = lhs__(2,2) - comz4 + lhs__(3,2) = lhs__(3,2) + comz6 + lhs__(4,2) = lhs__(4,2) - comz4 + lhs__(5,2) = lhs__(5,2) + comz1 + else if (m .ge. 3 .and. m .le. ny2 - 2) then + lhs__(1,2) = lhs__(1,2) + comz1 + lhs__(2,2) = lhs__(2,2) - comz4 + lhs__(3,2) = lhs__(3,2) + comz6 + lhs__(4,2) = lhs__(4,2) - comz4 + lhs__(5,2) = lhs__(5,2) + comz1 + else if (m .eq. ny2 - 1) then + lhs__(1,2) = lhs__(1,2) + comz1 + lhs__(2,2) = lhs__(2,2) - comz4 + lhs__(3,2) = lhs__(3,2) + comz6 + lhs__(4,2) = lhs__(4,2) - comz4 + else if (m .eq. ny2) then + lhs__(1,2) = lhs__(1,2) + comz1 + lhs__(2,2) = lhs__(2,2) - comz4 + lhs__(3,2) = lhs__(3,2) + comz5 + endif + lhsp__(1,2) = lhs__(1,2) + lhsp__(2,2) = lhs__(2,2) - dtty2 * speed(i,m - 1,k) + lhsp__(3,2) = lhs__(3,2) + lhsp__(4,2) = lhs__(4,2) + dtty2 * speed(i,m + 1,k) + lhsp__(5,2) = lhs__(5,2) + lhsm__(1,2) = lhs__(1,2) + lhsm__(2,2) = lhs__(2,2) + dtty2 * speed(i,m - 1,k) + lhsm__(3,2) = lhs__(3,2) + lhsm__(4,2) = lhs__(4,2) - dtty2 * speed(i,m + 1,k) + lhsm__(5,2) = lhs__(5,2) + else if (j + 2 .eq. ny2 + 1) then + lhs__(1,2) = 0.0d0 + lhsp__(1,2) = 0.0d0 + lhsm__(1,2) = 0.0d0 + lhs__(2,2) = 0.0d0 + lhsp__(2,2) = 0.0d0 + lhsm__(2,2) = 0.0d0 + lhs__(3,2) = 1.0d0 + lhsp__(3,2) = 1.0d0 + lhsm__(3,2) = 1.0d0 + lhs__(4,2) = 0.0d0 + lhsp__(4,2) = 0.0d0 + lhsm__(4,2) = 0.0d0 + lhs__(5,2) = 0.0d0 + lhsp__(5,2) = 0.0d0 + lhsm__(5,2) = 0.0d0 + endif + +!********************************** end of init + j1 = j + 1 + j2 = j + 2 + fac1 = 1.d0 / lhs__(3,0) + lhs__(4,0) = fac1 * lhs__(4,0) + lhs__(5,0) = fac1 * lhs__(5,0) + do m = 1,3 + rhs(m,i,j,k) = fac1 * rhs(m,i,j,k) + enddo + if (j .le. ny2 - 1) then + lhs__(3,1) = lhs__(3,1) - lhs__(2,1) * lhs__(4,0) + lhs__(4,1) = lhs__(4,1) - lhs__(2,1) * lhs__(5,0) + lhs__(2,2) = lhs__(2,2) - lhs__(1,2) * lhs__(4,0) + lhs__(3,2) = lhs__(3,2) - lhs__(1,2) * lhs__(5,0) + do m = 1,3 + rhs(m,i,j1,k) = rhs(m,i,j1,k) - lhs__(2,1) * rhs(m, + &i,j,k) + rhs(m,i,j2,k) = rhs(m,i,j2,k) - lhs__(1,2) * rhs(m, + &i,j,k) + enddo + else + lhs__(3,1) = lhs__(3,1) - lhs__(2,1) * lhs__(4,0) + lhs__(4,1) = lhs__(4,1) - lhs__(2,1) * lhs__(5,0) + fac2 = 1.d0 / lhs__(3,1) + do m = 1,3 + rhs(m,i,j1,k) = rhs(m,i,j1,k) - lhs__(2,1) * rhs(m, + &i,j,k) + rhs(m,i,j1,k) = fac2 * rhs(m,i,j1,k) + enddo + endif + m = 4 + fac1 = 1.d0 / lhsp__(3,0) + lhsp__(4,0) = fac1 * lhsp__(4,0) + lhsp__(5,0) = fac1 * lhsp__(5,0) + rhs(m,i,j,k) = fac1 * rhs(m,i,j,k) + lhsp__(3,1) = lhsp__(3,1) - lhsp__(2,1) * lhsp__(4,0) + lhsp__(4,1) = lhsp__(4,1) - lhsp__(2,1) * lhsp__(5,0) + rhs(m,i,j1,k) = rhs(m,i,j1,k) - lhsp__(2,1) * rhs(m,i,j,k + &) + if (j .lt. ny2) then + lhsp__(2,2) = lhsp__(2,2) - lhsp__(1,2) * lhsp__(4,0) + lhsp__(3,2) = lhsp__(3,2) - lhsp__(1,2) * lhsp__(5,0) + rhs(m,i,j2,k) = rhs(m,i,j2,k) - lhsp__(1,2) * rhs(m,i, + &j,k) + endif + m = 5 + fac1 = 1.d0 / lhsm__(3,0) + lhsm__(4,0) = fac1 * lhsm__(4,0) + lhsm__(5,0) = fac1 * lhsm__(5,0) + rhs(m,i,j,k) = fac1 * rhs(m,i,j,k) + lhsm__(3,1) = lhsm__(3,1) - lhsm__(2,1) * lhsm__(4,0) + lhsm__(4,1) = lhsm__(4,1) - lhsm__(2,1) * lhsm__(5,0) + rhs(m,i,j1,k) = rhs(m,i,j1,k) - lhsm__(2,1) * rhs(m,i,j,k + &) + if (j .lt. ny2) then + lhsm__(2,2) = lhsm__(2,2) - lhsm__(1,2) * lhsm__(4,0) + lhsm__(3,2) = lhsm__(3,2) - lhsm__(1,2) * lhsm__(5,0) + rhs(m,i,j2,k) = rhs(m,i,j2,k) - lhsm__(1,2) * rhs(m,i, + &j,k) + endif + if (j .eq. ny2) then + rhs(4,i,j1,k) = rhs(4,i,j1,k) / lhsp__(3,1) + rhs(5,i,j1,k) = rhs(5,i,j1,k) / lhsm__(3,1) + do m = 1,3 + rhs(m,i,j,k) = rhs(m,i,j,k) - lhs__(4,0) * rhs(m,i, + &j1,k) + enddo + rhs(4,i,j,k) = rhs(4,i,j,k) - lhsp__(4,0) * rhs(4,i,j1 + &,k) + rhs(5,i,j,k) = rhs(5,i,j,k) - lhsm__(4,0) * rhs(5,i,j1 + &,k) + endif + lhs(0,4,i,j,k) = lhs__(4,0) + lhs(1,4,i,j,k) = lhsp__(4,0) + lhs(2,4,i,j,k) = lhsm__(4,0) + lhs(0,5,i,j,k) = lhs__(5,0) + lhs(1,5,i,j,k) = lhsp__(5,0) + lhs(2,5,i,j,k) = lhsm__(5,0) + lhs__(1,0) = lhs__(1,1) + lhsp__(1,0) = lhsp__(1,1) + lhsm__(1,0) = lhsm__(1,1) + lhs__(1,1) = lhs__(1,2) + lhsp__(1,1) = lhsp__(1,2) + lhsm__(1,1) = lhsm__(1,2) + lhs__(2,0) = lhs__(2,1) + lhsp__(2,0) = lhsp__(2,1) + lhsm__(2,0) = lhsm__(2,1) + lhs__(2,1) = lhs__(2,2) + lhsp__(2,1) = lhsp__(2,2) + lhsm__(2,1) = lhsm__(2,2) + lhs__(3,0) = lhs__(3,1) + lhsp__(3,0) = lhsp__(3,1) + lhsm__(3,0) = lhsm__(3,1) + lhs__(3,1) = lhs__(3,2) + lhsp__(3,1) = lhsp__(3,2) + lhsm__(3,1) = lhsm__(3,2) + lhs__(4,0) = lhs__(4,1) + lhsp__(4,0) = lhsp__(4,1) + lhsm__(4,0) = lhsm__(4,1) + lhs__(4,1) = lhs__(4,2) + lhsp__(4,1) = lhsp__(4,2) + lhsm__(4,1) = lhsm__(4,2) + lhs__(5,0) = lhs__(5,1) + lhsp__(5,0) = lhsp__(5,1) + lhsm__(5,0) = lhsm__(5,1) + lhs__(5,1) = lhs__(5,2) + lhsp__(5,1) = lhsp__(5,2) + lhsm__(5,1) = lhsm__(5,2) + enddo + j = problem_size - 3 + rhs__(1,2) = rhs(1,i,j + 2,k) + rhs__(2,2) = rhs(2,i,j + 2,k) + rhs__(3,2) = rhs(3,i,j + 2,k) + rhs__(4,2) = rhs(4,i,j + 2,k) + rhs__(5,2) = rhs(5,i,j + 2,k) + rhs__(1,1) = rhs(1,i,j + 1,k) + rhs__(2,1) = rhs(2,i,j + 1,k) + rhs__(3,1) = rhs(3,i,j + 1,k) + rhs__(4,1) = rhs(4,i,j + 1,k) + rhs__(5,1) = rhs(5,i,j + 1,k) + rhs__(1,0) = rhs(1,i,j,k) + rhs__(2,0) = rhs(2,i,j,k) + rhs__(3,0) = rhs(3,i,j,k) + rhs__(4,0) = rhs(4,i,j,k) + rhs__(5,0) = rhs(5,i,j,k) + rhs__(1,0) = rhs__(1,0) - lhs(0,4,i,j,k) * rhs__(1,1) - lhs( + &0,5,i,j,k) * rhs__(1,2) + rhs__(2,0) = rhs__(2,0) - lhs(0,4,i,j,k) * rhs__(2,1) - lhs( + &0,5,i,j,k) * rhs__(2,2) + rhs__(3,0) = rhs__(3,0) - lhs(0,4,i,j,k) * rhs__(3,1) - lhs( + &0,5,i,j,k) * rhs__(3,2) + rhs__(4,0) = rhs__(4,0) - lhs(1,4,i,j,k) * rhs__(4,1) - lhs( + &1,5,i,j,k) * rhs__(4,2) + rhs__(5,0) = rhs__(5,0) - lhs(2,4,i,j,k) * rhs__(5,1) - lhs( + &2,5,i,j,k) * rhs__(5,2) + rhs__(1,2) = rhs__(1,1) + rhs__(2,2) = rhs__(2,1) + rhs__(3,2) = rhs__(3,1) + rhs__(4,2) = rhs__(4,1) + rhs__(5,2) = rhs__(5,1) + rhs__(1,1) = rhs__(1,0) + rhs__(2,1) = rhs__(2,0) + rhs__(3,1) = rhs__(3,0) + rhs__(4,1) = rhs__(4,0) + rhs__(5,1) = rhs__(5,0) + do j = problem_size - 4,0,(-(1)) + rhs__(1,0) = rhs(1,i,j,k) + rhs__(2,0) = rhs(2,i,j,k) + rhs__(3,0) = rhs(3,i,j,k) + rhs__(4,0) = rhs(4,i,j,k) + rhs__(5,0) = rhs(5,i,j,k) + rhs__(1,0) = rhs__(1,0) - lhs(0,4,i,j,k) * rhs__(1,1) - l + &hs(0,5,i,j,k) * rhs__(1,2) + rhs__(2,0) = rhs__(2,0) - lhs(0,4,i,j,k) * rhs__(2,1) - l + &hs(0,5,i,j,k) * rhs__(2,2) + rhs__(3,0) = rhs__(3,0) - lhs(0,4,i,j,k) * rhs__(3,1) - l + &hs(0,5,i,j,k) * rhs__(3,2) + rhs__(4,0) = rhs__(4,0) - lhs(1,4,i,j,k) * rhs__(4,1) - l + &hs(1,5,i,j,k) * rhs__(4,2) + rhs__(5,0) = rhs__(5,0) - lhs(2,4,i,j,k) * rhs__(5,1) - l + &hs(2,5,i,j,k) * rhs__(5,2) + t1 = bt * rhs__(1,2) + t2 = 0.5d0 * (rhs__(4,2) + rhs__(5,2)) + rhs(1,i,j + 2,k) = bt * (rhs__(4,2) - rhs__(5,2)) + rhs(2,i,j + 2,k) = (-(rhs__(3,2))) + rhs(3,i,j + 2,k) = rhs__(2,2) + rhs(4,i,j + 2,k) = (-(t1)) + t2 + rhs(5,i,j + 2,k) = t1 + t2 + rhs__(1,2) = rhs__(1,1) + rhs__(2,2) = rhs__(2,1) + rhs__(3,2) = rhs__(3,1) + rhs__(4,2) = rhs__(4,1) + rhs__(5,2) = rhs__(5,1) + rhs__(1,1) = rhs__(1,0) + rhs__(2,1) = rhs__(2,0) + rhs__(3,1) = rhs__(3,0) + rhs__(4,1) = rhs__(4,0) + rhs__(5,1) = rhs__(5,0) + enddo + t1 = bt * rhs__(1,2) + t2 = 0.5d0 * (rhs__(4,2) + rhs__(5,2)) + rhs(1,i,j + 2,k) = bt * (rhs__(4,2) - rhs__(5,2)) + rhs(2,i,j + 2,k) = (-(rhs__(3,2))) + rhs(3,i,j + 2,k) = rhs__(2,2) + rhs(4,i,j + 2,k) = (-(t1)) + t2 + rhs(5,i,j + 2,k) = t1 + t2 + enddo + enddo +!$SPF END PARALLEL_REG +! DVM$ END REGION + return + end + diff --git a/z_solve_bt.for b/z_solve_bt.for new file mode 100644 index 0000000..5f9ed19 --- /dev/null +++ b/z_solve_bt.for @@ -0,0 +1,632 @@ + +! *** generated by SAPFOR with version 2412 and build date: Apr 29 2025 22:44:14 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- +! performs guaussian elimination on this cell. +! +! assumes that unpacking routines for non-first cells +! preload C' and rhs' from previous cell. +! +! assumed send happens outside this routine, but that +! c'(KMAX) and rhs'(KMAX) will be sent to next cell. +!--------------------------------------------------------------------- + subroutine z_solve_bt () + + include 'header3d_bt.h' + double precision :: coeff + double precision :: pivot + integer :: i__0 + integer :: j__1,m,n + double precision :: coeff__2 + double precision :: pivot__3 + double precision :: lhs_(5,5,3),rhs_(5),rhsp_(5),u_(0:3,5) + integer :: i,j,k,ksize,k1 + ksize = problem_size - 1 + +!$SPF PARALLEL_REG r0 +!$SPF ANALYSIS(PRIVATE(U_,RHS_,LHS_,rhsp_)) +! DVM$ PARALLEL (J,I) ON RHS(*,I,J,*), PRIVATE (K,U_,RHS_,PIVOT,COEFF,TMP +! DVM$&1,TMP2,TMP3,T1,T2,T3,TM1,TM2,TM3,RHSP_,TMP11,TMP22,I__0,J__1,LHS_, +! DVM$&N,M,PIVOT__3,COEFF__2) +! DVM$ REGION LOCAL (LHS__) + do j = 1,problem_size - 2 + do i = 1,problem_size - 2 + do m = 1,5 + u_(0,m) = u(m,i,j,0) + u_(1,m) = u(m,i,j,1) + enddo + do k = 1,ksize - 1 + do m = 1,5 + u_(2,m) = u(m,i,j,k + 1) + enddo + tmp1 = 1.0d+00 / u_(1,1) + tmp2 = tmp1 * tmp1 + tmp3 = tmp1 * tmp2 + t1 = 1.0d+00 / u_(0,1) + t2 = t1 * t1 + t3 = t1 * t2 + tm1 = 1.0d+00 / u_(2,1) + tm2 = tm1 * tm1 + tm3 = tm1 * tm2 + tmp11 = dt * tz1 + tmp22 = dt * tz2 + lhs_(1,1,1) = (-(tmp11)) * dz1 + lhs_(1,2,1) = 0. + lhs_(1,3,1) = 0. + lhs_(1,4,1) = (-(tmp22)) + lhs_(1,5,1) = 0. + lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,4))) * t2) + &- tmp11 * ((-(c3c4)) * t2 * u_(0,2)) + lhs_(2,2,1) = (-(tmp22)) * u_(0,4) * t1 - tmp11 * c3c4 * + &t1 - tmp11 * dz2 + lhs_(2,3,1) = 0. + lhs_(2,4,1) = (-(tmp22)) * u_(0,2) * t1 + lhs_(2,5,1) = 0. + lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,4))) * t2) + &- tmp11 * ((-(c3c4)) * t2 * u_(0,3)) + lhs_(3,2,1) = 0. + lhs_(3,3,1) = (-(tmp22)) * u_(0,4) * t1 - tmp11 * c3c4 * + &t1 - tmp11 * dz3 + lhs_(3,4,1) = (-(tmp22)) * u_(0,3) * t1 + lhs_(3,5,1) = 0. + lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,4) * u_(0,4) * t2)) + + & 0.50d+00 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4) + & * u_(0,4)) * t2)) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,4)) + lhs_(4,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * t1) + lhs_(4,3,1) = (-(tmp22)) * ((-(c2)) * u_(0,3) * t1) + lhs_(4,4,1) = (-(tmp22)) * (2.0d+00 - c2) * u_(0,4) * t1 + &- tmp11 * con43 * c3 * c4 * t1 - tmp11 * dz4 + lhs_(4,5,1) = (-(tmp22)) * c2 + lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_ + &(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * (u_(0,5) * t1)) * + & (u_(0,4) * t1)) - tmp11 * ((-(c3c4 - c1345)) * t3 * u_(0,2)** 2 - + & (c3c4 - c1345) * t3 * u_(0,3)** 2 - (con43 * c3c4 - c1345) * t3 * + & u_(0,4)** 2 - c1345 * t2 * u_(0,5)) + lhs_(5,2,1) = (-(tmp22)) * ((-(c2)) * (u_(0,2) * u_(0,4)) + & * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,2) + lhs_(5,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,4)) + & * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,3) + lhs_(5,4,1) = (-(tmp22)) * (c1 * (u_(0,5) * t1) - 0.50d+0 + &0 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + 3.0d+00 * u_(0, + &4) * u_(0,4)) * t2)) - tmp11 * (con43 * c3c4 - c1345) * t2 * u_(0, + &4) + lhs_(5,5,1) = (-(tmp22)) * c1 * u_(0,4) * t1 - tmp11 * c1 + &345 * t1 - tmp11 * dz5 + lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dz1 + lhs_(1,2,2) = 0. + lhs_(1,3,2) = 0. + lhs_(1,4,2) = 0. + lhs_(1,5,2) = 0. + lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1, + &2)) + lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t + &mp11 * 2.0d+00 * dz2 + lhs_(2,3,2) = 0. + lhs_(2,4,2) = 0. + lhs_(2,5,2) = 0. + lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1, + &3)) + lhs_(3,2,2) = 0. + lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t + &mp11 * 2.0d+00 * dz3 + lhs_(3,4,2) = 0. + lhs_(3,5,2) = 0. + lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2 + & * u_(1,4)) + lhs_(4,2,2) = 0. + lhs_(4,3,2) = 0. + lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3 * c4 + & * tmp1 + tmp11 * 2.0d+00 * dz4 + lhs_(4,5,2) = 0. + lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(c3c4 - c1345)) * tmp3 + & * u_(1,2)** 2 - (c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (con43 * c3 + &c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5)) + lhs_(5,2,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u + &_(1,2) + lhs_(5,3,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u + &_(1,3) + lhs_(5,4,2) = tmp11 * 2.0d+00 * (con43 * c3c4 - c1345) * + &tmp2 * u_(1,4) + lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 + + &tmp11 * 2.0d+00 * dz5 + if (k .ne. 1) then + do j__1 = 1,5 + lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs + &_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3, + &j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1 + &,3) + lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs + &_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3, + &j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1 + &,3) + lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs + &_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3, + &j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1 + &,3) + lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs + &_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3, + &j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1 + &,3) + lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs + &_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3, + &j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1 + &,3) + enddo + endif + lhs_(1,1,3) = (-(tmp11)) * dz1 + lhs_(1,2,3) = 0. + lhs_(1,3,3) = 0. + lhs_(1,4,3) = tmp22 + lhs_(1,5,3) = 0. + lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * u_(2,4))) * tm2) - tm + &p11 * ((-(c3c4)) * tm2 * u_(2,2)) + lhs_(2,2,3) = tmp22 * u_(2,4) * tm1 - tmp11 * c3c4 * tm1 + &- tmp11 * dz2 + lhs_(2,3,3) = 0. + lhs_(2,4,3) = tmp22 * u_(2,2) * tm1 + lhs_(2,5,3) = 0. + lhs_(3,1,3) = tmp22 * ((-(u_(2,3) * u_(2,4))) * tm2) - tm + &p11 * ((-(c3c4)) * tm2 * u_(2,3)) + lhs_(3,2,3) = 0. + lhs_(3,3,3) = tmp22 * u_(2,4) * tm1 - tmp11 * c3c4 * tm1 + &- tmp11 * dz3 + lhs_(3,4,3) = tmp22 * u_(2,3) * tm1 + lhs_(3,5,3) = 0. + lhs_(4,1,3) = tmp22 * ((-(u_(2,4) * u_(2,4) * tm2)) + 0.5 + &0d+00 * c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u + &_(2,4)) * tm2)) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,4)) + lhs_(4,2,3) = tmp22 * ((-(c2)) * u_(2,2) * tm1) + lhs_(4,3,3) = tmp22 * ((-(c2)) * u_(2,3) * tm1) + lhs_(4,4,3) = tmp22 * (2.0d+00 - c2) * u_(2,4) * tm1 - tm + &p11 * con43 * c3 * c4 * tm1 - tmp11 * dz4 + lhs_(4,5,3) = tmp22 * c2 + lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3) + & * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * (u_(2,5) * tm1)) * (u + &_(2,4) * tm1)) - tmp11 * ((-(c3c4 - c1345)) * tm3 * u_(2,2)** 2 - + &(c3c4 - c1345) * tm3 * u_(2,3)** 2 - (con43 * c3c4 - c1345) * tm3 + &* u_(2,4)** 2 - c1345 * tm2 * u_(2,5)) + lhs_(5,2,3) = tmp22 * ((-(c2)) * (u_(2,2) * u_(2,4)) * tm + &2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,2) + lhs_(5,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,4)) * tm + &2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,3) + lhs_(5,4,3) = tmp22 * (c1 * (u_(2,5) * tm1) - 0.50d+00 * + &c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + 3.0d+00 * u_(2,4) * + & u_(2,4)) * tm2)) - tmp11 * (con43 * c3c4 - c1345) * tm2 * u_(2,4) + lhs_(5,5,3) = tmp22 * c1 * u_(2,4) * tm1 - tmp11 * c1345 + &* tm1 - tmp11 * dz5 + do m = 1,5 + rhs_(m) = rhs(m,i,j,k) + enddo + do i__0 = 1,5 + rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j,k + & - 1) - lhs_(i__0,2,1) * rhs(2,i,j,k - 1) - lhs_(i__0,3,1) * rhs(3 + &,i,j,k - 1) - lhs_(i__0,4,1) * rhs(4,i,j,k - 1) - lhs_(i__0,5,1) * + & rhs(5,i,j,k - 1) + enddo + +!--------------------------------------------------------------------- +! +!--------------------------------------------------------------------- + pivot = 1.00d0 / lhs_(1,1,2) + lhs_(1,2,2) = lhs_(1,2,2) * pivot + lhs_(1,3,2) = lhs_(1,3,2) * pivot + lhs_(1,4,2) = lhs_(1,4,2) * pivot + lhs_(1,5,2) = lhs_(1,5,2) * pivot + lhs_(1,1,3) = lhs_(1,1,3) * pivot + lhs_(1,2,3) = lhs_(1,2,3) * pivot + lhs_(1,3,3) = lhs_(1,3,3) * pivot + lhs_(1,4,3) = lhs_(1,4,3) * pivot + lhs_(1,5,3) = lhs_(1,5,3) * pivot + rhs_(1) = rhs_(1) * pivot + coeff = lhs_(2,1,2) + lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2) + lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2) + lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2) + lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3) + lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3) + lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3) + lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3) + lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3) + rhs_(2) = rhs_(2) - coeff * rhs_(1) + coeff = lhs_(3,1,2) + lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2) + lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2) + lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2) + lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3) + lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3) + lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3) + lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3) + lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3) + rhs_(3) = rhs_(3) - coeff * rhs_(1) + coeff = lhs_(4,1,2) + lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2) + lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2) + lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3) + lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3) + lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3) + lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3) + lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3) + rhs_(4) = rhs_(4) - coeff * rhs_(1) + coeff = lhs_(5,1,2) + lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2) + lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2) + lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3) + lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3) + lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3) + lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3) + lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3) + rhs_(5) = rhs_(5) - coeff * rhs_(1) + pivot = 1.00d0 / lhs_(2,2,2) + lhs_(2,3,2) = lhs_(2,3,2) * pivot + lhs_(2,4,2) = lhs_(2,4,2) * pivot + lhs_(2,5,2) = lhs_(2,5,2) * pivot + lhs_(2,1,3) = lhs_(2,1,3) * pivot + lhs_(2,2,3) = lhs_(2,2,3) * pivot + lhs_(2,3,3) = lhs_(2,3,3) * pivot + lhs_(2,4,3) = lhs_(2,4,3) * pivot + lhs_(2,5,3) = lhs_(2,5,3) * pivot + rhs_(2) = rhs_(2) * pivot + coeff = lhs_(1,2,2) + lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2) + lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2) + lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3) + lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3) + lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3) + lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3) + lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3) + rhs_(1) = rhs_(1) - coeff * rhs_(2) + coeff = lhs_(3,2,2) + lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2) + lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2) + lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3) + lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3) + lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3) + lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3) + lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3) + rhs_(3) = rhs_(3) - coeff * rhs_(2) + coeff = lhs_(4,2,2) + lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2) + lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3) + lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3) + lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3) + lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3) + lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3) + rhs_(4) = rhs_(4) - coeff * rhs_(2) + coeff = lhs_(5,2,2) + lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2) + lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3) + lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3) + lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3) + lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3) + lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3) + rhs_(5) = rhs_(5) - coeff * rhs_(2) + pivot = 1.00d0 / lhs_(3,3,2) + lhs_(3,4,2) = lhs_(3,4,2) * pivot + lhs_(3,5,2) = lhs_(3,5,2) * pivot + lhs_(3,1,3) = lhs_(3,1,3) * pivot + lhs_(3,2,3) = lhs_(3,2,3) * pivot + lhs_(3,3,3) = lhs_(3,3,3) * pivot + lhs_(3,4,3) = lhs_(3,4,3) * pivot + lhs_(3,5,3) = lhs_(3,5,3) * pivot + rhs_(3) = rhs_(3) * pivot + coeff = lhs_(1,3,2) + lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2) + lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3) + lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3) + lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3) + lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3) + lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3) + rhs_(1) = rhs_(1) - coeff * rhs_(3) + coeff = lhs_(2,3,2) + lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2) + lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3) + lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3) + lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3) + lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3) + lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3) + rhs_(2) = rhs_(2) - coeff * rhs_(3) + coeff = lhs_(4,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2) + lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3) + lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3) + lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3) + lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3) + lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3) + rhs_(4) = rhs_(4) - coeff * rhs_(3) + coeff = lhs_(5,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2) + lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3) + lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3) + lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3) + lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3) + lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3) + rhs_(5) = rhs_(5) - coeff * rhs_(3) + pivot = 1.00d0 / lhs_(4,4,2) + lhs_(4,5,2) = lhs_(4,5,2) * pivot + lhs_(4,1,3) = lhs_(4,1,3) * pivot + lhs_(4,2,3) = lhs_(4,2,3) * pivot + lhs_(4,3,3) = lhs_(4,3,3) * pivot + lhs_(4,4,3) = lhs_(4,4,3) * pivot + lhs_(4,5,3) = lhs_(4,5,3) * pivot + rhs_(4) = rhs_(4) * pivot + coeff = lhs_(1,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2) + lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3) + lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3) + lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3) + lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3) + lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3) + rhs_(1) = rhs_(1) - coeff * rhs_(4) + coeff = lhs_(2,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2) + lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3) + lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3) + lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3) + lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3) + lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3) + rhs_(2) = rhs_(2) - coeff * rhs_(4) + coeff = lhs_(3,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2) + lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3) + lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3) + lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3) + lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3) + lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3) + rhs_(3) = rhs_(3) - coeff * rhs_(4) + coeff = lhs_(5,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2) + lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3) + lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3) + lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3) + lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3) + lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3) + rhs_(5) = rhs_(5) - coeff * rhs_(4) + pivot = 1.00d0 / lhs_(5,5,2) + lhs_(5,1,3) = lhs_(5,1,3) * pivot + lhs_(5,2,3) = lhs_(5,2,3) * pivot + lhs_(5,3,3) = lhs_(5,3,3) * pivot + lhs_(5,4,3) = lhs_(5,4,3) * pivot + lhs_(5,5,3) = lhs_(5,5,3) * pivot + rhs_(5) = rhs_(5) * pivot + coeff = lhs_(1,5,2) + lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3) + lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3) + lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3) + lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3) + lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3) + rhs_(1) = rhs_(1) - coeff * rhs_(5) + coeff = lhs_(2,5,2) + lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3) + lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3) + lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3) + lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3) + lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3) + rhs_(2) = rhs_(2) - coeff * rhs_(5) + coeff = lhs_(3,5,2) + lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3) + lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3) + lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3) + lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3) + lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3) + rhs_(3) = rhs_(3) - coeff * rhs_(5) + coeff = lhs_(4,5,2) + lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3) + lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3) + lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3) + lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3) + lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3) + rhs_(4) = rhs_(4) - coeff * rhs_(5) + do i__0 = 1,5 + lhs__(i__0,1,i,j,k) = lhs_(i__0,1,3) + lhs__(i__0,2,i,j,k) = lhs_(i__0,2,3) + lhs__(i__0,3,i,j,k) = lhs_(i__0,3,3) + lhs__(i__0,4,i,j,k) = lhs_(i__0,4,3) + lhs__(i__0,5,i,j,k) = lhs_(i__0,5,3) + enddo + do m = 1,5 + rhs(m,i,j,k) = rhs_(m) + u_(0,m) = u_(1,m) + u_(1,m) = u_(2,m) + enddo + enddo + do n = 1,5 + lhs_(1,n,1) = 0.0d0 + lhs_(1,n,2) = 0.0d0 + lhs_(1,n,3) = 0.0d0 + lhs_(2,n,1) = 0.0d0 + lhs_(2,n,2) = 0.0d0 + lhs_(2,n,3) = 0.0d0 + lhs_(3,n,1) = 0.0d0 + lhs_(3,n,2) = 0.0d0 + lhs_(3,n,3) = 0.0d0 + lhs_(4,n,1) = 0.0d0 + lhs_(4,n,2) = 0.0d0 + lhs_(4,n,3) = 0.0d0 + lhs_(5,n,1) = 0.0d0 + lhs_(5,n,2) = 0.0d0 + lhs_(5,n,3) = 0.0d0 + enddo + do m = 1,5 + lhs_(m,m,2) = 1.0d0 + enddo + do m = 1,5 + rhs_(m) = rhs(m,i,j,k) + enddo + do i__0 = 1,5 + rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j,ksiz + &e - 1) - lhs_(i__0,2,1) * rhs(2,i,j,ksize - 1) - lhs_(i__0,3,1) * + &rhs(3,i,j,ksize - 1) - lhs_(i__0,4,1) * rhs(4,i,j,ksize - 1) - lhs + &_(i__0,5,1) * rhs(5,i,j,ksize - 1) + enddo + do j__1 = 1,5 + lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_ + &_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3 + &) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3) + lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_ + &_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3 + &) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3) + lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_ + &_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3 + &) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3) + lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_ + &_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3 + &) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3) + lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_ + &_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3 + &) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3) + enddo + +!--------------------------------------------------------------------- +! +!--------------------------------------------------------------------- + pivot__3 = 1.00d0 / lhs_(1,1,2) + lhs_(1,2,2) = lhs_(1,2,2) * pivot__3 + lhs_(1,3,2) = lhs_(1,3,2) * pivot__3 + lhs_(1,4,2) = lhs_(1,4,2) * pivot__3 + lhs_(1,5,2) = lhs_(1,5,2) * pivot__3 + rhs_(1) = rhs_(1) * pivot__3 + coeff__2 = lhs_(2,1,2) + lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2) + lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2) + lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2) + rhs_(2) = rhs_(2) - coeff__2 * rhs_(1) + coeff__2 = lhs_(3,1,2) + lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2) + lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2) + lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2) + rhs_(3) = rhs_(3) - coeff__2 * rhs_(1) + coeff__2 = lhs_(4,1,2) + lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2) + lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2) + rhs_(4) = rhs_(4) - coeff__2 * rhs_(1) + coeff__2 = lhs_(5,1,2) + lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2) + lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2) + rhs_(5) = rhs_(5) - coeff__2 * rhs_(1) + pivot__3 = 1.00d0 / lhs_(2,2,2) + lhs_(2,3,2) = lhs_(2,3,2) * pivot__3 + lhs_(2,4,2) = lhs_(2,4,2) * pivot__3 + lhs_(2,5,2) = lhs_(2,5,2) * pivot__3 + rhs_(2) = rhs_(2) * pivot__3 + coeff__2 = lhs_(1,2,2) + lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2) + lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2) + rhs_(1) = rhs_(1) - coeff__2 * rhs_(2) + coeff__2 = lhs_(3,2,2) + lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2) + lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2) + rhs_(3) = rhs_(3) - coeff__2 * rhs_(2) + coeff__2 = lhs_(4,2,2) + lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2) + rhs_(4) = rhs_(4) - coeff__2 * rhs_(2) + coeff__2 = lhs_(5,2,2) + lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2) + rhs_(5) = rhs_(5) - coeff__2 * rhs_(2) + pivot__3 = 1.00d0 / lhs_(3,3,2) + lhs_(3,4,2) = lhs_(3,4,2) * pivot__3 + lhs_(3,5,2) = lhs_(3,5,2) * pivot__3 + rhs_(3) = rhs_(3) * pivot__3 + coeff__2 = lhs_(1,3,2) + lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2) + rhs_(1) = rhs_(1) - coeff__2 * rhs_(3) + coeff__2 = lhs_(2,3,2) + lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2) + rhs_(2) = rhs_(2) - coeff__2 * rhs_(3) + coeff__2 = lhs_(4,3,2) + lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2) + lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2) + rhs_(4) = rhs_(4) - coeff__2 * rhs_(3) + coeff__2 = lhs_(5,3,2) + lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2) + rhs_(5) = rhs_(5) - coeff__2 * rhs_(3) + pivot__3 = 1.00d0 / lhs_(4,4,2) + lhs_(4,5,2) = lhs_(4,5,2) * pivot__3 + rhs_(4) = rhs_(4) * pivot__3 + coeff__2 = lhs_(1,4,2) + lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2) + rhs_(1) = rhs_(1) - coeff__2 * rhs_(4) + coeff__2 = lhs_(2,4,2) + lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2) + rhs_(2) = rhs_(2) - coeff__2 * rhs_(4) + coeff__2 = lhs_(3,4,2) + lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2) + rhs_(3) = rhs_(3) - coeff__2 * rhs_(4) + coeff__2 = lhs_(5,4,2) + lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2) + rhs_(5) = rhs_(5) - coeff__2 * rhs_(4) + pivot__3 = 1.00d0 / lhs_(5,5,2) + rhs_(5) = rhs_(5) * pivot__3 + coeff__2 = lhs_(1,5,2) + rhs_(1) = rhs_(1) - coeff__2 * rhs_(5) + coeff__2 = lhs_(2,5,2) + rhs_(2) = rhs_(2) - coeff__2 * rhs_(5) + coeff__2 = lhs_(3,5,2) + rhs_(3) = rhs_(3) - coeff__2 * rhs_(5) + coeff__2 = lhs_(4,5,2) + rhs_(4) = rhs_(4) - coeff__2 * rhs_(5) + do m = 1,5 + rhs(m,i,j,k) = rhs_(m) + enddo + k = ksize - 1 + do m = 1,5 + rhs_(m) = rhs(m,i,j,k) + rhsp_(m) = rhs(m,i,j,k + 1) + enddo + do k = ksize - 1,1,(-(1)) + do m = 1,5 + rhs_(m) = rhs(m,i,j,k) + enddo + do m = 1,5 + rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k) * rhsp_(1) + rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k) * rhsp_(2) + rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k) * rhsp_(3) + rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k) * rhsp_(4) + rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k) * rhsp_(5) + enddo + do m = 1,5 + rhsp_(m) = rhs_(m) + u(m,i,j,k) = u(m,i,j,k) + rhs_(m) + enddo + enddo + enddo + enddo +!$SPF END PARALLEL_REG +! DVM$ END REGION + return + end + diff --git a/z_solve_sp.for b/z_solve_sp.for new file mode 100644 index 0000000..ea90b0a --- /dev/null +++ b/z_solve_sp.for @@ -0,0 +1,363 @@ + +! *** generated by SAPFOR with version 2415 and build date: May 4 2025 14:48:40 +! *** Enabled options ***: +! *** maximum shadow width is 50 percent +! *** generated by SAPFOR + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- + subroutine z_solve_sp () + + include 'header_sp.h' + integer :: i,j,k,k1,k2,m,m1 + double precision :: ru1,fac1,fac2,rhs__(5,0:2) + double precision :: lhs__(5,0:2),lhsm__(5,0:2),lhsp__(5,0:2) + double precision :: t1,t2,t3,ac,xvel,yvel,zvel,btuz,ac2u,uzik1 + +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- +!--------------------------------------------------------------------- +! Prepare for z-solve, array redistribution +!--------------------------------------------------------------------- + +!$SPF PARALLEL_REG r0 +!$SPF ANALYSIS(PRIVATE(LHS__,LHSP__,LHSM__,RHS__)) +! DVM$ PARALLEL (J,I) ON U(*,I,J,*), CUDA_BLOCK (32,4),PRIVATE (M,K1,K2,R +! DVM$&U1,FAC1,FAC2,K,LHS__,LHSP__,LHSM__,RHS__,T1,T2,T3,AC,XVEL,YVEL,ZVE +! DVM$&L,BTUZ,AC2U,UZIK1) +! DVM$ REGION LOCAL (LHS) + do j = 1,ny2 + do i = 1,nx2 + lhs__(1,0) = 0.0d0 + lhsp__(1,0) = 0.0d0 + lhsm__(1,0) = 0.0d0 + lhs__(2,0) = 0.0d0 + lhsp__(2,0) = 0.0d0 + lhsm__(2,0) = 0.0d0 + lhs__(3,0) = 1.0d0 + lhsp__(3,0) = 1.0d0 + lhsm__(3,0) = 1.0d0 + lhs__(4,0) = 0.0d0 + lhsp__(4,0) = 0.0d0 + lhsm__(4,0) = 0.0d0 + lhs__(5,0) = 0.0d0 + lhsp__(5,0) = 0.0d0 + lhsm__(5,0) = 0.0d0 + lhs__(1,1) = 0.0d0 + ru1 = c3c4 * 1.0d0 / u(1,i,j,0) + ru1 = dmax1 (dz4 + con43 * ru1,dz5 + c1c5 * ru1,dzmax + ru1, + &dz1) + lhs__(2,1) = (-(dttz2)) * ws(i,j,0) - dttz1 * ru1 + ru1 = c3c4 * 1.0d0 / u(1,i,j,1) + ru1 = dmax1 (dz4 + con43 * ru1,dz5 + c1c5 * ru1,dzmax + ru1, + &dz1) + lhs__(3,1) = 1.0d0 + c2dttz1 * ru1 + ru1 = c3c4 * 1.0d0 / u(1,i,j,2) + ru1 = dmax1 (dz4 + con43 * ru1,dz5 + c1c5 * ru1,dzmax + ru1, + &dz1) + lhs__(4,1) = dttz2 * ws(i,j,2) - dttz1 * ru1 + lhs__(5,1) = 0.0d0 + lhs__(3,1) = lhs__(3,1) + comz5 + lhs__(4,1) = lhs__(4,1) - comz4 + lhs__(5,1) = lhs__(5,1) + comz1 + lhsp__(1,1) = lhs__(1,1) + lhsp__(2,1) = lhs__(2,1) - dttz2 * speed(i,j,1 - 1) + lhsp__(3,1) = lhs__(3,1) + lhsp__(4,1) = lhs__(4,1) + dttz2 * speed(i,j,1 + 1) + lhsp__(5,1) = lhs__(5,1) + lhsm__(1,1) = lhs__(1,1) + lhsm__(2,1) = lhs__(2,1) + dttz2 * speed(i,j,1 - 1) + lhsm__(3,1) = lhs__(3,1) + lhsm__(4,1) = lhs__(4,1) - dttz2 * speed(i,j,1 + 1) + lhsm__(5,1) = lhs__(5,1) + do k = 0,nz2 + 1 + if (k + 2 .lt. nz2 + 1) then + m = k + 2 + lhs__(1,2) = 0.0d0 + ru1 = c3c4 * 1.0d0 / u(1,i,j,m - 1) + ru1 = dmax1 (dz4 + con43 * ru1,dz5 + c1c5 * ru1,dzmax + &+ ru1,dz1) + lhs__(2,2) = (-(dttz2)) * ws(i,j,m - 1) - dttz1 * ru1 + ru1 = c3c4 * 1.0d0 / u(1,i,j,m) + ru1 = dmax1 (dz4 + con43 * ru1,dz5 + c1c5 * ru1,dzmax + &+ ru1,dz1) + lhs__(3,2) = 1.0d0 + c2dttz1 * ru1 + ru1 = c3c4 * 1.0d0 / u(1,i,j,m + 1) + ru1 = dmax1 (dz4 + con43 * ru1,dz5 + c1c5 * ru1,dzmax + &+ ru1,dz1) + lhs__(4,2) = dttz2 * ws(i,j,m + 1) - dttz1 * ru1 + lhs__(5,2) = 0.0d0 + if (m .eq. 1) then + lhs__(3,2) = lhs__(3,2) + comz5 + lhs__(4,2) = lhs__(4,2) - comz4 + lhs__(5,2) = lhs__(5,2) + comz1 + else if (m .eq. 2) then + lhs__(2,2) = lhs__(2,2) - comz4 + lhs__(3,2) = lhs__(3,2) + comz6 + lhs__(4,2) = lhs__(4,2) - comz4 + lhs__(5,2) = lhs__(5,2) + comz1 + else if (m .ge. 3 .and. m .le. nz2 - 2) then + lhs__(1,2) = lhs__(1,2) + comz1 + lhs__(2,2) = lhs__(2,2) - comz4 + lhs__(3,2) = lhs__(3,2) + comz6 + lhs__(4,2) = lhs__(4,2) - comz4 + lhs__(5,2) = lhs__(5,2) + comz1 + else if (m .eq. nz2 - 1) then + lhs__(1,2) = lhs__(1,2) + comz1 + lhs__(2,2) = lhs__(2,2) - comz4 + lhs__(3,2) = lhs__(3,2) + comz6 + lhs__(4,2) = lhs__(4,2) - comz4 + else if (m .eq. nz2) then + lhs__(1,2) = lhs__(1,2) + comz1 + lhs__(2,2) = lhs__(2,2) - comz4 + lhs__(3,2) = lhs__(3,2) + comz5 + endif + lhsp__(1,2) = lhs__(1,2) + lhsp__(2,2) = lhs__(2,2) - dttz2 * speed(i,j,m - 1) + lhsp__(3,2) = lhs__(3,2) + lhsp__(4,2) = lhs__(4,2) + dttz2 * speed(i,j,m + 1) + lhsp__(5,2) = lhs__(5,2) + lhsm__(1,2) = lhs__(1,2) + lhsm__(2,2) = lhs__(2,2) + dttz2 * speed(i,j,m - 1) + lhsm__(3,2) = lhs__(3,2) + lhsm__(4,2) = lhs__(4,2) - dttz2 * speed(i,j,m + 1) + lhsm__(5,2) = lhs__(5,2) + else if (k + 2 .eq. nz2 + 1) then + lhs__(1,2) = 0.0d0 + lhsp__(1,2) = 0.0d0 + lhsm__(1,2) = 0.0d0 + lhs__(2,2) = 0.0d0 + lhsp__(2,2) = 0.0d0 + lhsm__(2,2) = 0.0d0 + lhs__(3,2) = 1.0d0 + lhsp__(3,2) = 1.0d0 + lhsm__(3,2) = 1.0d0 + lhs__(4,2) = 0.0d0 + lhsp__(4,2) = 0.0d0 + lhsm__(4,2) = 0.0d0 + lhs__(5,2) = 0.0d0 + lhsp__(5,2) = 0.0d0 + lhsm__(5,2) = 0.0d0 + endif + +!********************************** end of init + k1 = k + 1 + k2 = k + 2 + fac1 = 1.d0 / lhs__(3,0) + lhs__(4,0) = fac1 * lhs__(4,0) + lhs__(5,0) = fac1 * lhs__(5,0) + do m = 1,3 + rhs(m,i,j,k) = fac1 * rhs(m,i,j,k) + enddo + if (k .le. nz2 - 1) then + lhs__(3,1) = lhs__(3,1) - lhs__(2,1) * lhs__(4,0) + lhs__(4,1) = lhs__(4,1) - lhs__(2,1) * lhs__(5,0) + lhs__(2,2) = lhs__(2,2) - lhs__(1,2) * lhs__(4,0) + lhs__(3,2) = lhs__(3,2) - lhs__(1,2) * lhs__(5,0) + do m = 1,3 + rhs(m,i,j,k1) = rhs(m,i,j,k1) - lhs__(2,1) * rhs(m, + &i,j,k) + rhs(m,i,j,k2) = rhs(m,i,j,k2) - lhs__(1,2) * rhs(m, + &i,j,k) + enddo + else + lhs__(3,1) = lhs__(3,1) - lhs__(2,1) * lhs__(4,0) + lhs__(4,1) = lhs__(4,1) - lhs__(2,1) * lhs__(5,0) + fac2 = 1.d0 / lhs__(3,1) + do m = 1,3 + rhs(m,i,j,k1) = rhs(m,i,j,k1) - lhs__(2,1) * rhs(m, + &i,j,k) + rhs(m,i,j,k1) = fac2 * rhs(m,i,j,k1) + enddo + endif + m = 4 + fac1 = 1.d0 / lhsp__(3,0) + lhsp__(4,0) = fac1 * lhsp__(4,0) + lhsp__(5,0) = fac1 * lhsp__(5,0) + rhs(m,i,j,k) = fac1 * rhs(m,i,j,k) + lhsp__(3,1) = lhsp__(3,1) - lhsp__(2,1) * lhsp__(4,0) + lhsp__(4,1) = lhsp__(4,1) - lhsp__(2,1) * lhsp__(5,0) + rhs(m,i,j,k1) = rhs(m,i,j,k1) - lhsp__(2,1) * rhs(m,i,j,k + &) + if (k .lt. nz2) then + lhsp__(2,2) = lhsp__(2,2) - lhsp__(1,2) * lhsp__(4,0) + lhsp__(3,2) = lhsp__(3,2) - lhsp__(1,2) * lhsp__(5,0) + rhs(m,i,j,k2) = rhs(m,i,j,k2) - lhsp__(1,2) * rhs(m,i, + &j,k) + endif + m = 5 + fac1 = 1.d0 / lhsm__(3,0) + lhsm__(4,0) = fac1 * lhsm__(4,0) + lhsm__(5,0) = fac1 * lhsm__(5,0) + rhs(m,i,j,k) = fac1 * rhs(m,i,j,k) + lhsm__(3,1) = lhsm__(3,1) - lhsm__(2,1) * lhsm__(4,0) + lhsm__(4,1) = lhsm__(4,1) - lhsm__(2,1) * lhsm__(5,0) + rhs(m,i,j,k1) = rhs(m,i,j,k1) - lhsm__(2,1) * rhs(m,i,j,k + &) + if (k .lt. nz2) then + lhsm__(2,2) = lhsm__(2,2) - lhsm__(1,2) * lhsm__(4,0) + lhsm__(3,2) = lhsm__(3,2) - lhsm__(1,2) * lhsm__(5,0) + rhs(m,i,j,k2) = rhs(m,i,j,k2) - lhsm__(1,2) * rhs(m,i, + &j,k) + endif + if (k .eq. nz2) then + rhs(4,i,j,k1) = rhs(4,i,j,k1) / lhsp__(3,1) + rhs(5,i,j,k1) = rhs(5,i,j,k1) / lhsm__(3,1) + do m = 1,3 + rhs(m,i,j,k) = rhs(m,i,j,k) - lhs__(4,0) * rhs(m,i, + &j,k1) + enddo + rhs(4,i,j,k) = rhs(4,i,j,k) - lhsp__(4,0) * rhs(4,i,j, + &k1) + rhs(5,i,j,k) = rhs(5,i,j,k) - lhsm__(4,0) * rhs(5,i,j, + &k1) + endif + lhs(0,4,i,j,k) = lhs__(4,0) + lhs(1,4,i,j,k) = lhsp__(4,0) + lhs(2,4,i,j,k) = lhsm__(4,0) + lhs(0,5,i,j,k) = lhs__(5,0) + lhs(1,5,i,j,k) = lhsp__(5,0) + lhs(2,5,i,j,k) = lhsm__(5,0) + lhs__(1,0) = lhs__(1,1) + lhsp__(1,0) = lhsp__(1,1) + lhsm__(1,0) = lhsm__(1,1) + lhs__(1,1) = lhs__(1,2) + lhsp__(1,1) = lhsp__(1,2) + lhsm__(1,1) = lhsm__(1,2) + lhs__(2,0) = lhs__(2,1) + lhsp__(2,0) = lhsp__(2,1) + lhsm__(2,0) = lhsm__(2,1) + lhs__(2,1) = lhs__(2,2) + lhsp__(2,1) = lhsp__(2,2) + lhsm__(2,1) = lhsm__(2,2) + lhs__(3,0) = lhs__(3,1) + lhsp__(3,0) = lhsp__(3,1) + lhsm__(3,0) = lhsm__(3,1) + lhs__(3,1) = lhs__(3,2) + lhsp__(3,1) = lhsp__(3,2) + lhsm__(3,1) = lhsm__(3,2) + lhs__(4,0) = lhs__(4,1) + lhsp__(4,0) = lhsp__(4,1) + lhsm__(4,0) = lhsm__(4,1) + lhs__(4,1) = lhs__(4,2) + lhsp__(4,1) = lhsp__(4,2) + lhsm__(4,1) = lhsm__(4,2) + lhs__(5,0) = lhs__(5,1) + lhsp__(5,0) = lhsp__(5,1) + lhsm__(5,0) = lhsm__(5,1) + lhs__(5,1) = lhs__(5,2) + lhsp__(5,1) = lhsp__(5,2) + lhsm__(5,1) = lhsm__(5,2) + enddo + k = problem_size - 3 + rhs__(1,2) = rhs(1,i,j,k + 2) + rhs__(2,2) = rhs(2,i,j,k + 2) + rhs__(3,2) = rhs(3,i,j,k + 2) + rhs__(4,2) = rhs(4,i,j,k + 2) + rhs__(5,2) = rhs(5,i,j,k + 2) + rhs__(1,1) = rhs(1,i,j,k + 1) + rhs__(2,1) = rhs(2,i,j,k + 1) + rhs__(3,1) = rhs(3,i,j,k + 1) + rhs__(4,1) = rhs(4,i,j,k + 1) + rhs__(5,1) = rhs(5,i,j,k + 1) + rhs__(1,0) = rhs(1,i,j,k) + rhs__(2,0) = rhs(2,i,j,k) + rhs__(3,0) = rhs(3,i,j,k) + rhs__(4,0) = rhs(4,i,j,k) + rhs__(5,0) = rhs(5,i,j,k) + rhs__(1,0) = rhs__(1,0) - lhs(0,4,i,j,k) * rhs__(1,1) - lhs( + &0,5,i,j,k) * rhs__(1,2) + rhs__(2,0) = rhs__(2,0) - lhs(0,4,i,j,k) * rhs__(2,1) - lhs( + &0,5,i,j,k) * rhs__(2,2) + rhs__(3,0) = rhs__(3,0) - lhs(0,4,i,j,k) * rhs__(3,1) - lhs( + &0,5,i,j,k) * rhs__(3,2) + rhs__(4,0) = rhs__(4,0) - lhs(1,4,i,j,k) * rhs__(4,1) - lhs( + &1,5,i,j,k) * rhs__(4,2) + rhs__(5,0) = rhs__(5,0) - lhs(2,4,i,j,k) * rhs__(5,1) - lhs( + &2,5,i,j,k) * rhs__(5,2) + rhs__(1,2) = rhs__(1,1) + rhs__(2,2) = rhs__(2,1) + rhs__(3,2) = rhs__(3,1) + rhs__(4,2) = rhs__(4,1) + rhs__(5,2) = rhs__(5,1) + rhs__(1,1) = rhs__(1,0) + rhs__(2,1) = rhs__(2,0) + rhs__(3,1) = rhs__(3,0) + rhs__(4,1) = rhs__(4,0) + rhs__(5,1) = rhs__(5,0) + do k = problem_size - 4,0,(-(1)) + rhs__(1,0) = rhs(1,i,j,k) + rhs__(2,0) = rhs(2,i,j,k) + rhs__(3,0) = rhs(3,i,j,k) + rhs__(4,0) = rhs(4,i,j,k) + rhs__(5,0) = rhs(5,i,j,k) + rhs__(1,0) = rhs__(1,0) - lhs(0,4,i,j,k) * rhs__(1,1) - l + &hs(0,5,i,j,k) * rhs__(1,2) + rhs__(2,0) = rhs__(2,0) - lhs(0,4,i,j,k) * rhs__(2,1) - l + &hs(0,5,i,j,k) * rhs__(2,2) + rhs__(3,0) = rhs__(3,0) - lhs(0,4,i,j,k) * rhs__(3,1) - l + &hs(0,5,i,j,k) * rhs__(3,2) + rhs__(4,0) = rhs__(4,0) - lhs(1,4,i,j,k) * rhs__(4,1) - l + &hs(1,5,i,j,k) * rhs__(4,2) + rhs__(5,0) = rhs__(5,0) - lhs(2,4,i,j,k) * rhs__(5,1) - l + &hs(2,5,i,j,k) * rhs__(5,2) + xvel = us(i,j,k + 2) + yvel = vs(i,j,k + 2) + zvel = ws(i,j,k + 2) + ac = speed(i,j,k + 2) + ac2u = ac * ac + uzik1 = u(1,i,j,k + 2) + btuz = bt * uzik1 + t1 = btuz / ac * (rhs__(4,2) + rhs__(5,2)) + t2 = rhs__(3,2) + t1 + t3 = btuz * (rhs__(4,2) - rhs__(5,2)) + rhs__(3,2) = uzik1 * rhs__(1,2) + yvel * t2 + rhs__(4,2) = zvel * t2 + t3 + rhs__(5,2) = uzik1 * ((-(xvel)) * rhs__(2,2) + yvel * rhs + &__(1,2)) + qs(i,j,k + 2) * t2 + c2iv * ac2u * t1 + zvel * t3 + rhs__(1,2) = t2 + rhs__(2,2) = (-(uzik1)) * rhs__(2,2) + xvel * t2 + u(1,i,j,k + 2) = u(1,i,j,k + 2) + rhs__(1,2) + u(2,i,j,k + 2) = u(2,i,j,k + 2) + rhs__(2,2) + u(3,i,j,k + 2) = u(3,i,j,k + 2) + rhs__(3,2) + u(4,i,j,k + 2) = u(4,i,j,k + 2) + rhs__(4,2) + u(5,i,j,k + 2) = u(5,i,j,k + 2) + rhs__(5,2) + rhs__(1,2) = rhs__(1,1) + rhs__(2,2) = rhs__(2,1) + rhs__(3,2) = rhs__(3,1) + rhs__(4,2) = rhs__(4,1) + rhs__(5,2) = rhs__(5,1) + rhs__(1,1) = rhs__(1,0) + rhs__(2,1) = rhs__(2,0) + rhs__(3,1) = rhs__(3,0) + rhs__(4,1) = rhs__(4,0) + rhs__(5,1) = rhs__(5,0) + enddo + xvel = us(i,j,k + 2) + yvel = vs(i,j,k + 2) + zvel = ws(i,j,k + 2) + ac = speed(i,j,k + 2) + ac2u = ac * ac + uzik1 = u(1,i,j,k + 2) + btuz = bt * uzik1 + t1 = btuz / ac * (rhs__(4,2) + rhs__(5,2)) + t2 = rhs__(3,2) + t1 + t3 = btuz * (rhs__(4,2) - rhs__(5,2)) + rhs__(3,2) = uzik1 * rhs__(1,2) + yvel * t2 + rhs__(4,2) = zvel * t2 + t3 + rhs__(5,2) = uzik1 * ((-(xvel)) * rhs__(2,2) + yvel * rhs__( + &1,2)) + qs(i,j,k + 2) * t2 + c2iv * ac2u * t1 + zvel * t3 + rhs__(1,2) = t2 + rhs__(2,2) = (-(uzik1)) * rhs__(2,2) + xvel * t2 + u(1,i,j,k + 2) = u(1,i,j,k + 2) + rhs__(1,2) + u(2,i,j,k + 2) = u(2,i,j,k + 2) + rhs__(2,2) + u(3,i,j,k + 2) = u(3,i,j,k + 2) + rhs__(3,2) + u(4,i,j,k + 2) = u(4,i,j,k + 2) + rhs__(4,2) + u(5,i,j,k + 2) = u(5,i,j,k + 2) + rhs__(5,2) + enddo + enddo +!$SPF END PARALLEL_REG +! DVM$ END REGION + return + end +