The heat equation example.
This example depends on simple-solver, three-pt-stencil-solver.
 
 Introduction 
This example solves a 2D heat conduction equation
with Dirichlet boundary conditions and given initial condition and constant-in-time source function f.
The partial differential equation (PDE) is solved with a finite difference spatial discretization on an equidistant grid: For n grid points, and grid distance 
We then build an implicit Euler integrator by discretizing with time step 
and solve the resulting linear system for 
The intention of this example is to provide a mini-app showing matrix assembly, vector initialization, solver setup and the use of Ginkgo in a more complex setting.
About the example  
The commented program 
/ *****************************<DESCRIPTION>***********************************
This example solves a 2D heat conduction equation
 
    u : [0, d]^2 \rightarrow R\\
    \partial_t u = \delta u + f
 
with Dirichlet boundary conditions and given initial condition and
constant-in-time source function f.
 
The partial differential equation (PDE) is solved with a finite difference
spatial discretization on an equidistant grid: For `n` grid points,
and grid distance h = 1/n we write
 
    u_{i,j}' = \alpha (u_{i-1,j} + u_{i+1,j} + u_{i,j-1} + u_{i,j+1} 
                   - 4 u_{i,j}) / h^2 
               + f_{i,j} 
We then build an implicit Euler integrator by discretizing with time step \tau 
    (u_{i,j}^{k+1} - u_{i,j}^k) / \tau = 
    \alpha (u_{i-1,j}^{k+1} - u_{i+1,j}^{k+1} 
          + u_{i,j-1}^{k+1} - u_{i,j+1}^{k+1} - 4 u_{i,j}^{k+1}) / h^2 
    + f_{i,j} 
and solve the resulting linear system for  u_{\cdot}^{k+1} using Ginkgo' s CG
solver preconditioned with an incomplete Cholesky factorization for each  time
step, occasionally writing the resulting grid values into a video file using 
 
The intention of this  example is to provide a mini-app showing matrix assembly,
vector initialization, solver setup and the use of Ginkgo in a more complex
setting.
*****************************<DESCRIPTION>********************************** /
 
#include <ginkgo/ginkgo.hpp> 
 
 
#include <chrono> 
#include <fstream> 
#include <iostream> 
 
 
#include <opencv2/core.hpp> 
#include <opencv2/videoio.hpp> 
This function implements a simple Ginkgo-themed clamped color mapping for values in the range [0,5].
void  set_val(unsigned  char * data, double  value)
{
RGB values for the 6 colors used for values 0, 1, ..., 5 We will interpolate linearly between these values.
double  col_r[] = {255, 221, 129, 201, 249, 255};
double  col_g[] = {255, 220, 130, 161, 158, 204};
double  col_b[] = {255, 220, 133, 93, 24, 8};
value = std::max(0.0, value);
auto  i = std::max(0, std::min(4, int (value)));
auto  d = std::max(0.0, std::min(1.0, value - i));
OpenCV uses BGR instead of RGB by default, revert indices
    data[2] = static_cast< unsigned  char > (col_r[i + 1] * d + col_r[i] * (1 - d));
    data[1] = static_cast< unsigned  char > (col_g[i + 1] * d + col_g[i] * (1 - d));
    data[0] = static_cast< unsigned  char > (col_b[i + 1] * d + col_b[i] * (1 - d));
}
Initialize video output with given dimension and FPS (frames per seconds)
std::pair<cv::VideoWriter, cv::Mat> build_output(int  n, double  fps)
{
    cv::Size videosize{n, n};
    auto  output =
        std::make_pair(cv::VideoWriter{}, cv::Mat{videosize, CV_8UC3});
    auto  fourcc = cv::VideoWriter::fourcc('a' , 'v' , 'c' , '1' );
    output.first.open("heat.mp4" , fourcc, fps, videosize);
    return  output;
}
Write the current frame to video output using the above color mapping
void  output_timestep(std::pair<cv::VideoWriter, cv::Mat>& output, int  n,
                     const  double * data)
{
    for  (int  i = 0; i < n; i++) {
        auto  row = output.second.ptr(i);
        for  (int  j = 0; j < n; j++) {
            set_val(&row[3 * j], data[i * n + j]);
        }
    }
    output.first.write(output.second);
}
 
 
int  main(int  argc, char * argv[])
{
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition  csr.hpp:146
Dense is a matrix format which explicitly stores all values of the matrix.
Definition  dense.hpp:136
Problem parameters: simulation length
diffusion factor
scaling factor for heat source
Simulation parameters: inner grid points per discretization direction
number of simulation steps per second
auto  steps_per_sec = 500;
number of video frames per second
number of grid points
grid point distance (ignoring boundary points)
auto  h = 1.0 / (n + 1);
auto  h2 = h * h;
time step size for the simulation
auto  tau = 1.0 / steps_per_sec;
create a CUDA executor with an associated OpenMP host executor
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
Creates a new CudaExecutor.
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Creates a new OmpExecutor.
Definition  executor.hpp:1373
 load heat source and initial state vectors
std::ifstream initial_stream("data/gko_logo_2d.mtx" );
std::ifstream source_stream("data/gko_text_2d.mtx" );
constexpr T one()
Returns the multiplicative identity for T.
Definition  math.hpp:803
create output vector with initial guess for
auto  out_vector = in_vector->clone();
create scalar for source update
create stencil matrix as shared_ptr for solver
auto  stencil_matrix = 
gko::share (mtx::create(exec));
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Marks the object pointed to by p as shared.
Definition  utils_helper.hpp:254
assemble matrix
for  (int  i = 0; i < n; i++) {
    for  (int  j = 0; j < n; j++) {
        auto  c = i * n + j;
        auto  c_val = diffusion * tau * 4.0 / h2 + 1.0;
        auto  off_val = -diffusion * tau / h2;
A type representing the dimensions of a multidimensional object.
Definition  dim.hpp:55
This structure is used as an intermediate data type to store a sparse matrix.
Definition  matrix_data.hpp:155
 for each grid point: insert 5 stencil points with Dirichlet boundary conditions, i.e. with zero boundary value
        if  (i > 0) {
            mtx_data.nonzeros.emplace_back(c, c - n, off_val);
        }
        if  (j > 0) {
            mtx_data.nonzeros.emplace_back(c, c - 1, off_val);
        }
        mtx_data.nonzeros.emplace_back(c, c, c_val);
        if  (j < n - 1) {
            mtx_data.nonzeros.emplace_back(c, c + 1, off_val);
        }
        if  (i < n - 1) {
            mtx_data.nonzeros.emplace_back(c, c + n, off_val);
        }
    }
}
stencil_matrix->read(mtx_data);
prepare video output
auto  output = build_output(n, fps);
build CG solver on stencil with incomplete Cholesky preconditioner stopping at 1e-10 relative accuracy
auto  solver =
                           .with_baseline(gko::stop::mode::rhs_norm)
                           .with_reduction_factor(1e-10))
        .on(exec)
        ->generate(stencil_matrix);
The Incomplete Cholesky (IC) preconditioner solves the equation  for a given lower triangular matrix ...
Definition  ic.hpp:107
CG or the conjugate gradient method is an iterative type Krylov subspace method which is suitable for...
Definition  cg.hpp:76
The ResidualNorm class is a stopping criterion which stops the iteration process when the actual resi...
Definition  residual_norm.hpp:138
time stamp of the last output frame (initialized to a sentinel value)
execute implicit Euler method: for each timestep, solve stencil system
for  (double  t = 0; t < t0; t += tau) {
if enough time has passed, output the next video frame
if  (t - last_t > 1.0 / fps) {
    last_t = t;
    std::cout << t << std::endl;
    output_timestep(
        output, n,
            ->get_const_values());
}
detail::temporary_clone< detail::pointee< Ptr > > make_temporary_clone(std::shared_ptr< const Executor > exec, Ptr &&ptr)
Creates a temporary_clone.
Definition  temporary_clone.hpp:207
add heat source contribution
in_vector->add_scaled(tau_source_scalar, source);
execute Euler step
solver->apply(in_vector, out_vector);
swap input and output
        std::swap(in_vector, out_vector);
    }
}
 
Results 
The program will generate a video file named heat.mp4 and output the timestamp of each generated frame.
Comments about programming and debugging  
The plain program 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#include <ginkgo/ginkgo.hpp> 
 
 
#include <chrono> 
#include <fstream> 
#include <iostream> 
 
 
#include <opencv2/core.hpp> 
#include <opencv2/videoio.hpp> 
 
 
void  set_val(unsigned  char * data, double  value)
{
    double  col_r[] = {255, 221, 129, 201, 249, 255};
    double  col_g[] = {255, 220, 130, 161, 158, 204};
    double  col_b[] = {255, 220, 133, 93, 24, 8};
    value = std::max(0.0, value);
    auto  i = std::max(0, std::min(4, int (value)));
    auto  d = std::max(0.0, std::min(1.0, value - i));
    data[2] = static_cast< unsigned  char > (col_r[i + 1] * d + col_r[i] * (1 - d));
    data[1] = static_cast< unsigned  char > (col_g[i + 1] * d + col_g[i] * (1 - d));
    data[0] = static_cast< unsigned  char > (col_b[i + 1] * d + col_b[i] * (1 - d));
}
 
 
std::pair<cv::VideoWriter, cv::Mat> build_output(int  n, double  fps)
{
    cv::Size videosize{n, n};
    auto  output =
        std::make_pair(cv::VideoWriter{}, cv::Mat{videosize, CV_8UC3});
    auto  fourcc = cv::VideoWriter::fourcc('a' , 'v' , 'c' , '1' );
    output.first.open("heat.mp4" , fourcc, fps, videosize);
    return  output;
}
 
 
void  output_timestep(std::pair<cv::VideoWriter, cv::Mat>& output, int  n,
                     const  double * data)
{
    for  (int  i = 0; i < n; i++) {
        auto  row = output.second.ptr(i);
        for  (int  j = 0; j < n; j++) {
            set_val(&row[3 * j], data[i * n + j]);
        }
    }
    output.first.write(output.second);
}
 
 
int  main(int  argc, char * argv[])
{
 
    auto  t0 = 5.0;
    auto  diffusion = 0.0005;
    auto  source_scale = 2.5;
    auto  n = 256;
    auto  steps_per_sec = 500;
    auto  fps = 25;
    auto  n2 = n * n;
    auto  h = 1.0 / (n + 1);
    auto  h2 = h * h;
    auto  tau = 1.0 / steps_per_sec;
 
    std::ifstream initial_stream("data/gko_logo_2d.mtx" );
    std::ifstream source_stream("data/gko_text_2d.mtx" );
    auto  out_vector = in_vector->clone();
    auto  stencil_matrix = 
gko::share (mtx::create(exec));
 
    for  (int  i = 0; i < n; i++) {
        for  (int  j = 0; j < n; j++) {
            auto  c = i * n + j;
            auto  c_val = diffusion * tau * 4.0 / h2 + 1.0;
            auto  off_val = -diffusion * tau / h2;
            if  (i > 0) {
                mtx_data.nonzeros.emplace_back(c, c - n, off_val);
            }
            if  (j > 0) {
                mtx_data.nonzeros.emplace_back(c, c - 1, off_val);
            }
            mtx_data.nonzeros.emplace_back(c, c, c_val);
            if  (j < n - 1) {
                mtx_data.nonzeros.emplace_back(c, c + 1, off_val);
            }
            if  (i < n - 1) {
                mtx_data.nonzeros.emplace_back(c, c + n, off_val);
            }
        }
    }
    stencil_matrix->read(mtx_data);
    auto  output = build_output(n, fps);
                               .with_baseline(gko::stop::mode::rhs_norm)
                               .with_reduction_factor(1e-10))
            .on(exec)
            ->generate(stencil_matrix);
    double  last_t = -t0;
    for  (double  t = 0; t < t0; t += tau) {
        if  (t - last_t > 1.0 / fps) {
            last_t = t;
            std::cout << t << std::endl;
            output_timestep(
                output, n,
                    ->get_const_values());
        }
        in_vector->add_scaled(tau_source_scalar, source);
        solver ->apply(in_vector, out_vector);
 
        std::swap(in_vector, out_vector);
    }
}