BLOG.JUNGWON.KIM

CUDA C

direct.cpp

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <mpi.h>
int main( int argc, char** argv )
{
    MPI_Init (&argc, &argv);
    int direct;
    int rank, size;
    int *h_buff = NULL;
    int *d_rank = NULL;
    int *d_buff = NULL;
    size_t bytes;
    int i;
    // Ensure that RDMA ENABLED CUDA is set correctly
    direct = getenv("MPICH_RDMA_ENABLED_CUDA")==NULL?0:atoi(getenv ("MPICH_RDMA_ENABLED_CUDA"));
    if(direct != 1){
        printf ("MPICH_RDMA_ENABLED_CUDA not enabled!n");
        exit (EXIT_FAILURE);
    }
    // Get MPI rank and size
    MPI_Comm_rank (MPI_COMM_WORLD, &rank);
    MPI_Comm_size (MPI_COMM_WORLD, &size);
    // Allocate host and device buffers and copy rank value to GPU
    bytes = size*sizeof(int);
    h_buff = (int*)malloc(bytes);
    cudaMalloc(&d_buff, bytes);
    cudaMalloc(&d_rank, sizeof(int));
    cudaMemcpy(d_rank, &rank, sizeof(int), cudaMemcpyHostToDevice);
    // Preform Allgather using device buffer
    MPI_Allgather(d_rank, 1, MPI_INT, d_buff, 1, MPI_INT, MPI_COMM_WORLD);
    // Check that the GPU buffer is correct
    cudaMemcpy(h_buff, d_buff, bytes, cudaMemcpyDeviceToHost);
    for(i=0; i<size; i++){
        if(h_buff[i] != i) {
            printf ("Alltoall Failed!n");
            exit (EXIT_FAILURE);
        }
    }
    if(rank==0)
        printf("Success!n");
    // Clean up
    free(h_buff);
    cudaFree(d_buff);
    cudaFree(d_rank);
    MPI_Finalize();
    return 0;
}

Compiling

For ease of compiling the GNU environment will be used.

$ module load cudatoolkit
$ module switch PrgEnv-pgi PrgEnv-gnu
$ CC -lcudart direct.cpp -o direct.out

Running

$ export LD_LIBRARY_PATH=$CRAY_LD_LIBRARY_PATH:$LD_LIBRARY_PATH
$ export MPICH_RDMA_ENABLED_CUDA=1
$ aprun -n2 -N1 ./direct.out

List of device bit rates

http://en.wikipedia.org/wiki/List_of_device_bit_rates

Print CPU Affinity of GPUs

#!/bin/bash
# For every entry under /proc/driver/nvidia/gpus, print the entry name
# followed by the CPU list affinity of the matching PCI bus.

FILES="/proc/driver/nvidia/gpus/*"

# $FILES is deliberately left unquoted here so the shell expands the glob.
for f in $FILES
do
    # If nothing matched, the pattern is left as-is; skip it.
    [ -e "$f" ] || continue
    bus=$(basename "$f")
    # The first 7 characters of the entry name select the pci_bus directory
    # (assumes entries are named by PCI address, e.g. 0000:03:00.0 -- confirm
    # on the target driver version).
    echo "$bus" "-->" "$(cat "/sys/class/pci_bus/${bus:0:7}/cpulistaffinity")"
done

Code merging with VIM

http://www.wizonesolutions.com/2009/10/28/code-merging-with-vim/

Nested Parallelism OpenMP example

http://www.techdarting.com/2013/07/nested-parallelism-openmp-example.html

gpu cpu affinity

http://stackoverflow.com/questions/16056800/multi-gpu-programming-using-cuda-on-a-numa-machine

#!/bin/bash
# Lists each GPU index together with the CPU core affinity mask of its PCI bus.
# Pass -v as the first argument to also print a header row and the PCI address.
info="/proc/driver/nvidia/gpus/0/information"
if [ -e "$info" ]; then
  idx=0
  info="/proc/driver/nvidia/gpus/$idx/information"
  if [ "$1" == "-v" ]; then echo "GPU:  CPU CORE AFFINITY MASK: PCI:"; fi
  # Walk consecutively numbered GPU directories until one is missing.
  while [ -e "$info" ]
  do
    # Pull the "Bus Location" line; the read/echo pair collapses its whitespace.
    bus_line=$(grep "Bus Location" "$info" | { read first; echo $first; })
    # Everything from offset 14 on is the PCI address; its first 7 characters
    # name the pci_bus directory.
    pci_addr=${bus_line:14}
    pci_bus=${bus_line:14:7}
    mask_file="/sys/class/pci_bus/$pci_bus/cpuaffinity"
    read mask < $mask_file
    if [ "$1" == "-v" ]; then
      echo " $idx     $mask                  $pci_addr"
    else
      echo " $idx     $mask "
    fi
    idx=$((idx + 1))
    info="/proc/driver/nvidia/gpus/$idx/information"
  done
else
  echo "Unable to locate any GPUs!"
fi

LaTeX/Colors

http://en.wikibooks.org/wiki/LaTeX/Colors

\usepackage[usenames,dvipsnames]{xcolor}

MULTI GPU PROGRAMMING WITH MPI

http://on-demand.gputechconf.com/gtc/2014/presentations/S4236-multi-gpu-programming-mpi.pdf

What Every CUDA Programmer Should Know About OpenGL

http://www.nvidia.com/content/gtc/documents/1055_gtc09.pdf

Topic 851 – Resident and Nonresident Aliens

http://www.irs.gov/taxtopics/tc851.html

bash – how to pipe result from the which command to cd

http://stackoverflow.com/questions/3437514/bash-how-to-pipe-result-from-the-which-command-to-cd

# Change into the directory that contains the mpirun executable found on PATH.
cd "$(dirname "$(which mpirun)")"

bash loop iteration

http://www.cyberciti.biz/faq/bash-for-loop/

Lustre – FAQ – Sizing

http://wiki.lustre.org/manual/LustreManual20_HTML/SettingUpLustreSystem.html

http://wiki.lustre.org/index.php/FAQ_-_Sizing

OpenCL* Design and Programming Guide for the Intel® Xeon Phi™ Coprocessor

https://software.intel.com/en-us/articles/opencl-design-and-programming-guide-for-the-intel-xeon-phi-coprocessor

# Count the number of "physical processor(s)"
grep "physical id" /proc/cpuinfo | sort -u | wc -l
1
# Count the number of "physical cores per CPU"
grep "cpu cores" /proc/cpuinfo | sort -u | cut -d":" -f2
2
# Count the number of "logical cores" (including multi-threading cores)
grep -c "processor" /proc/cpuinfo
2

A Simple LD_PRELOAD Tutorial

http://www.catonmat.net/blog/simple-ld-preload-tutorial/

http://elinux.org/images/b/b5/Elc2013_Kobayashi.pdf

Avoid escape characters in GIT

http://michael.otacoo.com/linux-2/avoid-escape-characters-in-git/

The first one is to change the pager to “more”.

git config --global core.pager more

The second one is to append an additional command with “less -r”.

git diff --color | less -r
git log -p --color | less -r

And you get a nice colored output.

Here is another solution which is more portable to my mind, and it is the one I use.

git config --global core.pager "less -r"

This makes git use the modified less command whenever its pager is invoked, so that escape characters are printed correctly.