Python & CUDA Integration

This template should be helpful to anyone who wants to integrate Python with C++ (and CUDA) code.

test.cu

#include <iostream>

namespace test {
namespace cuda {

// Minimal routine compiled by nvcc: writes "Hello" followed by a newline to
// standard output and flushes the stream. Declared extern "C" so the symbol
// is emitted with C linkage.
extern "C" void hello()
{
    std::cout << "Hello";
    std::cout << std::endl;
}

} // namespace cuda
} // namespace test

test.cpp

#include <boost/python.hpp>
#include <numpy/arrayobject.h>

using namespace boost::python;

// Forward declaration of the routine implemented in test.cu. It is declared
// extern "C" in both files so that the two separately compiled objects
// (test.cu via nvcc, test.cpp via the host compiler) refer to the same
// C-linkage symbol.
namespace test { namespace cuda {
extern "C" void hello();
}}

// Defines the Python extension module named "test"; the body below runs when
// the module is imported.
BOOST_PYTHON_MODULE(test)
{
    // Tell Boost.Python to use numpy.ndarray as the type behind
    // boost::python::numeric::array.
    numeric::array::set_module_and_type("numpy", "ndarray");
    // Call into the nvcc-compiled object at import time; per test.cu this
    // prints "Hello", which confirms the CUDA object was linked in.
    test::cuda::hello();
    // Initialise the NumPy C API (needed before any PyArray_* call).
    import_array();
}

and then

setup.py

import os
import sys
from distutils.core import setup, Extension
from subprocess import Popen

# Each entry describes one extension: [module name, C++ source, CUDA source].
modules = []
modules.append(['test', 'test.cpp', 'test.cu'])

# Step 1: compile every CUDA source to a position-independent object file
# named '<module name>.o'. '-Xcompiler -fPIC' forwards -fPIC to the host
# compiler, which is required for objects linked into a shared library.
for module in modules:
    mod_name = module[0]
    mod_cu_source = module[2]
    nvcc_cmd = ['nvcc', '-Xcompiler', '-fPIC', '-c', mod_cu_source,
                '-o', '%s.o' % mod_name]
    proc = Popen(nvcc_cmd)
    # Abort if nvcc fails: otherwise the build would carry on and either fail
    # later with a confusing linker error or silently link a stale object.
    if proc.wait() != 0:
        raise SystemExit('nvcc failed with exit code %d: %s'
                         % (proc.returncode, ' '.join(nvcc_cmd)))

# Step 2: describe the C++ extensions and link each one against the object
# file produced by nvcc above.
extensions = []
for module in modules:
    mod_name = module[0]
    mod_source = module[1]
    ex_module = Extension(
        mod_name,
        sources=[mod_source],
        # NOTE: the Boost and CUDA locations are hard-coded for the author's
        # machine; adjust them to match the local installation.
        include_dirs=['/usr/include/boost'],
        libraries=['boost_python', 'stdc++'],
        library_dirs=['.', '/usr/local/cuda-5.5/lib64'],
        extra_compile_args=['-fopenmp', '-O3', '-std=c++0x'],
        # '-lcudart' is passed here, after the nvcc object, rather than via
        # `libraries`, so that the CUDA runtime is actually linked in.
        extra_link_args=['%s.o' % mod_name, '-lcudart'],
    )
    extensions.append(ex_module)

setup(name="tools", version="1.0", ext_modules=extensions)

If you put 'cudart' in the libraries list instead of passing '-lcudart' in extra_link_args, libcudart.so will not be loaded and 'import test' will fail.
Run 'ldd test.so' and check the output to see which shared libraries the module is linked against.

This code works fine on my PC (Ubuntu 12.04 LTS 64bit & CUDA 5.5 & Python 2.7.3).


Notes:
If you omit '-Xcompiler -fPIC', you will get the following link error:

/usr/bin/ld: test.o: relocation R_X86_64_32 against `.rodata' can not be used when making a shared object; recompile with -fPIC
test.o: could not read symbols: Bad value

and if you put 'cudart' in libraries instead of passing '-lcudart' in extra_link_args, you will get

>>> import test
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ImportError: ./test.so: undefined symbol: __cudaUnregisterFatBinary

For more information, please see:
https://devtalk.nvidia.com/default/topic/468304/linking-c-and-cuda-files-with-nvcc-and-gcc/
http://daily.belltail.jp/?p=791