1 #ifndef STK_UTIL_PARALLEL_MPI_hpp 2 #define STK_UTIL_PARALLEL_MPI_hpp 4 #include <stk_util/stk_config.h> 5 #if defined( STK_HAS_MPI ) 23 MPI_Datatype * datatype)
25 std::complex<T> *complex_in =
static_cast<std::complex<T> *
>(invec);
26 std::complex<T> *complex_inout =
static_cast<std::complex<T> *
>(inoutvec);
28 for (
int i = 0; i < *len; ++i)
29 complex_inout[i] += complex_in[i];
72 static MPI_Op s_mpi_real_complex_sum;
73 static bool initialized =
false;
78 MPI_Op_create(mpi_real_complex_sum<T>,
true, &s_mpi_real_complex_sum);
80 return s_mpi_real_complex_sum;
104 template <
typename T>
112 Loc(
const T &value,
int loc)
129 TempLoc(
double value,
double other,
int loc)
149 template <
typename T>
155 static MPI_Datatype type() {
161 struct Datatype<signed char>
163 static MPI_Datatype type() {
169 struct Datatype<unsigned char>
171 static MPI_Datatype type() {
179 static MPI_Datatype type() {
185 struct Datatype<unsigned int>
187 static MPI_Datatype type() {
193 struct Datatype<short>
195 static MPI_Datatype type() {
201 struct Datatype<unsigned short>
203 static MPI_Datatype type() {
204 return MPI_UNSIGNED_SHORT;
209 struct Datatype<long>
211 static MPI_Datatype type() {
217 struct Datatype<unsigned long>
219 static MPI_Datatype type() {
220 return MPI_UNSIGNED_LONG;
245 struct Datatype<float>
247 static MPI_Datatype type() {
253 struct Datatype<double>
255 static MPI_Datatype type() {
262 struct Datatype<
std::complex<float> >
264 static MPI_Datatype type() {
270 struct Datatype<
std::complex<double> >
272 static MPI_Datatype type() {
279 struct Datatype<Loc<int> >
281 static MPI_Datatype type() {
287 struct Datatype<Loc<short> >
289 static MPI_Datatype type() {
290 return MPI_SHORT_INT;
295 struct Datatype<Loc<long> >
297 static MPI_Datatype type() {
303 struct Datatype<Loc<unsigned long> >
305 static MPI_Datatype type() {
321 struct Datatype<Loc<float> >
323 static MPI_Datatype type() {
324 return MPI_FLOAT_INT;
329 struct Datatype<Loc<double> >
331 static MPI_Datatype type() {
332 return MPI_DOUBLE_INT;
337 struct Datatype<TempLoc>
339 static MPI_Datatype type() {
362 AllReduce(MPI_Comm mpi_comm, MPI_Op op, T *src_dest,
size_t size)
364 std::vector<T> source(src_dest, src_dest + size);
366 if (MPI_Allreduce(&source[0], &src_dest[0], (
int) size,
Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS )
367 throw std::runtime_error(
"MPI_Allreduce failed");
387 AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &dest)
389 std::vector<T> source(dest);
391 if (MPI_Allreduce(&source[0], &dest[0], (
int) dest.size(),
Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS )
392 throw std::runtime_error(
"MPI_Allreduce failed");
415 AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &source, std::vector<T> &dest)
417 if (source.size() != dest.size())
418 throw std::runtime_error(
"sierra::MPI::AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &source, std::vector<T> &dest) vector lengths not equal");
420 if (MPI_Allreduce(&source[0], &dest[0], (
int) dest.size(),
Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS )
421 throw std::runtime_error(
"MPI_Allreduce failed");
427 AllGather(MPI_Comm mpi_comm, std::vector<T> &source, std::vector<T> &dest)
430 MPI_Comm_size(mpi_comm,&nproc);
431 if (source.size()*nproc != dest.size())
432 throw std::runtime_error(
"sierra::MPI::AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &source, std::vector<T> &dest) vector lengths not equal");
434 if (MPI_Allgather(&source[0], (
int)source.size(), Datatype<T>::type(),
435 &dest[0], (int)source.size(), Datatype<T>::type(),
436 mpi_comm) != MPI_SUCCESS ){
437 throw std::runtime_error(
"MPI_Allreduce failed");
450 template <
typename T>
453 enum {alignment = (
sizeof(T) >
sizeof(
double) ?
sizeof(double) :
sizeof(T))};
454 enum {mask = alignment - 1};
456 char * c =
reinterpret_cast<char *
>(p);
457 size_t front_misalign = (c - (
char *)0) & mask;
458 if (front_misalign > 0) {
459 size_t correction = alignment - front_misalign;
460 T *q =
reinterpret_cast<T *
>((c - (
char *)0) + correction);
464 return reinterpret_cast<T *
>(p);
503 virtual void size(
void *&inbuf)
const = 0;
516 virtual void copyin(
void *&inbuf)
const = 0;
529 virtual void copyout(
void *&outbuf)
const = 0;
547 virtual void op(
void *&inbuf,
void *&outbuf)
const = 0;
563 template<
class Op,
class LocalIt,
class GlobalIt = LocalIt>
566 typedef typename std::iterator_traits<LocalIt>::value_type value_type;
567 typedef typename std::iterator_traits<LocalIt>::difference_type difference_type;
569 Reduce(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end)
570 : m_localBegin(local_begin),
571 m_localEnd(local_end),
572 m_globalBegin(global_begin),
573 m_globalEnd(global_end),
574 m_length(local_end - local_begin)
576 if (global_end - global_begin != m_length)
577 throw std::runtime_error(
"sierra::MPI::Reduce::Reduce(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) local and global lengths not equal");
583 virtual void size(
void *&inbuf)
const {
584 value_type *t =
align_cast<value_type>(inbuf);
589 virtual void copyin(
void *&inbuf)
const {
590 value_type *t =
align_cast<value_type>(inbuf);
591 for (LocalIt it = m_localBegin; it != m_localEnd; ++it)
597 value_type *t =
align_cast<value_type>(outbuf);
598 for (GlobalIt it = m_globalBegin; it != m_globalEnd; ++it)
603 virtual void op(
void *&inbuf,
void *&outbuf)
const {
604 value_type *tin =
align_cast<value_type>(inbuf);
607 for (
size_t i = m_length; i; --i)
613 LocalIt m_localBegin;
615 GlobalIt m_globalBegin;
616 GlobalIt m_globalEnd;
617 difference_type m_length;
628 typedef std::vector<ReduceInterface *> ReduceVector;
638 void copyin(
void *
const buffer_in)
const;
640 void copyout(
void *
const buffer_out)
const;
642 void op(
void *
const buffer_in,
void *
const buffer_out)
const;
644 static void void_op(
void * inv,
void * outv,
int *n, MPI_Datatype *datatype);
647 ReduceVector m_reduceVector;
666 template <
typename T>
667 inline Sum(T * dest,
const T *source) {
678 template <
typename T>
679 inline Prod(T * dest,
const T *source) {
690 template <
typename T>
691 inline Min(T * dest,
const T *source) {
692 *dest = std::min(*dest, *source);
702 template <
typename T>
703 inline Max(T * dest,
const T *source) {
704 *dest = std::max(*dest, *source);
714 template <
typename T>
716 if (source->m_value < dest->m_value) {
717 dest->m_value = source->m_value;
718 dest->m_loc = source->m_loc;
720 else if (source->m_value == dest->m_value)
721 dest->m_loc = std::min(dest->m_loc, source->m_loc);
731 template <
typename T>
733 if (source->m_value > dest->m_value) {
734 dest->m_value = source->m_value;
735 dest->m_loc = source->m_loc;
737 else if (source->m_value == dest->m_value)
738 dest->m_loc = std::min(dest->m_loc, source->m_loc);
745 inline MaxTempLoc(TempLoc * dest,
const TempLoc *source) {
746 if (source->m_value > dest->m_value) {
747 dest->m_value = source->m_value;
748 dest->m_other = source->m_other;
749 dest->m_loc = source->m_loc;
751 else if (source->m_value == dest->m_value) {
752 if (dest->m_loc > source->m_loc) {
753 dest->m_other = source->m_other;
754 dest->m_loc = source->m_loc;
762 inline MinTempLoc(TempLoc * dest,
const TempLoc *source) {
763 if (source->m_value < dest->m_value) {
764 dest->m_value = source->m_value;
765 dest->m_other = source->m_other;
766 dest->m_loc = source->m_loc;
768 else if (source->m_value == dest->m_value) {
769 if (dest->m_loc > source->m_loc) {
770 dest->m_other = source->m_other;
771 dest->m_loc = source->m_loc;
912 template<
class LocalIt,
class GlobalIt>
930 template<
class LocalIt,
class GlobalIt>
948 template<
typename T,
class LocalIt,
class GlobalIt>
966 template<
typename T,
class LocalIt,
class GlobalIt>
981 template<
class T,
class U>
985 std::vector<T> source;
987 std::back_insert_iterator<std::vector<T> > source_inserter(source);
988 collector.gather(op, source_inserter);
990 int size = source.size();
998 MPI_Comm_size(mpi_comm, &num_proc);
999 MPI_Comm_rank(mpi_comm, &my_proc);
1002 std::vector<int> local_array_len(num_proc, 0);
1003 local_array_len[my_proc] == size;
1004 std::vector<int> global_array_len(num_proc, 0);
1006 MPI_Allreduce(&local_array_len[0], &global_array_len[0], num_proc, MPI_INT, MPI_SUM, mpi_comm);
1008 for(
unsigned i = 0; i < num_proc; ++i) {
1009 if(global_array_len[i] != size) {
1010 throw std::runtime_error(
"Slib_MPI.h::AllReduceCollected, not all processors have the same length array");
1015 if (source.empty())
return;
1016 std::vector<T> dest(size);
1018 if (MPI_Allreduce(&source[0], &dest[0], size,
Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS )
1019 throw std::runtime_error(
"MPI_Allreduce failed");
1021 typename std::vector<T>::iterator dest_getter = dest.begin();
1022 collector.scatter(op, dest_getter);
1032 #endif // if defined( STK_HAS_MPI ) 1033 #endif // STK_UTIL_PARALLEL_MPI_hpp virtual void copyout(void *&outbuf) const =0
Member function copyin copies the data from outbuf pointer reference to the reduction interface data...
void AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector< T > &source, std::vector< T > &dest)
Function AllReduce copies the source/destination vector into a temporary vector and then executed the...
MPI_Datatype long_long_int_int_type()
Member function double_double_int_type ...
Reduce< Max, LocalIt, GlobalIt > * ReduceMax(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end)
Member function ReduceMax ...
Reduce< Prod, LocalIt, GlobalIt > * ReduceProd(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end)
Member function ReduceProd ...
virtual void op(void *&inbuf, void *&outbuf) const
Member function op executes the operation on the data at inbuf pointer reference and outbuf pointer r...
virtual void op(void *&inbuf, void *&outbuf) const =0
Member function op executes the operation on the data at inbuf pointer reference and outbuf pointer r...
virtual void size(void *&inbuf) const =0
Member function size returns the size in bytes needed to store the data for the reduction operation o...
Interface class ReduceInterface specifies the required virtual functions for the aggregated type and ...
virtual ~ReduceInterface()
Template class Reduce implements the ReduceInterface interface for any operator and type...
MPI_Datatype float_complex_type()
Function float_complex_type returns an MPI complex data type for C++.
MPI_Op real_complex_sum_op()
Function real_complex_sum_op returns a sum operation for the C++ complex MPI data type...
Reduce< Min, LocalIt, GlobalIt > * ReduceMin(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end)
Member function ReduceMin ...
MPI_Datatype double_double_int_type()
Member function double_double_int_type ...
virtual void copyin(void *&inbuf) const
Member function copyin copies the data from the reduction interface to the inbuf pointer reference...
Reduce< Sum, LocalIt, GlobalIt > * ReduceSum(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end)
Member function ReduceSum ...
std::ostream & tout()
Regression test textual output stream.
virtual void size(void *&inbuf) const
Member function size returns the size in bytes needed to store the data for the reduction operation o...
virtual void copyin(void *&inbuf) const =0
Member function copyin copies the data from the reduction interface to the inbuf pointer reference...
MPI_Datatype double_complex_type()
Function double_complex_type returns an MPI complex data type for C++.
T * align_cast(void *p)
Function align_cast returns a pointer that has been aligned to the specified alignment or double if t...
Template class loc implements the data structure for the MINLOC and MAXLOC data types.
MPI_Op double_complex_sum_op()
Function double_complex_sum_op returns a sum operation for the C++ complex MPI data type...
Traits class Datatype implements a traits class containing two static member functions which return t...
void AllReduceCollected(MPI_Comm mpi_comm, MPI_Op op, U collector)
Member function AllReduceCollected ...
virtual void copyout(void *&outbuf) const
Member function copyin copies the data from outbuf pointer reference to the reduction interface data...