Discussion:
[OMPI users] All processes waiting on MPI_Bcast
Pranav Sumanth
2017-05-24 11:12:27 UTC
Greetings!

I include a static header file utils.h with a function linspace. My main.cpp file is as follows:

#include <iostream>
#include <utils.h>
#include <mpi.h>

using namespace std;

int main(int argc, const char * argv[]) {

    float start = 0., end = 1.;
    unsigned long int num = 100;

    double *linspaced;

    float delta = (end - start) / num;
    int size, rank;

    MPI_Init(NULL, NULL);

    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Status status;

    // These have to be converted into unsigned long ints
    int casesPerNode = num / size;
    int remainderCases = num % size;

    if(rank==0){
        linspaced = new double[num];

        if(remainderCases!=0){
            linspace(&linspaced[(size-1)*casesPerNode], end - delta*remainderCases, end, remainderCases);
        } else {
            linspace(&linspaced[(size-1)*casesPerNode], end - delta*casesPerNode, end, casesPerNode);
        }

    } else {

        MPI_Bcast(&linspaced, num, MPI_DOUBLE, 0, MPI_COMM_WORLD);

        // Sending job to master node.
        // The node is already overloaded with coordinating.
        // Additional task now is also to take on remainder cases.

        // cout << "Rank " << rank << endl;
        float start_in = start + casesPerNode*delta*(rank-1);
        float end_in = start + casesPerNode*delta*(rank) - delta;

        linspace(&linspaced[(rank-1)*casesPerNode], start_in, end_in, casesPerNode);

    }

    MPI_Barrier(MPI_COMM_WORLD);
    // cout << "Print Here Rank " << rank << endl ;

    MPI_Finalize();

    /*
    for(int i=0; i< num; i++){
        cout << *(linspaced + i) << endl;
    }
    */

    return 0;

}
and my utils.h file is:

void linspace(double *ret, double start_in, double end_in, unsigned long int num_in)
{
    /* This function generates equally spaced elements and returns
       an array with the results */

    assert(num_in!=0);

    cout << "\tReceived start :" << start_in << "\tEnd :" << end_in << "\tNum_in :" << num_in << endl;

    double delta_in = (end_in - start_in) / (num_in - 1);

    if(num_in == 1){
        *(ret) = start_in;
    }

    *(ret) = start_in;
    for(int i=1; i < num_in-1; i++) {
        *(ret + i) = *(ret + i - 1) + delta_in;
    }
    *(ret + (num_in - 1)) = end_in;

    /*
    cout << "Finished executing linspace " << endl;
    for(int i = 0; i<num_in; i++){
        cout << "Address : " << &ret << "\tElement " << i << " : " << *(ret + i) << endl;
    }
    */
}
I am unable to diagnose why my code gets stuck at MPI_Bcast. What could I do to fix it?

Thanks


PS: I’m new to Open MPI and may have a lot of these doubts initially. Thanks for your patience and support.
g***@rist.or.jp
2017-05-24 12:21:22 UTC
First, allocate linspaced on all ranks:
linspaced = new double[num];
then use the pointer itself (not its address) as the first parameter of MPI_Bcast:
MPI_Bcast(linspaced, num, MPI_DOUBLE, 0, MPI_COMM_WORLD);
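
Putting those two changes together, a minimal standalone sketch (untested, and it simply fills the whole array on the root instead of using your decomposition and linspace):

#include <iostream>
#include <mpi.h>

using namespace std;

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    const int num = 100;

    // 1) every rank allocates the buffer before the broadcast,
    //    so the non-root ranks have somewhere to receive into
    double *linspaced = new double[num];

    if (rank == 0) {
        // only the root fills it (here: num points on [0, 1])
        for (int i = 0; i < num; i++)
            linspaced[i] = static_cast<double>(i) / (num - 1);
    }

    // 2) every rank calls MPI_Bcast with the pointer itself;
    //    &linspaced would hand MPI the 8-byte pointer variable as if it
    //    were a buffer of 100 doubles and corrupt nearby memory
    MPI_Bcast(linspaced, num, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    cout << "rank " << rank << " last element " << linspaced[num - 1] << endl;

    delete[] linspaced;
    MPI_Finalize();
    return 0;
}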

This mailing list is for discussing MPI issues specific to Open MPI.
If you are unsure whether your problem is specific to Open MPI, you can try another MPI library such as MPICH or one of its derivatives (MVAPICH, Intel MPI, ...).
If both MPI implementations fail, the odds are that the issue is in your application, and a forum such as Stack Overflow is a more appropriate place to look for help.

Cheers,

Gilles

----- Original Message -----
@Siva, Thanks for your inputs. I changed it and the process no longer hangs.
#include <iostream>
#include <utils.h>
#include <mpi.h>

using namespace std;

int main(int argc, const char * argv[]) {
    float start = 0., end = 1.;
    unsigned long int num = 100;
    double *linspaced;
    float delta = (end - start) / num;
    int size, rank;

    MPI_Init(NULL, NULL);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Status status;

    // These have to be converted into unsigned long ints
    int casesPerNode = num / size;
    int remainderCases = num % size;

    if(rank==0){
        linspaced = new double[num];
        if(remainderCases!=0){
            linspace(&linspaced[(size-1)*casesPerNode], end - delta*remainderCases, end, remainderCases);
        } else {
            linspace(&linspaced[(size-1)*casesPerNode], end - delta*casesPerNode, end, casesPerNode);
        }
    }

    MPI_Bcast(&linspaced, num, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    if(rank != 0) {
        // Sending job to master node.
        // The node is already overloaded with coordinating.
        // Additional task now is also to take on remainder cases.
        // cout << "Rank " << rank << endl;
        float start_in = start + casesPerNode*delta*(rank-1);
        float end_in = start + casesPerNode*delta*(rank) - delta;
        linspace(&linspaced[(rank-1)*casesPerNode], start_in, end_in, casesPerNode);
    }

    MPI_Finalize();

    for(int i=0; i< num; i++){
        cout << *(linspaced + i) << endl;
    }

    return 0;
}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
[wlan-145-94-163-183:09801] *** Process received signal ***
[wlan-145-94-163-183:09801] Signal: Segmentation fault: 11 (11)
[wlan-145-94-163-183:09801] Signal code: Address not mapped (1)
[wlan-145-94-163-183:09801] Failing at address: 0x7fed2d314220
[wlan-145-94-163-183:09802] *** Process received signal ***
[wlan-145-94-163-183:09802] Signal: Segmentation fault: 11 (11)
[wlan-145-94-163-183:09802] Signal code: Address not mapped (1)
[wlan-145-94-163-183:09802] Failing at address: 0x7fed2d3142e8
Received start :0.5 End :0.74 Num_in :25
[wlan-145-94-163-183:09803] *** Process received signal ***
[wlan-145-94-163-183:09803] Signal: Segmentation fault: 11 (11)
[wlan-145-94-163-183:09803] Signal code: Address not mapped (1)
[wlan-145-94-163-183:09803] Failing at address: 0x7fed2d3143b0
[wlan-145-94-163-183:09801] [ 0] 0   libsystem_platform.dylib      0x00007fffd6902b3a _sigtramp + 26
[wlan-145-94-163-183:09801] [ 1] 0   ???                           0x0000000000000000 0x0 + 0
[wlan-145-94-163-183:09801] [ 2] 0   test                          0x0000000108afafda main + 602
[wlan-145-94-163-183:09801] [ 3] 0   libdyld.dylib                 0x00007fffd66f3235 start + 1
[wlan-145-94-163-183:09801] *** End of error message ***
[wlan-145-94-163-183:09802] [ 0] 0   libsystem_platform.dylib      0x00007fffd6902b3a _sigtramp + 26
[wlan-145-94-163-183:09802] [ 1] 0   ???                           0x0000000000000000 0x0 + 0
[wlan-145-94-163-183:09802] [ 2] 0   test                          0x0000000107ed5fda main + 602
[wlan-145-94-163-183:09802] [ 3] 0   libdyld.dylib                 0x00007fffd66f3235 start + 1
[wlan-145-94-163-183:09802] *** End of error message ***
[wlan-145-94-163-183:09803] [ 0] 0   libsystem_platform.dylib      0x00007fffd6902b3a _sigtramp + 26
[wlan-145-94-163-183:09803] [ 1] 0   ???                           0x0000000000000000 0x0 + 0
[wlan-145-94-163-183:09803] [ 2] 0   test                          0x000000010f314fda main + 602
[wlan-145-94-163-183:09803] [ 3] 0   libdyld.dylib                 0x00007fffd66f3235 start + 1
[wlan-145-94-163-183:09803] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 3 with PID 0 on node wlan-145-94-163-183 exited on signal 11 (Segmentation fault: 11).
--------------------------------------------------------------------------
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
What could be going wrong here?
Best Regards,
Pranav
Siva Srinivas Kolukula
2017-05-24 11:46:59 UTC
When you call MPI_Bcast, all the processes should call it... in your code
you are calling MPI_Bcast on all the ranks except the root. Try keeping
MPI_Bcast out of the if condition.
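
For example, a toy program (not your code; just one double instead of your array) where the broadcast sits outside any rank test:

#include <mpi.h>
#include <cstdio>

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    double value = 0.0;
    if (rank == 0) {
        value = 3.14;   // only the root prepares the data
    }

    // every rank, including the root, reaches this call
    MPI_Bcast(&value, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    printf("rank %d sees %f\n", rank, value);

    MPI_Finalize();
    return 0;
}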

SAVE WATER ~ SAVE ENERGY ~ SAVE EARTH

http://sites.google.com/site/kolukulasivasrinivas/

Siva Srinivas Kolukula, PhD
*Scientist - B*
Indian Tsunami Early Warning Centre (ITEWC)
Advisory Services and Satellite Oceanography Group (ASG)
Indian National Centre for Ocean Information Services (INCOIS)
"Ocean Valley"
Pragathi Nagar (B.O)
Nizampet (S.O)
Hyderabad - 500 090
Telangana, INDIA

Office: 040 23886124


*Cell: +91 9381403232; +91 8977801947*
g***@rist.or.jp
2017-05-24 11:49:45 UTC
Hi,

Your program hangs because rank 0 does not call MPI_Bcast().

Generally speaking, when using collective operations (such as MPI_Bcast),
all tasks of the communicator must invoke the collective operation, and
with "matching" arguments.
In the case of MPI_Bcast(), the root value must be the same on all tasks,
and all tasks must transfer the same amount of data:
if all tasks use the same datatype, then the count must be the same on
all tasks;
otherwise, the datatype size * count must be the same on all tasks.
/* for the sake of completeness, there are known issues specific to Open
MPI when you mix large and small datatypes */
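
As a standalone illustration (a toy example, not your program), every rank makes the same call with the same root, count and datatype:

#include <mpi.h>
#include <cstdio>

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    const int count = 100;
    double data[count];
    if (rank == 0) {
        for (int i = 0; i < count; i++)
            data[i] = i;   // root fills the payload
    }

    // same root (0), same count (100), same datatype (MPI_DOUBLE) on all
    // ranks; guarding this call with "if (rank != 0)" is what makes the
    // posted program hang, because the non-root ranks wait for a
    // broadcast the root never starts
    MPI_Bcast(data, count, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    printf("rank %d received data[%d] = %g\n", rank, count - 1, data[count - 1]);

    MPI_Finalize();
    return 0;
}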

Cheers,

Gilles
