Commit 3037bcaa authored by lheinz's avatar lheinz
Browse files

OMP parallelization

parent c21e4b71
......@@ -106,7 +106,7 @@ Output is written to a file specified by -o, there are two optional output files
-o the output trajectory
-oref trajectory containing COM coordinates in original order (optional)
-orefp trajectory containing the permuted COM coordinates (optional)
-orp trajectory containing the permuted COM coordinates (optional)
4. Example run
......
......@@ -3,12 +3,12 @@
* lap.h
version 1.0 - 21 june 1996
author Roy Jonker, MagicLogic Optimization Inc.
header file for LAP
*
**************************************************************************/
#include <typedefs.h>
//#include <typedefs.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
......
......@@ -22,24 +22,25 @@ PREFIX = ../../g_permute-1.1
EXEC_PREFIX = $(PREFIX)
BINDIR = $(EXEC_PREFIX)/bin
LIBDIR = $(EXEC_PREFIX)/lib
INCDIR = $(EXEC_PREFIX)/include
INCDIR = $(EXEC_PREFIX)/include
# pointer to your Gromacs installation - please adjust
GMXDIR = /usr/local/gromacs/3.3/334-impi403-fftw331-gcc470
GMXDIR = /usr/local/gromacs/2018/2018.2-impi2017-fftw337-gcc640-cuda91/
#/usr/local/gromacs/5.1/514-impi2017-fftw332-gcc540-cuda80
#/usr/local/gromacs/3.3/334-impi403-fftw331-gcc470
GMXLIB = $(GMXDIR)/lib
GMXINC = $(GMXDIR)/include/gromacs
GMXLIB = $(GMXDIR)/lib64
GMXINC = $(GMXDIR)/include
#$(GMXDIR)/include/gromacs
# Source of the installation. No need to change this
SRCDIR = ..
CC = cc
CC = g++
LDFLAGS = -L$(GMXLIB) -lm
INCFLAGS = -I$(SRCDIR)/include -I$(GMXINC)
LDSOFLAGS = -shared -Wl,-soname,
#for compilation on 64bit systems uncomment the following line
CFLAGS = -O0 -fomit-frame-pointer -finline-functions -Wall -Wno-unused -fPIC -DPIC -funroll-all-loops -ggdb
CFLAGS = -O3 -fomit-frame-pointer -finline-functions -Wall -Wno-unused -fPIC -DPIC -funroll-all-loops -ggdb -std=c++11
#for compilation on 32bit systems uncomment the following line
#CFLAGS = -O0 -fomit-frame-pointer -finline-functions -Wall -Wno-unused -malign-double -funroll-all-loops -ggdb
......@@ -55,7 +56,7 @@ tvar := $(shell if (test -h $(LIBDIR)/$(EXE).1);then echo 1;else echo 0;fi)
all: $(EXE)
install: $(EXE)
install: $(EXE)
mkdir -p $(BINDIR) $(LIBDIR) $(INCDIR)
cp $(EXE) $(LIBDIR)
cp ../include/lap.h ../include/gnrl.h $(INCDIR)
......
......@@ -5,28 +5,29 @@
author: Roy Jonker @ MagicLogic Optimization Inc.
e-mail: roy_jonker@magiclogic.com
Code for Linear Assignment Problem, according to
"A Shortest Augmenting Path Algorithm for Dense and Sparse Linear
Code for Linear Assignment Problem, according to
"A Shortest Augmenting Path Algorithm for Dense and Sparse Linear
Assignment Problems," Computing 38, 325-340, 1987
by
R. Jonker and A. Volgenant, University of Amsterdam.
*
*************************************************************************/
#include "smalloc.h"
//#include "smalloc.h"
#include "gromacs/utility/smalloc.h"
#include "gnrl.h"
#include "lap.h"
#include <math.h>
#define VERYSMALL 0.00001
double lap(int dim,
double lap(int dim,
double **assigncost,
col *rowsol,
row *colsol,
double *u,
col *rowsol,
row *colsol,
double *u,
double *v)
/* input: */
......@@ -68,45 +69,45 @@ double lap(int dim,
#endif
/* init how many times a row will be assigned in the column reduction. */
for (i = 0; i < dim; i++)
for (i = 0; i < dim; i++)
matches[i] = 0;
/* COLUMN REDUCTION */
for (j = dim-1; j >= 0; j--) /* reverse order gives better results. */
{
/* find minimum cost over rows. */
min = assigncost[0][j];
min = assigncost[0][j];
imin = 0;
for (i = 1; i < dim; i++)
if (assigncost[i][j] < min)
{
min = assigncost[i][j];
for (i = 1; i < dim; i++)
if (assigncost[i][j] < min)
{
min = assigncost[i][j];
imin = i;
}
v[j] = min;
v[j] = min;
if (++matches[imin] == 1)
{
if (++matches[imin] == 1)
{
/* init assignment if minimum row assigned for first time. */
rowsol[imin] = j;
colsol[j] = imin;
rowsol[imin] = j;
colsol[j] = imin;
}
else
colsol[j] = -1; /* row already assigned, column not assigned. */
}
/* REDUCTION TRANSFER */
for (i = 0; i < dim; i++)
for (i = 0; i < dim; i++)
if (matches[i] == 0) /* fill list of unassigned 'free' rows. */
unassigned[numfree++] = i;
else
if (matches[i] == 1)/* transfer reduction from rows that are assigned once. */
{
j1 = rowsol[i];
j1 = rowsol[i];
min = BIG;
for (j = 0; j < dim; j++)
for (j = 0; j < dim; j++)
if (j != j1)
if (assigncost[i][j] - v[j] < min)
if (assigncost[i][j] - v[j] < min)
min = assigncost[i][j] - v[j];
v[j1] = v[j1] - min;
}
......@@ -119,37 +120,37 @@ double lap(int dim,
/* scan all free rows. */
/* in some cases, a free row may be replaced with another one to be scanned next. */
k = 0;
prvnumfree = numfree;
k = 0;
prvnumfree = numfree;
numfree = 0;/* start list of rows still free after augmenting row reduction. */
while (k < prvnumfree)
{
i = unassigned[k];
i = unassigned[k];
k++;
/* find minimum and second minimum reduced cost over columns. */
umin = assigncost[i][0] - v[0];
j1 = 0;
umin = assigncost[i][0] - v[0];
j1 = 0;
usubmin = BIG;
for (j = 1; j < dim; j++)
for (j = 1; j < dim; j++)
{
h = assigncost[i][j] - v[j];
if (h < usubmin) {
if (h >= umin)
{
usubmin = h;
if (h >= umin)
{
usubmin = h;
j2 = j;
}
else
{
usubmin = umin;
umin = h;
j2 = j1;
else
{
usubmin = umin;
umin = h;
j2 = j1;
j1 = j;
}
}
}
i0 = colsol[j1];
if (umin < usubmin) /* - VERYSMALL) */
/* change the reduction of the minimum column to increase the minimum */
......@@ -157,22 +158,22 @@ double lap(int dim,
v[j1] = v[j1] - (usubmin - umin);
else /* minimum and subminimum equal. */
if (i0 >= 0) /* minimum column j1 is assigned. */
{
{
/* swap columns j1 and j2, as j2 may be unassigned. */
j1 = j2;
j1 = j2;
i0 = colsol[j2];
}
/* (re-)assign i to j1, possibly de-assigning an i0. */
rowsol[i] = j1;
rowsol[i] = j1;
colsol[j1] = i;
if (i0 >= 0) {/* minimum column j1 assigned earlier. */
if (umin < usubmin)/* - VERYSMALL) */
/* put in current k, and go back to that k. */
/* continue augmenting path i - j1 with i0. */
unassigned[--k] = i0;
else
unassigned[--k] = i0;
else
/* no further augmenting reduction possible. */
/* store i0 in list of free rows for next phase. */
unassigned[numfree++] = i0; }
......@@ -181,15 +182,15 @@ double lap(int dim,
while (loopcnt < 2); /* repeat once. */
/* AUGMENT SOLUTION for each free row. */
for (f = 0; f < numfree; f++)
for (f = 0; f < numfree; f++)
{
freerow = unassigned[f];/* start row of augmenting path. */
/* Dijkstra shortest path algorithm. */
/* runs until unassigned column added to shortest path tree. */
for (j = 0; j < dim; j++)
{
d[j] = assigncost[freerow][j] - v[j];
for (j = 0; j < dim; j++)
{
d[j] = assigncost[freerow][j] - v[j];
pred[j] = freerow;
collist[j] = j; /* init column list. */
}
......@@ -203,32 +204,32 @@ double lap(int dim,
{
if (up == low) /* no more columns to be scanned for current minimum. */
{
last = low - 1;
last = low - 1;
/* scan columns for up..dim-1 to find all indices for which new minimum occurs. */
/* store these indices between low..up-1 (increasing up). */
min = d[collist[up++]];
for (k = up; k < dim; k++)
min = d[collist[up++]];
for (k = up; k < dim; k++)
{
j = collist[k];
j = collist[k];
h = d[j];
if (h <= min)
{
if (h < min) /* new minimum. */
{
{
up = low; /* restart list at index low. */
min = h;
}
/* new index with same minimum, put on undex up, and extend list. */
collist[k] = collist[up];
collist[up++] = j;
collist[k] = collist[up];
collist[up++] = j;
}
}
/* check if any of the minimum columns happens to be unassigned. */
/* if so, we have an augmenting path right away. */
for (k = low; k < up; k++)
if (colsol[collist[k]] < 0)
for (k = low; k < up; k++)
if (colsol[collist[k]] < 0)
{
endofpath = collist[k];
unassignedfound = TRUE;
......@@ -236,23 +237,23 @@ double lap(int dim,
}
}
if (!unassignedfound)
if (!unassignedfound)
{
/* update 'distances' between freerow and all unscanned columns, via next scanned column. */
j1 = collist[low];
low++;
i = colsol[j1];
j1 = collist[low];
low++;
i = colsol[j1];
h = assigncost[i][j1] - v[j1] - min;
for (k = up; k < dim; k++)
for (k = up; k < dim; k++)
{
j = collist[k];
j = collist[k];
v2 = assigncost[i][j] - v[j] - h;
if (v2 < d[j])
{
pred[j] = i;
if (v2 == min) {/* new column found at same minimum value */
if (colsol[j] < 0)
if (colsol[j] < 0)
{
/* if unassigned, shortest augmenting path is complete. */
endofpath = j;
......@@ -260,32 +261,32 @@ double lap(int dim,
break;
}
/* else add to list to be scanned right away. */
else
{
collist[k] = collist[up];
collist[up++] = j;
else
{
collist[k] = collist[up];
collist[up++] = j;
}}
d[j] = v2;
}
}
}
}
}
while (!unassignedfound);
/* update column prices. */
for (k = 0; k <= last; k++)
{
j1 = collist[k];
for (k = 0; k <= last; k++)
{
j1 = collist[k];
v[j1] = v[j1] + d[j1] - min;
}
/* reset row and column assignments along the alternating path. */
do
{
i = pred[endofpath];
colsol[endofpath] = i;
j1 = endofpath;
endofpath = rowsol[i];
i = pred[endofpath];
colsol[endofpath] = i;
j1 = endofpath;
endofpath = rowsol[i];
rowsol[i] = j1;
}
while (i != freerow);
......@@ -294,11 +295,11 @@ double lap(int dim,
/* calculate optimal cost. */
lapcost = 0;
for (i = 0; i < dim; i++)
for (i = 0; i < dim; i++)
{
j = rowsol[i];
u[i] = assigncost[i][j] - v[j];
lapcost = lapcost + assigncost[i][j];
lapcost = lapcost + assigncost[i][j];
}
/* free reserved memory. */
......@@ -319,65 +320,65 @@ void checklap(int dim, double **assigncost,
double lapcost = 0, redcost = 0;
boolean *matched;
char wait;
snew(matched, dim);
for (i = 0; i < dim; i++)
for (j = 0; j < dim; j++)
for (i = 0; i < dim; i++)
for (j = 0; j < dim; j++)
if ((redcost = assigncost[i][j] - u[i] - v[j]) < -VERYSMALL)
{
printf("\n");
printf("negative reduced cost i %d j %d redcost %f\n", i, j, redcost);
printf("\n\ndim %5d - press key\n", dim);
scanf("%c", &wait);
break;
break;
}
for (i = 0; i < dim; i++)
if (fabs(redcost = assigncost[i][rowsol[i]] - u[i] - v[rowsol[i]]) >
for (i = 0; i < dim; i++)
if (fabs(redcost = assigncost[i][rowsol[i]] - u[i] - v[rowsol[i]]) >
VERYSMALL)
{
printf("\n");
printf("non-null reduced cost i %d soli %d redcost %f\n", i, rowsol[i], redcost);
printf("\n\ndim %5d - press key\n", dim);
scanf("%c", &wait);
break;
break;
}
for (j = 0; j < dim; j++)
for (j = 0; j < dim; j++)
matched[j] = FALSE;
for (i = 0; i < dim; i++)
for (i = 0; i < dim; i++)
if (matched[rowsol[i]])
{
printf("\n");
printf("column matched more than once - i %d soli %d\n", i, rowsol[i]);
printf("\n\ndim %5d - press key\n", dim);
scanf("%c", &wait);
break;
break;
}
else
matched[rowsol[i]] = TRUE;
for (i = 0; i < dim; i++)
for (i = 0; i < dim; i++)
if (colsol[rowsol[i]] != i)
{
printf("\n");
printf("error in row solution i %d soli %d solsoli %d\n", i, rowsol[i], colsol[rowsol[i]]);
printf("\n\ndim %5d - press key\n", dim);
scanf("%c", &wait);
break;
break;
}
for (j = 0; j < dim; j++)
for (j = 0; j < dim; j++)
if (rowsol[colsol[j]] != j)
{
printf("\n");
printf("error in col solution j %d solj %d solsolj %d\n", j, colsol[j], rowsol[colsol[j]]);
printf("\n\ndim %5d - press key\n", dim);
scanf("%c", &wait);
break;
break;
}
sfree(matched);
......
No preview for this file type
No preview for this file type
......@@ -2,50 +2,64 @@
SHELL=/bin/sh
# set LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/lheinz/Install/g_permute_dev/g_permute-1.1/lib/:/cm/local/apps/gcc/6.1.0/lib64/ # the last path is needed for libstdc++
# load MPI: module load intel-mpi/64/5.1.2/150
# this is where executable and libraries will go - we recommend to
# leave PREFIX unchanged
# in any case this must match the location where liblap was installed to
PREFIX = ../../g_permute-1.1
EXEC_PREFIX = $(PREFIX)
LIBDIR = $(EXEC_PREFIX)/lib
BINDIR = $(EXEC_PREFIX)/bin
# pointer to your Gromacs installation - adjust only if you did *NOT* use the installation script instGMX331.sh supplied with the distribution
GMXDIR = /usr/local/gromacs/3.3/334-impi403-fftw331-gcc470
GMXDIR = /usr/local/gromacs/2018/2018.2-impi2017-fftw337-gcc640-cuda91/
#/usr/local/gromacs/2019/2019.3-impi2017-fftw337-gcc740-cuda10/
#/usr/local/gromacs/2018/2018.2-impi2017-fftw337-gcc640-cuda91/
#/usr/local/gromacs/5.1/514-impi2017-fftw332-gcc540-cuda80
#/usr/local/gromacs/3.3/334-impi403-fftw331-gcc470
GMXLIB = $(GMXDIR)/lib
GMXINC = $(GMXDIR)/include/gromacs
GMXLIB = $(GMXDIR)/lib64
GMXINC = $(GMXDIR)/include
# we have to link fftw, even if we don't use it
FFTWLIB = /home/ckutzne/fftw/337-gcc485-sse2-avx-avx2/lib64
# Source of the installation. No need to change this
SRCDIR = ..
CC = cc
LDFLAGS = -L/usr/lib -L/usr/local/lib -L$(GMXLIB) -L$(LIBDIR) -lm -llap -lgmx
INCFLAGS = -I$(SRCDIR)/include -I..//include -I$(GMXINC)
# CC = cc # switch to C++
CXX = mpicxx
LDFLAGS = -L/usr/lib -L/usr/lib64 -L/usr/local/lib -L$(GMXLIB) -L$(FFTWLIB) -L/cm/local/apps/gcc/6.1.0/lib64/ -lm -lgromacs -lpthread -lstdc++ -lfftw3f
#-L/cm/local/apps/gcc/6.1.0/lib64/
INCFLAGS= -I$(SRCDIR)/include -I../include -I$(GMXINC)
#for compilation on 64bit systems uncomment the following line
CFLAGS = -O0 -g -fomit-frame-pointer -finline-functions -Wall -Wno-unused -funroll-all-loops -ggdb
CFLAGS = -O3 -g -fomit-frame-pointer -finline-functions -Wall -Wno-unused -funroll-all-loops -ggdb -std=c++11 -fopenmp
#for compilation on 32bit systems uncomment the following line
#CFLAGS = -O0 -g -fomit-frame-pointer -finline-functions -Wall -Wno-unused -malign-double -funroll-all-loops -ggdb
LDSOFLAGS = -Wl
OBJS = g_permute.o
OBJS = g_permute.o ../liblap/lap.o ../liblap/memory.o
EXE = g_permute
# pattern rule to compile object files from C files
# might not work with make programs other than GNU make
%.o : %.c Makefile
$(CC) $(INCFLAGS) $(CFLAGS) -c $< -o $@
all: $(EXE)
install: $(EXE)
install: $(EXE)
mkdir -p $(BINDIR)
cp $(EXE) $(BINDIR)
../liblap/lap.o: ../liblap/lap.c Makefile
$(CXX) $(INCFLAGS) $(CFLAGS) -c $< -o $@
../liblap/memory.o: ../liblap/memory.c Makefile
$(CXX) $(INCFLAGS) $(CFLAGS) -c $< -o $@
g_permute.o: g_permute.cpp Makefile
$(CXX) $(INCFLAGS) $(CFLAGS) -c $< -o $@
all: $(EXE)
$(EXE): $(OBJS) Makefile
$(CC) -o $(EXE) $(OBJS) $(LDFLAGS) $(CFLAGS)
$(CXX) -o $(EXE) $(OBJS) $(LDFLAGS) $(CFLAGS)
.PHONY : clean
clean:
......
#Makefile for g_permute; edit the variables according to your needs
SHELL=/bin/sh
# set LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/lheinz/Install/g_permute_dev/g_permute-1.1/lib/:/cm/local/apps/gcc/6.1.0/lib64/ # the last path is needed for libstdc++
# load MPI: module load intel-mpi/64/5.1.2/150
# this is where executable and libraries will go - we recommend to
# leave PREFIX unchanged
# in any case this must match the location where liblap was installed to
PREFIX = ../../g_permute-1.1
EXEC_PREFIX = $(PREFIX)
BINDIR = $(EXEC_PREFIX)/bin
# pointer to your Gromacs installation - adjust only if you did *NOT* use the installation script instGMX331.sh supplied with the distribution
GMXDIR = /usr/local/gromacs/2018/2018.2-impi2017-fftw337-gcc640-cuda91/
#/usr/local/gromacs/2019/2019.3-impi2017-fftw337-gcc740-cuda10/
#/usr/local/gromacs/2018/2018.2-impi2017-fftw337-gcc640-cuda91/
#/usr/local/gromacs/5.1/514-impi2017-fftw332-gcc540-cuda80
#/usr/local/gromacs/3.3/334-impi403-fftw331-gcc470
GMXLIB = $(GMXDIR)/lib64
GMXINC = $(GMXDIR)/include
# we have to link fftw, even if we don't use it
FFTWLIB = /home/ckutzne/fftw/337-gcc485-sse2-avx-avx2/lib64
# Source of the installation. No need to change this
SRCDIR = ..
# CC = cc # switch to C++
CXX = mpicxx
LDFLAGS = -L/usr/lib -L/usr/lib64 -L/usr/local/lib -L$(GMXLIB) -L$(FFTWLIB) -L/cm/local/apps/gcc/6.1.0/lib64/ -lm -lgromacs -lpthread -lstdc++ -lfftw3f
#-L/cm/local/apps/gcc/6.1.0/lib64/
INCFLAGS= -I$(SRCDIR)/include -I../include -I$(GMXINC)
#for compilation on 64bit systems uncomment the following line
CFLAGS = -O3 -g -fomit-frame-pointer -finline-functions -Wall -Wno-unused -funroll-all-loops -ggdb -std=c++11 -fopenmp
#for compilation on 32bit systems uncomment the following line
#CFLAGS = -O0 -g -fomit-frame-pointer -finline-functions -Wall -Wno-unused -malign-double -funroll-all-loops -ggdb
LDSOFLAGS = -Wl
OBJS = g_permute.o ../liblap/lap.o ../liblap/memory.o
EXE = g_permute
install: $(EXE)
mkdir -p $(BINDIR)
cp $(EXE) $(BINDIR)
../liblap/lap.o: ../liblap/lap.c Makefile
$(CXX) $(INCFLAGS) $(CFLAGS) -c $< -o $@
../liblap/memory.o: ../liblap/memory.c Makefile
$(CXX) $(INCFLAGS) $(CFLAGS) -c $< -o $@
g_permute.o: g_permute_noOMP.cpp Makefile
$(CXX) $(INCFLAGS) $(CFLAGS) -c $< -o $@
all: $(EXE)
$(EXE): $(OBJS) Makefile
$(CXX) -o $(EXE) $(OBJS) $(LDFLAGS) $(CFLAGS)