Skip to content

Commit

Permalink
Develop (#66)
Browse files Browse the repository at this point in the history
- Fixed an issue with new versions of Ray on Ubuntu and NVIDIA systems that prevented GPU indexing and multiprocessing.
- Small adjustments to peak fitting suggested by Will Lenthe.
Signed-off-by: David Rowenhorst <david.rowenhorst@nrl.navy.mil>
  • Loading branch information
drowenhorst-nrl authored Aug 6, 2024
1 parent d39e7eb commit 54961b2
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 18 deletions.
11 changes: 2 additions & 9 deletions pyebsdindex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,10 @@


# Try to import only once - also will perform check that at least one GPU is found.

try:
_pyopencl_installed = False
import pyopencl
from pyebsdindex.opencl import openclparam
testcl = openclparam.OpenClParam()
try:
gpu = testcl.get_gpu()
if len(gpu) > 0:
_pyopencl_installed = True
except:
raise ImportError('pyopencl could not find GPU')
_pyopencl_installed = True
except ImportError:
_pyopencl_installed = False

Expand Down
1 change: 1 addition & 0 deletions pyebsdindex/_ebsd_index_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@ def index_pats_distributed(
_node_ip_address=RAYIPADDRESS, #"0.0.0.0",
runtime_env={"env_vars":
{"PYTHONPATH": os.path.dirname(os.path.dirname(__file__)),
"CUDA_VISIBLE_DEVICES": cudagpuvis,
}},
logging_level=logging.WARNING,
) # Suppress INFO messages from ray.
Expand Down
15 changes: 13 additions & 2 deletions pyebsdindex/band_detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,8 @@ def band_label_numba(nBands,nPats,nRho,nTheta,rdnConv,rdnPad,lMaxRdn):
bandData_avemax[q,i] = sumnn / nnN
# rnn = np.sum(nn * (np.float32(r) + nnr))
# cnn = np.sum(nn * (np.float32(c) + nnc))
#dx = 0.125 * (2.0 * (nn[1,2] - nn[1,0]) + (nn[0,2] - nn[0,0]) + (nn[2,2] - nn[2,0]))
#dy = 0.125 * (2.0 * (nn[2,1] - nn[0,1]) + (nn[2,0] - nn[0,0]) + (nn[2,2] - nn[0,2]))
dx = 0.5*(nn[1,2] - nn[1,0])
dy = 0.5*(nn[2,1] - nn[0,1])
dxx = nn[1,2] + nn[1,0] - 2 * nn[1,1]
Expand All @@ -655,8 +657,17 @@ def band_label_numba(nBands,nPats,nRho,nTheta,rdnConv,rdnPad,lMaxRdn):
det = 1.0/det
dc = (dyy * dx - dxy * dy) * det
rc = (dxx * dy - dxy * dx) * det
dc = max(-1.0, dc) ; rc = max(-1.0, rc)
dc = min(1.0, dc) ; rc = min(1.0, rc)
# protect against a bad dxy estimate, assume dxy == 0.0 -- per suggestion of W. Lenthe
if (np.abs(dc) > 0.875) or (np.abs(rc) > 0.875):
det = (dxx * dyy)
det = det if np.fabs(det) > 1e-12 else 1.0e-12
det = 1.0 / det
dc = (dyy * dx) * det
rc = (dxx * dy) * det
if (np.abs(dc) > 0.875) or (np.abs(rc) > 0.875):
dc = 0.0 ; rc = 0.0
# dc = max(-1.0, dc) ; rc = max(-1.0, rc)
# dc = min(1.0, dc) ; rc = min(1.0, rc)
cnn = c - dc
rnn = r - rc
bandData_aveloc[q,i,:] = np.array([rnn,cnn])
Expand Down
23 changes: 20 additions & 3 deletions pyebsdindex/nlpar.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,27 @@
from pyebsdindex import _ray_installed
from pyebsdindex import _pyopencl_installed


if _ray_installed and _pyopencl_installed:
gpuisthere = False
if _pyopencl_installed:
# check for at least one gpu
import pyopencl as cl
try:
plt = cl.get_platforms()
if len(plt) > 0:
for p in plt:
g = p.get_devices(device_type=cl.device_type.GPU)
if len(g) > 0:
gpuisthere = True
g = None
break
plt = None
except:
pass


if _ray_installed and gpuisthere:
from pyebsdindex.opencl.nlpar_clray import NLPAR
elif _pyopencl_installed and not _ray_installed:
elif gpuisthere and not _ray_installed:
from pyebsdindex.opencl.nlpar_cl import NLPAR
else:
from pyebsdindex.nlpar_cpu import NLPAR
Expand Down
45 changes: 41 additions & 4 deletions pyebsdindex/opencl/clkernels.cl
Original file line number Diff line number Diff line change
Expand Up @@ -654,11 +654,15 @@ __kernel void maxlabel( __global const uchar *maxlocin,__global const float *max
imVal2 = maxvalin[indxy];
avetempweight += imVal2;
dxy += imVal2;

//dx -= imVal2;
//dy -= imVal2;

indxy = ((y-1)*imszx + (x))*nImChunk+z;
imVal2 = maxvalin[indxy];
avetempweight += imVal2;
dy -= imVal2;
//dy -= 2.0 * imVal2;
dyy += imVal2;
imValym1 = imVal2;

Expand All @@ -668,10 +672,14 @@ __kernel void maxlabel( __global const uchar *maxlocin,__global const float *max
avetempweight += imVal2;
dxy -= imVal2;

//dx += imVal2;
//dy -= imVal2;

indxy = ((y)*imszx + (x-1))*nImChunk+z;
imVal2 = maxvalin[indxy];
avetempweight += imVal2;
dx -= imVal2;
//dx -= 2.0 * imVal2;
dxx += imVal2;

indxy = ((y)*imszx + (x))*nImChunk+z;
Expand All @@ -685,17 +693,24 @@ __kernel void maxlabel( __global const uchar *maxlocin,__global const float *max
imVal2 = maxvalin[indxy];
avetempweight += imVal2;
dx += imVal2;
//dx += 2.0 * imVal2;
dxx += imVal2;



indxy = ((y+1)*imszx + (x-1))*nImChunk+z;
imVal2 = maxvalin[indxy];
avetempweight += imVal2;
dxy -= imVal2;

//dx -= imVal2;
//dy += imVal2;

indxy = ((y+1)*imszx + (x))*nImChunk+z;
imVal2 = maxvalin[indxy];
avetempweight += imVal2;
dy += imVal2;
//dy += 2.0 * imVal2;
dyy += imVal2;
imValyp1 = imVal2;

Expand All @@ -704,17 +719,39 @@ __kernel void maxlabel( __global const uchar *maxlocin,__global const float *max
avetempweight += imVal2;
dxy += imVal2;

//dx += imVal2;
//dy += imVal2;

dxy *= 0.25;
dx *= 0.5;
dy *= 0.5;
dx *= 0.5; //0.125; //
dy *= 0.5; //0.125; //

det = (dxx*dyy - dxy*dxy);
det = (fabs(det) > 1.0e-12) ? det : 1.0e-12;
det = 1.0 / det;

ix = (float) x - (dyy * dx - dxy * dy) * det;
iy = (float) y - (dxx * dy - dxy * dx) * det;
//ix = (float) x - (dyy * dx - dxy * dy) * det;
//iy = (float) y - (dxx * dy - dxy * dx) * det;

ix = (dyy * dx - dxy * dy) * det;
iy = (dxx * dy - dxy * dx) * det;
// protect against bad estimates in dxy; assume dxy == 0.0 -- per suggestion of W. Lenthe
if ( (fabs(ix) > 0.875) || (fabs(iy) > 0.875) ){
det = (dxx*dyy);
det = (fabs(det) > 1.0e-12) ? det : 1.0e-12;
det = 1.0 / det;
ix = (dyy * dx) * det;
iy = (dxx * dy) * det;
if ( (fabs(ix) > 0.875) || (fabs(iy) > 0.875) ){
ix = 0.0;
iy = 0.0;
}

}

ix = (float) x - ix;
iy = (float) y - iy;

aveloc[z*lnmax + i] = (float2) (iy, ix);
aveval[z*lnmax + i] = avetempweight/9.0;
// band width metric
Expand Down

0 comments on commit 54961b2

Please sign in to comment.